Improvements to mask management code; removes a number of unnecessary blends.
We now maintain a the distinction between the value of the mask passed into a function and the "internal" mask within the function that only accounts for varying control flow within the function. The full mask (the AND of the function mask and the internal mask) must be used for assignments to static and global variables, and reference function parameters. Further, it is the appropriate mask to use for making decisions about varying control flow. However, we can use the internal mask for assignments to variables declared in the current function (including the return value and non-reference parameters to the function). Doing so allows us to catch a few more cases where the internal mask is all on, even if the mask coming into the function wasn't all on, and thence use moves rather than blends for those assignments. (Which in turn can allow additional optimizations to happen.) Fixes issue #23.
This commit is contained in:
125
ctx.cpp
125
ctx.cpp
@@ -124,10 +124,10 @@ CFInfo::GetLoop(bool isUniform, llvm::BasicBlock *breakTarget,
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
FunctionEmitContext::FunctionEmitContext(Function *function, Symbol *funSym,
|
||||
FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
|
||||
llvm::Function *llvmFunction,
|
||||
SourcePos firstStmtPos) {
|
||||
const Type *rt = function->GetReturnType();
|
||||
function = func;
|
||||
|
||||
/* Create a new basic block to store all of the allocas */
|
||||
allocaBlock = llvm::BasicBlock::Create(*g->ctx, "allocas", llvmFunction, 0);
|
||||
@@ -136,9 +136,12 @@ FunctionEmitContext::FunctionEmitContext(Function *function, Symbol *funSym,
|
||||
llvm::BranchInst::Create(bblock, allocaBlock);
|
||||
|
||||
funcStartPos = funSym->pos;
|
||||
returnType = rt;
|
||||
maskPtr = NULL;
|
||||
entryMask = NULL;
|
||||
|
||||
internalMaskPointer = AllocaInst(LLVMTypes::MaskType, "internal_mask_memory");
|
||||
StoreInst(LLVMMaskAllOn, internalMaskPointer);
|
||||
functionMaskValue = LLVMMaskAllOn;
|
||||
fullMaskPointer = NULL;
|
||||
|
||||
loopMask = NULL;
|
||||
breakLanesPtr = continueLanesPtr = NULL;
|
||||
breakTarget = continueTarget = NULL;
|
||||
@@ -151,6 +154,7 @@ FunctionEmitContext::FunctionEmitContext(Function *function, Symbol *funSym,
|
||||
StoreInst(llvm::Constant::getNullValue(LLVMTypes::VoidPointerType),
|
||||
launchGroupHandlePtr);
|
||||
|
||||
const Type *returnType = function->GetReturnType();
|
||||
if (!returnType || returnType == AtomicType::Void)
|
||||
returnValuePtr = NULL;
|
||||
else {
|
||||
@@ -164,7 +168,7 @@ FunctionEmitContext::FunctionEmitContext(Function *function, Symbol *funSym,
|
||||
/* If debugging is enabled, tell the debug information emission
|
||||
code about this new function */
|
||||
diFile = funcStartPos.GetDIFile();
|
||||
llvm::DIType retType = rt->GetDIType(diFile);
|
||||
llvm::DIType retType = function->GetReturnType()->GetDIType(diFile);
|
||||
int flags = llvm::DIDescriptor::FlagPrototyped; // ??
|
||||
diFunction = m->diBuilder->createFunction(diFile, /* scope */
|
||||
llvmFunction->getName(), // mangled
|
||||
@@ -208,6 +212,12 @@ FunctionEmitContext::~FunctionEmitContext() {
|
||||
}
|
||||
|
||||
|
||||
const Function *
|
||||
FunctionEmitContext::GetFunction() const {
|
||||
return function;
|
||||
}
|
||||
|
||||
|
||||
llvm::BasicBlock *
|
||||
FunctionEmitContext::GetCurrentBasicBlock() {
|
||||
return bblock;
|
||||
@@ -221,21 +231,41 @@ FunctionEmitContext::SetCurrentBasicBlock(llvm::BasicBlock *bb) {
|
||||
|
||||
|
||||
llvm::Value *
|
||||
FunctionEmitContext::GetMask() {
|
||||
return LoadInst(maskPtr, NULL, "load_mask");
|
||||
FunctionEmitContext::GetFunctionMask() {
|
||||
return functionMaskValue;
|
||||
}
|
||||
|
||||
|
||||
llvm::Value *
|
||||
FunctionEmitContext::GetInternalMask() {
|
||||
if (VaryingCFDepth() == 0)
|
||||
return LLVMMaskAllOn;
|
||||
else
|
||||
return LoadInst(internalMaskPointer, NULL, "load_mask");
|
||||
}
|
||||
|
||||
|
||||
llvm::Value *
|
||||
FunctionEmitContext::GetFullMask() {
|
||||
llvm::Value *internalMask = GetInternalMask();
|
||||
if (internalMask == LLVMMaskAllOn && functionMaskValue == LLVMMaskAllOn)
|
||||
return LLVMMaskAllOn;
|
||||
else
|
||||
return BinaryOperator(llvm::Instruction::And, GetInternalMask(),
|
||||
functionMaskValue, "internal_mask&function_mask");
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
FunctionEmitContext::SetMaskPointer(llvm::Value *p) {
|
||||
maskPtr = p;
|
||||
fullMaskPointer = p;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
FunctionEmitContext::SetEntryMask(llvm::Value *value) {
|
||||
entryMask = value;
|
||||
SetMask(value);
|
||||
FunctionEmitContext::SetFunctionMask(llvm::Value *value) {
|
||||
functionMaskValue = value;
|
||||
StoreInst(GetFullMask(), fullMaskPointer);
|
||||
}
|
||||
|
||||
|
||||
@@ -246,33 +276,35 @@ FunctionEmitContext::SetLoopMask(llvm::Value *value) {
|
||||
|
||||
|
||||
void
|
||||
FunctionEmitContext::SetMask(llvm::Value *value) {
|
||||
StoreInst(value, maskPtr);
|
||||
FunctionEmitContext::SetInternalMask(llvm::Value *value) {
|
||||
StoreInst(value, internalMaskPointer);
|
||||
// kludge so that __mask returns the right value in ispc code.
|
||||
StoreInst(GetFullMask(), fullMaskPointer);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
FunctionEmitContext::MaskAnd(llvm::Value *oldMask, llvm::Value *test) {
|
||||
FunctionEmitContext::SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *test) {
|
||||
llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask,
|
||||
test, "oldMask&test");
|
||||
SetMask(mask);
|
||||
SetInternalMask(mask);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
FunctionEmitContext::MaskAndNot(llvm::Value *oldMask, llvm::Value *test) {
|
||||
FunctionEmitContext::SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test) {
|
||||
llvm::Value *notTest = BinaryOperator(llvm::Instruction::Xor, test, LLVMMaskAllOn,
|
||||
"~test");
|
||||
llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, notTest,
|
||||
"oldMask&~test");
|
||||
SetMask(mask);
|
||||
SetInternalMask(mask);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
|
||||
assert(bblock != NULL);
|
||||
llvm::Value *any = Any(GetMask());
|
||||
llvm::Value *any = Any(GetFullMask());
|
||||
BranchInst(btrue, bfalse, any);
|
||||
// It's illegal to add any additional instructions to the basic block
|
||||
// now that it's terminated, so set bblock to NULL to be safe
|
||||
@@ -283,7 +315,7 @@ FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *
|
||||
void
|
||||
FunctionEmitContext::BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
|
||||
assert(bblock != NULL);
|
||||
llvm::Value *all = All(GetMask());
|
||||
llvm::Value *all = All(GetFullMask());
|
||||
BranchInst(btrue, bfalse, all);
|
||||
// It's illegal to add any additional instructions to the basic block
|
||||
// now that it's terminated, so set bblock to NULL to be safe
|
||||
@@ -303,8 +335,8 @@ FunctionEmitContext::BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock
|
||||
|
||||
|
||||
void
|
||||
FunctionEmitContext::StartUniformIf(llvm::Value *oldMask) {
|
||||
controlFlowInfo.push_back(CFInfo::GetIf(true, oldMask));
|
||||
FunctionEmitContext::StartUniformIf() {
|
||||
controlFlowInfo.push_back(CFInfo::GetIf(true, GetInternalMask()));
|
||||
}
|
||||
|
||||
|
||||
@@ -343,7 +375,7 @@ FunctionEmitContext::EndIf() {
|
||||
assert(continueLanesPtr != NULL);
|
||||
|
||||
// newMask = (oldMask & ~(breakLanes | continueLanes))
|
||||
llvm::Value *oldMask = GetMask();
|
||||
llvm::Value *oldMask = GetInternalMask();
|
||||
llvm::Value *breakLanes = LoadInst(breakLanesPtr, NULL,
|
||||
"break_lanes");
|
||||
llvm::Value *continueLanes = LoadInst(continueLanesPtr, NULL,
|
||||
@@ -356,7 +388,7 @@ FunctionEmitContext::EndIf() {
|
||||
llvm::Value *newMask =
|
||||
BinaryOperator(llvm::Instruction::And, oldMask, notBreakOrContinue,
|
||||
"new_mask");
|
||||
SetMask(newMask);
|
||||
SetInternalMask(newMask);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -364,9 +396,10 @@ FunctionEmitContext::EndIf() {
|
||||
|
||||
void
|
||||
FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct,
|
||||
bool uniformCF, llvm::Value *oldMask) {
|
||||
bool uniformCF) {
|
||||
// Store the current values of various loop-related state so that we
|
||||
// can restore it when we exit this loop.
|
||||
llvm::Value *oldMask = GetInternalMask();
|
||||
controlFlowInfo.push_back(CFInfo::GetLoop(uniformCF, breakTarget,
|
||||
continueTarget, breakLanesPtr,
|
||||
continueLanesPtr, oldMask, loopMask));
|
||||
@@ -426,7 +459,7 @@ FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) {
|
||||
llvm::Value *notReturned = NotOperator(returnedLanes, "~returned_lanes");
|
||||
llvm::Value *newMask = BinaryOperator(llvm::Instruction::And,
|
||||
oldMask, notReturned, "new_mask");
|
||||
SetMask(newMask);
|
||||
SetInternalMask(newMask);
|
||||
}
|
||||
|
||||
|
||||
@@ -440,7 +473,7 @@ FunctionEmitContext::Break(bool doCoherenceCheck) {
|
||||
// If all of the enclosing 'if' tests in the loop have uniform control
|
||||
// flow or if we can tell that the mask is all on, then we can just
|
||||
// jump to the break location.
|
||||
if (ifsInLoopAllUniform() || GetMask() == LLVMMaskAllOn) {
|
||||
if (ifsInLoopAllUniform() || GetInternalMask() == LLVMMaskAllOn) {
|
||||
BranchInst(breakTarget);
|
||||
if (ifsInLoopAllUniform() && doCoherenceCheck)
|
||||
Warning(currentPos, "Coherent break statement not necessary in fully uniform "
|
||||
@@ -453,7 +486,7 @@ FunctionEmitContext::Break(bool doCoherenceCheck) {
|
||||
// executed a 'break' statement:
|
||||
// breakLanes = breakLanes | mask
|
||||
assert(breakLanesPtr != NULL);
|
||||
llvm::Value *mask = GetMask();
|
||||
llvm::Value *mask = GetInternalMask();
|
||||
llvm::Value *breakMask = LoadInst(breakLanesPtr, NULL, "break_mask");
|
||||
llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or,
|
||||
mask, breakMask, "mask|break_mask");
|
||||
@@ -463,7 +496,7 @@ FunctionEmitContext::Break(bool doCoherenceCheck) {
|
||||
// statements in the same scope after the 'break'. Most of time
|
||||
// this will be optimized away since we'll likely end the scope of
|
||||
// an 'if' statement and restore the mask then.
|
||||
SetMask(LLVMMaskAllOff);
|
||||
SetInternalMask(LLVMMaskAllOff);
|
||||
|
||||
if (doCoherenceCheck)
|
||||
// If the user has indicated that this is a 'coherent' break
|
||||
@@ -486,7 +519,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (ifsInLoopAllUniform() || GetMask() == LLVMMaskAllOn) {
|
||||
if (ifsInLoopAllUniform() || GetInternalMask() == LLVMMaskAllOn) {
|
||||
// Similarly to 'break' statements, we can immediately jump to the
|
||||
// continue target if we're only in 'uniform' control flow within
|
||||
// loop or if we can tell that the mask is all on.
|
||||
@@ -501,7 +534,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) {
|
||||
// Otherwise update the stored value of which lanes have 'continue'd.
|
||||
// continueLanes = continueLanes | mask
|
||||
assert(continueLanesPtr);
|
||||
llvm::Value *mask = GetMask();
|
||||
llvm::Value *mask = GetInternalMask();
|
||||
llvm::Value *continueMask =
|
||||
LoadInst(continueLanesPtr, NULL, "continue_mask");
|
||||
llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or,
|
||||
@@ -510,7 +543,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) {
|
||||
|
||||
// And set the current mask to be all off in case there are any
|
||||
// statements in the same scope after the 'continue'
|
||||
SetMask(LLVMMaskAllOff);
|
||||
SetInternalMask(LLVMMaskAllOff);
|
||||
|
||||
if (doCoherenceCheck)
|
||||
// If this is a 'coherent continue' statement, then emit the
|
||||
@@ -582,11 +615,11 @@ FunctionEmitContext::RestoreContinuedLanes() {
|
||||
return;
|
||||
|
||||
// mask = mask & continueFlags
|
||||
llvm::Value *mask = GetMask();
|
||||
llvm::Value *mask = GetInternalMask();
|
||||
llvm::Value *continueMask = LoadInst(continueLanesPtr, NULL, "continue_mask");
|
||||
llvm::Value *orMask = BinaryOperator(llvm::Instruction::Or,
|
||||
mask, continueMask, "mask|continue_mask");
|
||||
SetMask(orMask);
|
||||
SetInternalMask(orMask);
|
||||
|
||||
// continueLanes = 0
|
||||
StoreInst(LLVMMaskAllOff, continueLanesPtr);
|
||||
@@ -605,6 +638,7 @@ FunctionEmitContext::VaryingCFDepth() const {
|
||||
|
||||
void
|
||||
FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
|
||||
const Type *returnType = function->GetReturnType();
|
||||
if (returnType == AtomicType::Void) {
|
||||
if (expr != NULL)
|
||||
Error(expr->pos, "Can't return non-void type \"%s\" from void function.",
|
||||
@@ -623,7 +657,8 @@ FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
|
||||
Expr *r = expr->TypeConv(returnType, "return statement");
|
||||
if (r != NULL) {
|
||||
llvm::Value *retVal = r->GetValue(this);
|
||||
StoreInst(retVal, returnValuePtr, GetMask(), returnType);
|
||||
if (retVal != NULL)
|
||||
StoreInst(retVal, returnValuePtr, GetInternalMask(), returnType);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -639,15 +674,16 @@ FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
|
||||
// the current lane mask.
|
||||
llvm::Value *oldReturnedLanes = LoadInst(returnedLanesPtr, NULL,
|
||||
"old_returned_lanes");
|
||||
llvm::Value *newReturnedLanes = BinaryOperator(llvm::Instruction::Or,
|
||||
oldReturnedLanes,
|
||||
GetMask(), "old_mask|returned_lanes");
|
||||
llvm::Value *newReturnedLanes =
|
||||
BinaryOperator(llvm::Instruction::Or, oldReturnedLanes,
|
||||
GetInternalMask(), "old_mask|returned_lanes");
|
||||
|
||||
// For 'coherent' return statements, emit code to check if all
|
||||
// lanes have returned
|
||||
if (doCoherenceCheck) {
|
||||
// if newReturnedLanes == entryMask, get out of here!
|
||||
llvm::Value *cmp = MasksAllEqual(entryMask, newReturnedLanes);
|
||||
// if newReturnedLanes == functionMaskValue, get out of here!
|
||||
llvm::Value *cmp = MasksAllEqual(functionMaskValue,
|
||||
newReturnedLanes);
|
||||
llvm::BasicBlock *bDoReturn = CreateBasicBlock("do_return");
|
||||
llvm::BasicBlock *bNoReturn = CreateBasicBlock("no_return");
|
||||
BranchInst(bDoReturn, bNoReturn, cmp);
|
||||
@@ -663,7 +699,7 @@ FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
|
||||
// same scope after the return have no effect
|
||||
StoreInst(newReturnedLanes, returnedLanesPtr);
|
||||
AddInstrumentationPoint("return: some but not all lanes have returned");
|
||||
SetMask(LLVMMaskAllOff);
|
||||
SetInternalMask(LLVMMaskAllOff);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -812,7 +848,7 @@ FunctionEmitContext::AddInstrumentationPoint(const char *note) {
|
||||
// arg 3: line number
|
||||
args.push_back(LLVMInt32(currentPos.first_line));
|
||||
// arg 4: current mask, movmsk'ed down to an int32
|
||||
args.push_back(LaneMask(GetMask()));
|
||||
args.push_back(LaneMask(GetFullMask()));
|
||||
|
||||
llvm::Function *finst = m->module->getFunction("ISPCInstrument");
|
||||
CallInst(finst, args, "");
|
||||
@@ -1437,7 +1473,7 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
|
||||
// do the actual gather
|
||||
AddInstrumentationPoint("gather");
|
||||
|
||||
llvm::Value *mask = GetMask();
|
||||
llvm::Value *mask = GetFullMask();
|
||||
llvm::Function *gather = NULL;
|
||||
// Figure out which gather function to call based on the size of
|
||||
// the elements.
|
||||
@@ -1899,6 +1935,7 @@ FunctionEmitContext::ReturnInst() {
|
||||
// Add a sync call at the end of any function that launched tasks
|
||||
SyncInst();
|
||||
|
||||
const Type *returnType = function->GetReturnType();
|
||||
llvm::Instruction *rinst = NULL;
|
||||
if (returnValuePtr != NULL) {
|
||||
// We have value(s) to return; load them from their storage
|
||||
@@ -1957,7 +1994,7 @@ FunctionEmitContext::LaunchInst(llvm::Function *callee,
|
||||
}
|
||||
|
||||
// copy in the mask
|
||||
llvm::Value *mask = GetMask();
|
||||
llvm::Value *mask = GetFullMask();
|
||||
llvm::Value *ptr = GetElementPtrInst(argmem, 0, argVals.size(),
|
||||
"funarg_mask");
|
||||
StoreInst(mask, ptr);
|
||||
|
||||
64
ctx.h
64
ctx.h
@@ -71,6 +71,10 @@ public:
|
||||
SourcePos firstStmtPos);
|
||||
~FunctionEmitContext();
|
||||
|
||||
/** Returns the Function * corresponding to the function that we're
|
||||
currently generating code for. */
|
||||
const Function *GetFunction() const;
|
||||
|
||||
/** @name Current basic block management
|
||||
@{
|
||||
*/
|
||||
@@ -84,22 +88,33 @@ public:
|
||||
/** @name Mask management
|
||||
@{
|
||||
*/
|
||||
/** Returns the current mask value */
|
||||
llvm::Value *GetMask();
|
||||
/** Returns the mask value at entry to the current function. */
|
||||
llvm::Value *GetFunctionMask();
|
||||
|
||||
/** Returns the mask value corresponding to "varying" control flow
|
||||
within the current function. (i.e. this doesn't include the effect
|
||||
of the mask at function entry. */
|
||||
llvm::Value *GetInternalMask();
|
||||
|
||||
/** Returns the complete current mask value--i.e. the logical AND of
|
||||
the function entry mask and the internal mask. */
|
||||
llvm::Value *GetFullMask();
|
||||
|
||||
/** Provides the alloca'd pointer to memory to store the full function
|
||||
mask. This is only used to wire up the __mask builtin variable. */
|
||||
void SetMaskPointer(llvm::Value *p);
|
||||
|
||||
/** Provides the value of the mask at function entry */
|
||||
void SetEntryMask(llvm::Value *val);
|
||||
void SetFunctionMask(llvm::Value *val);
|
||||
|
||||
/** Sets the mask to a new value */
|
||||
void SetMask(llvm::Value *val);
|
||||
/** Sets the internal mask to a new value */
|
||||
void SetInternalMask(llvm::Value *val);
|
||||
|
||||
/** Sets the mask to (oldMask & val) */
|
||||
void MaskAnd(llvm::Value *oldMask, llvm::Value *val);
|
||||
/** Sets the internal mask to (oldMask & val) */
|
||||
void SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *val);
|
||||
|
||||
/** Sets the mask to (oldMask & ~val) */
|
||||
void MaskAndNot(llvm::Value *oldMask, llvm::Value *test);
|
||||
/** Sets the internal mask to (oldMask & ~val) */
|
||||
void SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test);
|
||||
|
||||
/** Emits a branch instruction to the basic block btrue if any of the
|
||||
lanes of current mask are on and bfalse if none are on. */
|
||||
@@ -118,9 +133,8 @@ public:
|
||||
@{
|
||||
*/
|
||||
/** Notifies the FunctionEmitContext that we're starting emission of an
|
||||
'if' statement with a uniform test. The value of the mask going
|
||||
into the 'if' statement is provided in the oldMask parameter. */
|
||||
void StartUniformIf(llvm::Value *oldMask);
|
||||
'if' statement with a uniform test. */
|
||||
void StartUniformIf();
|
||||
|
||||
/** Notifies the FunctionEmitContext that we're starting emission of an
|
||||
'if' statement with a varying test. The value of the mask going
|
||||
@@ -135,10 +149,9 @@ public:
|
||||
for a loop. Basic blocks are provides for where 'break' and
|
||||
'continue' statements should jump to (if all running lanes want to
|
||||
break or continue), uniformControlFlow indicates whether the loop
|
||||
condition is 'uniform', and oldMask provides the current mask going
|
||||
into the loop. */
|
||||
condition is 'uniform'. */
|
||||
void StartLoop(llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget,
|
||||
bool uniformControlFlow, llvm::Value *oldMask);
|
||||
bool uniformControlFlow);
|
||||
|
||||
/** Informs FunctionEmitContext of the value of the mask at the start
|
||||
of a loop body. */
|
||||
@@ -404,6 +417,9 @@ public:
|
||||
/** @} */
|
||||
|
||||
private:
|
||||
/** Pointer to the Function for which we're currently generating code. */
|
||||
Function *function;
|
||||
|
||||
/** The basic block into which we add any alloca instructions that need
|
||||
to go at the very start of the function. */
|
||||
llvm::BasicBlock *allocaBlock;
|
||||
@@ -413,8 +429,16 @@ private:
|
||||
llvm::BasicBlock *bblock;
|
||||
|
||||
/** Pointer to stack-allocated memory that stores the current value of
|
||||
the program mask. */
|
||||
llvm::Value *maskPtr;
|
||||
the full program mask. */
|
||||
llvm::Value *fullMaskPointer;
|
||||
|
||||
/** Pointer to stack-allocated memory that stores the current value of
|
||||
the program mask representing varying control flow within the
|
||||
function. */
|
||||
llvm::Value *internalMaskPointer;
|
||||
|
||||
/** Value of the program mask when the function starts execution. */
|
||||
llvm::Value *functionMaskValue;
|
||||
|
||||
/** Current source file position; if debugging information is being
|
||||
generated, this position is used to set file/line information for
|
||||
@@ -425,12 +449,6 @@ private:
|
||||
for error messages and debugging symbols. */
|
||||
SourcePos funcStartPos;
|
||||
|
||||
/** Type of result that the current function returns. */
|
||||
const Type *returnType;
|
||||
|
||||
/** Value of the program mask when the function starts execution. */
|
||||
llvm::Value *entryMask;
|
||||
|
||||
/** If currently in a loop body, the value of the mask at the start of
|
||||
the loop. */
|
||||
llvm::Value *loopMask;
|
||||
|
||||
49
expr.cpp
49
expr.cpp
@@ -505,7 +505,12 @@ lEmitPrePostIncDec(UnaryExpr::Op op, Expr *expr, SourcePos pos,
|
||||
#endif
|
||||
|
||||
// And store the result out to the lvalue
|
||||
ctx->StoreInst(binop, lvalue, ctx->GetMask(), type);
|
||||
Symbol *baseSym = expr->GetBaseSymbol();
|
||||
assert(baseSym != NULL);
|
||||
llvm::Value *mask = (baseSym->parentFunction == ctx->GetFunction() &&
|
||||
baseSym->storageClass != SC_STATIC) ?
|
||||
ctx->GetInternalMask() : ctx->GetFullMask();
|
||||
ctx->StoreInst(binop, lvalue, mask, type);
|
||||
|
||||
// And then if it's a pre increment/decrement, return the final
|
||||
// computed result; otherwise return the previously-grabbed expression
|
||||
@@ -1509,8 +1514,12 @@ lStoreAssignResult(llvm::Value *rv, llvm::Value *lv, const Type *type,
|
||||
// goes out of scope.
|
||||
ctx->StoreInst(rv, lv, LLVMMaskAllOn, type);
|
||||
}
|
||||
else
|
||||
ctx->StoreInst(rv, lv, ctx->GetMask(), type);
|
||||
else {
|
||||
llvm::Value *mask = (baseSym->parentFunction == ctx->GetFunction() &&
|
||||
baseSym->storageClass != SC_STATIC) ?
|
||||
ctx->GetInternalMask() : ctx->GetFullMask();
|
||||
ctx->StoreInst(rv, lv, mask, type);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1844,12 +1853,12 @@ SelectExpr::GetValue(FunctionEmitContext *ctx) const {
|
||||
// element-wise select to get the result
|
||||
llvm::Value *testVal = test->GetValue(ctx);
|
||||
assert(testVal->getType() == LLVMTypes::MaskType);
|
||||
llvm::Value *oldMask = ctx->GetMask();
|
||||
ctx->MaskAnd(oldMask, testVal);
|
||||
llvm::Value *oldMask = ctx->GetInternalMask();
|
||||
ctx->SetInternalMaskAnd(oldMask, testVal);
|
||||
llvm::Value *expr1Val = expr1->GetValue(ctx);
|
||||
ctx->MaskAndNot(oldMask, testVal);
|
||||
ctx->SetInternalMaskAndNot(oldMask, testVal);
|
||||
llvm::Value *expr2Val = expr2->GetValue(ctx);
|
||||
ctx->SetMask(oldMask);
|
||||
ctx->SetInternalMask(oldMask);
|
||||
|
||||
return lEmitVaryingSelect(ctx, testVal, expr1Val, expr2Val, type);
|
||||
}
|
||||
@@ -2415,7 +2424,7 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
|
||||
callargs.size() == callee->arg_size());
|
||||
|
||||
if (callargs.size() + 1 == callee->arg_size())
|
||||
argVals.push_back(ctx->GetMask());
|
||||
argVals.push_back(ctx->GetFullMask());
|
||||
|
||||
retVal = ctx->CallInst(callee, argVals, isVoidFunc ? "" : "calltmp");
|
||||
}
|
||||
@@ -2430,10 +2439,19 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
|
||||
assert(rt != NULL);
|
||||
llvm::Value *load = ctx->LoadInst(ptr, rt->GetReferenceTarget(),
|
||||
"load_ref");
|
||||
// FIXME: apply the "don't do blending" optimization here if
|
||||
// appropriate?
|
||||
ctx->StoreInst(load, argValLValues[i], ctx->GetMask(),
|
||||
rt->GetReferenceTarget());
|
||||
|
||||
Symbol *baseSym = callargs[i]->GetBaseSymbol();
|
||||
assert(baseSym != NULL);
|
||||
//CO if (baseSym->varyingCFDepth == ctx->VaryingCFDepth() &&
|
||||
//CO baseSym->storageClass != SC_STATIC)
|
||||
//CO ctx->StoreInst(load, argValLValues[i], LLVMMaskAllOn,
|
||||
//CO rt->GetReferenceTarget());
|
||||
//CO else {
|
||||
llvm::Value *mask = (baseSym->parentFunction == ctx->GetFunction() &&
|
||||
baseSym->storageClass != SC_STATIC) ?
|
||||
ctx->GetInternalMask() : ctx->GetFullMask();
|
||||
ctx->StoreInst(load, argValLValues[i], mask, rt->GetReferenceTarget());
|
||||
//CO }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5206,10 +5224,9 @@ SymbolExpr::Optimize() {
|
||||
|
||||
int
|
||||
SymbolExpr::EstimateCost() const {
|
||||
if (symbol->constValue != NULL)
|
||||
return 0;
|
||||
else
|
||||
return COST_LOAD;
|
||||
// Be optimistic and assume it's in a register or can be used as a
|
||||
// memory operand..
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
11
func.cpp
11
func.cpp
@@ -104,7 +104,10 @@ Function::Function(DeclSpecs *ds, Declarator *decl, Stmt *c) {
|
||||
for (unsigned int i = 0; i < decl->functionArgs->size(); ++i) {
|
||||
Declaration *pdecl = (*decl->functionArgs)[i];
|
||||
assert(pdecl->declarators.size() == 1);
|
||||
args.push_back(pdecl->declarators[0]->sym);
|
||||
Symbol *sym = pdecl->declarators[0]->sym;
|
||||
if (dynamic_cast<const ReferenceType *>(sym->type) == NULL)
|
||||
sym->parentFunction = this;
|
||||
args.push_back(sym);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -462,7 +465,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
llvm::Value *ptr = ctx->GetElementPtrInst(structParamPtr, 0, nArgs,
|
||||
"task_struct_mask");
|
||||
llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, "mask");
|
||||
ctx->SetEntryMask(ptrval);
|
||||
ctx->SetFunctionMask(ptrval);
|
||||
|
||||
// Copy threadIndex and threadCount into stack-allocated storage so
|
||||
// that their symbols point to something reasonable.
|
||||
@@ -500,12 +503,12 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
// happens for exmaple with 'export'ed functions that the app
|
||||
// calls.
|
||||
if (argIter == function->arg_end())
|
||||
ctx->SetEntryMask(LLVMMaskAllOn);
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
else {
|
||||
// Otherwise use the mask to set the entry mask value
|
||||
argIter->setName("__mask");
|
||||
assert(argIter->getType() == LLVMTypes::MaskType);
|
||||
ctx->SetEntryMask(argIter);
|
||||
ctx->SetFunctionMask(argIter);
|
||||
assert(++argIter == function->arg_end());
|
||||
}
|
||||
}
|
||||
|
||||
81
stmt.cpp
81
stmt.cpp
@@ -322,6 +322,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
// this before the initializer stuff.
|
||||
ctx->EmitVariableDebugInfo(sym);
|
||||
// And then get it initialized...
|
||||
sym->parentFunction = ctx->GetFunction();
|
||||
lInitSymbol(sym->storagePtr, sym->name.c_str(), type, decl->initExpr,
|
||||
ctx, sym->pos);
|
||||
}
|
||||
@@ -461,7 +462,7 @@ IfStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
return;
|
||||
|
||||
if (isUniform) {
|
||||
ctx->StartUniformIf(ctx->GetMask());
|
||||
ctx->StartUniformIf();
|
||||
if (doAllCheck)
|
||||
Warning(test->pos, "Uniform condition supplied to \"cif\" statement.");
|
||||
|
||||
@@ -571,14 +572,14 @@ void
|
||||
IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||
llvm::Value *test) const {
|
||||
if (trueStmts) {
|
||||
ctx->MaskAnd(oldMask, test);
|
||||
ctx->SetInternalMaskAnd(oldMask, test);
|
||||
lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
|
||||
// under varying control flow,, returns can't stop instruction
|
||||
// emission, so this better be non-NULL...
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
}
|
||||
if (falseStmts) {
|
||||
ctx->MaskAndNot(oldMask, test);
|
||||
ctx->SetInternalMaskAndNot(oldMask, test);
|
||||
lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
}
|
||||
@@ -764,8 +765,8 @@ lSafeToRunWithAllLanesOff(Stmt *stmt) {
|
||||
*/
|
||||
void
|
||||
IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
|
||||
llvm::Value *oldMask = ctx->GetMask();
|
||||
if (oldMask == LLVMMaskAllOn) {
|
||||
llvm::Value *oldMask = ctx->GetInternalMask();
|
||||
if (ctx->GetFullMask() == LLVMMaskAllOn) {
|
||||
// We can tell that the mask is on statically at compile time; just
|
||||
// emit code for the 'if test with the mask all on' path
|
||||
llvm::BasicBlock *bDone = ctx->CreateBasicBlock("cif_done");
|
||||
@@ -782,18 +783,11 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
|
||||
llvm::BasicBlock *bDone = ctx->CreateBasicBlock("cif_done");
|
||||
|
||||
// Jump to either bAllOn or bMixedOn, depending on the mask's value
|
||||
llvm::Value *maskAllQ = ctx->All(oldMask);
|
||||
llvm::Value *maskAllQ = ctx->All(ctx->GetFullMask());
|
||||
ctx->BranchInst(bAllOn, bMixedOn, maskAllQ);
|
||||
|
||||
// Emit code for the 'mask all on' case
|
||||
ctx->SetCurrentBasicBlock(bAllOn);
|
||||
// We start by explicitly storing "all on" into the mask mask.
|
||||
// Note that this doesn't change its actual value, but doing so
|
||||
// lets the compiler see what's going on so that subsequent
|
||||
// optimizations for code emitted here can operate with the
|
||||
// knowledge that the mask is definitely all on (until it modifies
|
||||
// the mask itself).
|
||||
ctx->SetMask(LLVMMaskAllOn);
|
||||
emitMaskAllOn(ctx, ltest, bDone);
|
||||
|
||||
// And emit code for the mixed mask case
|
||||
@@ -845,11 +839,20 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
|
||||
|
||||
|
||||
/** Emits code for 'if' tests under the case where we know that the program
|
||||
mask is all on.
|
||||
mask is all on going into the 'if'.
|
||||
*/
|
||||
void
|
||||
IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest,
|
||||
llvm::BasicBlock *bDone) const {
|
||||
// We start by explicitly storing "all on" into the mask mask. Note
|
||||
// that this doesn't change its actual value, but doing so lets the
|
||||
// compiler see what's going on so that subsequent optimizations for
|
||||
// code emitted here can operate with the knowledge that the mask is
|
||||
// definitely all on (until it modifies the mask itself).
|
||||
ctx->SetInternalMask(LLVMMaskAllOn);
|
||||
llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
|
||||
// First, check the value of the test. If it's all on, then we jump to
|
||||
// a basic block that will only have code for the true case.
|
||||
llvm::BasicBlock *bTestAll = ctx->CreateBasicBlock("cif_test_all");
|
||||
@@ -896,6 +899,9 @@ IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest,
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
ctx->EndIf();
|
||||
ctx->BranchInst(bDone);
|
||||
|
||||
ctx->SetCurrentBasicBlock(bDone);
|
||||
ctx->SetFunctionMask(oldFunctionMask);
|
||||
}
|
||||
|
||||
|
||||
@@ -909,11 +915,11 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||
llvm::BasicBlock *bNext = ctx->CreateBasicBlock("safe_if_after_true");
|
||||
if (trueStmts != NULL) {
|
||||
llvm::BasicBlock *bRunTrue = ctx->CreateBasicBlock("safe_if_run_true");
|
||||
ctx->MaskAnd(oldMask, ltest);
|
||||
ctx->SetInternalMaskAnd(oldMask, ltest);
|
||||
|
||||
// Do any of the program instances want to run the 'true'
|
||||
// block? If not, jump ahead to bNext.
|
||||
llvm::Value *maskAnyQ = ctx->Any(ctx->GetMask());
|
||||
llvm::Value *maskAnyQ = ctx->Any(ctx->GetFullMask());
|
||||
ctx->BranchInst(bRunTrue, bNext, maskAnyQ);
|
||||
|
||||
// Emit statements for true
|
||||
@@ -926,11 +932,11 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||
if (falseStmts != NULL) {
|
||||
llvm::BasicBlock *bRunFalse = ctx->CreateBasicBlock("safe_if_run_false");
|
||||
bNext = ctx->CreateBasicBlock("safe_if_after_false");
|
||||
ctx->MaskAndNot(oldMask, ltest);
|
||||
ctx->SetInternalMaskAndNot(oldMask, ltest);
|
||||
|
||||
// Similarly, check to see if any of the instances want to
|
||||
// run the 'false' block...
|
||||
llvm::Value *maskAnyQ = ctx->Any(ctx->GetMask());
|
||||
llvm::Value *maskAnyQ = ctx->Any(ctx->GetFullMask());
|
||||
ctx->BranchInst(bRunFalse, bNext, maskAnyQ);
|
||||
|
||||
// Emit code for false
|
||||
@@ -1019,14 +1025,14 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
llvm::BasicBlock *bexit = ctx->CreateBasicBlock("do_exit");
|
||||
llvm::BasicBlock *btest = ctx->CreateBasicBlock("do_test");
|
||||
|
||||
ctx->StartLoop(bexit, btest, uniformTest, ctx->GetMask());
|
||||
ctx->StartLoop(bexit, btest, uniformTest);
|
||||
|
||||
// Start by jumping into the loop body
|
||||
ctx->BranchInst(bloop);
|
||||
|
||||
// And now emit code for the loop body
|
||||
ctx->SetCurrentBasicBlock(bloop);
|
||||
ctx->SetLoopMask(ctx->GetMask());
|
||||
ctx->SetLoopMask(ctx->GetInternalMask());
|
||||
ctx->SetDebugPos(pos);
|
||||
// FIXME: in the StmtList::EmitCode() method takes starts/stops a new
|
||||
// scope around the statements in the list. So if the body is just a
|
||||
@@ -1047,10 +1053,13 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
// IfStmt::emitCoherentTests()), and then emit the code for the
|
||||
// loop body.
|
||||
ctx->SetCurrentBasicBlock(bAllOn);
|
||||
ctx->SetMask(LLVMMaskAllOn);
|
||||
ctx->SetInternalMask(LLVMMaskAllOn);
|
||||
llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
if (bodyStmts)
|
||||
bodyStmts->EmitCode(ctx);
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
ctx->SetFunctionMask(oldFunctionMask);
|
||||
ctx->BranchInst(btest);
|
||||
|
||||
// The mask is mixed. Just emit the code for the loop body.
|
||||
@@ -1093,8 +1102,8 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
// For the varying case, update the mask based on the value of the
|
||||
// test. If any program instances still want to be running, jump
|
||||
// to the top of the loop. Otherwise, jump out.
|
||||
llvm::Value *mask = ctx->GetMask();
|
||||
ctx->MaskAnd(mask, testValue);
|
||||
llvm::Value *mask = ctx->GetInternalMask();
|
||||
ctx->SetInternalMaskAnd(mask, testValue);
|
||||
ctx->BranchIfMaskAny(bloop, bexit);
|
||||
}
|
||||
|
||||
@@ -1203,7 +1212,7 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
(!g->opt.disableUniformControlFlow &&
|
||||
!lHasVaryingBreakOrContinue(stmts));
|
||||
|
||||
ctx->StartLoop(bexit, bstep, uniformTest, ctx->GetMask());
|
||||
ctx->StartLoop(bexit, bstep, uniformTest);
|
||||
ctx->SetDebugPos(pos);
|
||||
|
||||
// If we have an initiailizer statement, start by emitting the code for
|
||||
@@ -1216,17 +1225,6 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
}
|
||||
ctx->BranchInst(btest);
|
||||
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
#if 0
|
||||
if (!ctx->GetCurrentBasicBlock()) {
|
||||
// when does this happen??
|
||||
if (init)
|
||||
ctx->EndScope();
|
||||
ctx->EndLoop();
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Emit code to get the value of the loop test. If no test expression
|
||||
// was provided, just go with a true value.
|
||||
ctx->SetCurrentBasicBlock(btest);
|
||||
@@ -1253,14 +1251,14 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
ctx->BranchInst(bloop, bexit, ltest);
|
||||
}
|
||||
else {
|
||||
llvm::Value *mask = ctx->GetMask();
|
||||
ctx->MaskAnd(mask, ltest);
|
||||
llvm::Value *mask = ctx->GetInternalMask();
|
||||
ctx->SetInternalMaskAnd(mask, ltest);
|
||||
ctx->BranchIfMaskAny(bloop, bexit);
|
||||
}
|
||||
|
||||
// On to emitting the code for the loop body.
|
||||
ctx->SetCurrentBasicBlock(bloop);
|
||||
ctx->SetLoopMask(ctx->GetMask());
|
||||
ctx->SetLoopMask(ctx->GetInternalMask());
|
||||
ctx->AddInstrumentationPoint("for loop body");
|
||||
if (!dynamic_cast<StmtList *>(stmts))
|
||||
ctx->StartScope();
|
||||
@@ -1278,10 +1276,13 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
// the runtime test has passed, make this fact clear for code
|
||||
// generation at compile time here.)
|
||||
ctx->SetCurrentBasicBlock(bAllOn);
|
||||
ctx->SetMask(LLVMMaskAllOn);
|
||||
ctx->SetInternalMask(LLVMMaskAllOn);
|
||||
llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
if (stmts)
|
||||
stmts->EmitCode(ctx);
|
||||
assert(ctx->GetCurrentBasicBlock());
|
||||
ctx->SetFunctionMask(oldFunctionMask);
|
||||
ctx->BranchInst(bstep);
|
||||
|
||||
// Emit code for the mask being mixed. We should never run the
|
||||
@@ -1773,7 +1774,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
args[0] = ctx->GetStringPtr(format);
|
||||
args[1] = ctx->GetStringPtr(argTypes);
|
||||
args[2] = LLVMInt32(g->target.vectorWidth);
|
||||
args[3] = ctx->LaneMask(ctx->GetMask());
|
||||
args[3] = ctx->LaneMask(ctx->GetFullMask());
|
||||
std::vector<llvm::Value *> argVec(&args[0], &args[5]);
|
||||
ctx->CallInst(printFunc, argVec, "");
|
||||
}
|
||||
|
||||
1
sym.cpp
1
sym.cpp
@@ -52,6 +52,7 @@ Symbol::Symbol(const std::string &n, SourcePos p, const Type *t,
|
||||
constValue = NULL;
|
||||
storageClass = sc;
|
||||
varyingCFDepth = 0;
|
||||
parentFunction = NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
4
sym.h
4
sym.h
@@ -108,6 +108,10 @@ public:
|
||||
masked stores when modifying the symbol's value when the
|
||||
store is done at the same 'varying' control flow depth as
|
||||
the one where the symbol was originally declared. */
|
||||
const Function *parentFunction;
|
||||
/*!< For symbols that are parameters to functions or are
|
||||
variables declared inside functions, this gives the
|
||||
function they're in. */
|
||||
};
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user