Be more careful about using the right mask when emitting gathers.

Specifically, we had been using the full mask for all gathers, rather than
using the internal mask when we were loading from locally-declared arrays.
Thus, given code like:

  uniform float x[programCount] = { .. . };
  float xx = x[programIndex];

Previously we weren't generating a plain vector load to initialize xx, when
this code was in a function where it wasn't known that the mask was all on,
even though it should have.  Now it does.
This commit is contained in:
Matt Pharr
2011-10-17 20:25:52 -04:00
parent 19087e4761
commit 290032f4f5
4 changed files with 83 additions and 54 deletions

58
ctx.cpp
View File

@@ -241,7 +241,7 @@ FunctionEmitContext::GetInternalMask() {
if (VaryingCFDepth() == 0)
return LLVMMaskAllOn;
else
return LoadInst(internalMaskPointer, NULL, "load_mask");
return LoadInst(internalMaskPointer, NULL, NULL, "load_mask");
}
@@ -376,9 +376,9 @@ FunctionEmitContext::EndIf() {
// newMask = (oldMask & ~(breakLanes | continueLanes))
llvm::Value *oldMask = GetInternalMask();
llvm::Value *breakLanes = LoadInst(breakLanesPtr, NULL,
llvm::Value *breakLanes = LoadInst(breakLanesPtr, NULL, NULL,
"break_lanes");
llvm::Value *continueLanes = LoadInst(continueLanesPtr, NULL,
llvm::Value *continueLanes = LoadInst(continueLanesPtr, NULL, NULL,
"continue_lanes");
llvm::Value *breakOrContinueLanes =
BinaryOperator(llvm::Instruction::Or, breakLanes, continueLanes,
@@ -455,7 +455,8 @@ FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) {
// Restore the mask to the given old mask, but leave off any lanes that
// executed a return statement.
// newMask = (oldMask & ~returnedLanes)
llvm::Value *returnedLanes = LoadInst(returnedLanesPtr, NULL, "returned_lanes");
llvm::Value *returnedLanes = LoadInst(returnedLanesPtr, NULL, NULL,
"returned_lanes");
llvm::Value *notReturned = NotOperator(returnedLanes, "~returned_lanes");
llvm::Value *newMask = BinaryOperator(llvm::Instruction::And,
oldMask, notReturned, "new_mask");
@@ -487,7 +488,8 @@ FunctionEmitContext::Break(bool doCoherenceCheck) {
// breakLanes = breakLanes | mask
assert(breakLanesPtr != NULL);
llvm::Value *mask = GetInternalMask();
llvm::Value *breakMask = LoadInst(breakLanesPtr, NULL, "break_mask");
llvm::Value *breakMask = LoadInst(breakLanesPtr, NULL, NULL,
"break_mask");
llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or,
mask, breakMask, "mask|break_mask");
StoreInst(newMask, breakLanesPtr);
@@ -536,7 +538,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) {
assert(continueLanesPtr);
llvm::Value *mask = GetInternalMask();
llvm::Value *continueMask =
LoadInst(continueLanesPtr, NULL, "continue_mask");
LoadInst(continueLanesPtr, NULL, NULL, "continue_mask");
llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or,
mask, continueMask, "mask|continueMask");
StoreInst(newMask, continueLanesPtr);
@@ -580,9 +582,12 @@ FunctionEmitContext::jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target) {
// Check to see if (returned lanes | continued lanes | break lanes) is
// equal to the value of mask at the start of the loop iteration. If
// so, everyone is done and we can jump to the given target
llvm::Value *returned = LoadInst(returnedLanesPtr, NULL, "returned_lanes");
llvm::Value *continued = LoadInst(continueLanesPtr, NULL, "continue_lanes");
llvm::Value *breaked = LoadInst(breakLanesPtr, NULL, "break_lanes");
llvm::Value *returned = LoadInst(returnedLanesPtr, NULL, NULL,
"returned_lanes");
llvm::Value *continued = LoadInst(continueLanesPtr, NULL, NULL,
"continue_lanes");
llvm::Value *breaked = LoadInst(breakLanesPtr, NULL, NULL,
"break_lanes");
llvm::Value *returnedOrContinued = BinaryOperator(llvm::Instruction::Or,
returned, continued,
"returned|continued");
@@ -616,7 +621,8 @@ FunctionEmitContext::RestoreContinuedLanes() {
// mask = mask & continueFlags
llvm::Value *mask = GetInternalMask();
llvm::Value *continueMask = LoadInst(continueLanesPtr, NULL, "continue_mask");
llvm::Value *continueMask = LoadInst(continueLanesPtr, NULL, NULL,
"continue_mask");
llvm::Value *orMask = BinaryOperator(llvm::Instruction::Or,
mask, continueMask, "mask|continue_mask");
SetInternalMask(orMask);
@@ -672,7 +678,7 @@ FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
else {
// Otherwise we update the returnedLanes value by ANDing it with
// the current lane mask.
llvm::Value *oldReturnedLanes = LoadInst(returnedLanesPtr, NULL,
llvm::Value *oldReturnedLanes = LoadInst(returnedLanesPtr, NULL, NULL,
"old_returned_lanes");
llvm::Value *newReturnedLanes =
BinaryOperator(llvm::Instruction::Or, oldReturnedLanes,
@@ -1373,8 +1379,8 @@ FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, int v0, int v1,
llvm::Value *
FunctionEmitContext::LoadInst(llvm::Value *lvalue, const Type *type,
const char *name) {
FunctionEmitContext::LoadInst(llvm::Value *lvalue, llvm::Value *mask,
const Type *type, const char *name) {
if (lvalue == NULL) {
assert(m->errorCount > 0);
return NULL;
@@ -1406,16 +1412,16 @@ FunctionEmitContext::LoadInst(llvm::Value *lvalue, const Type *type,
// gather path here (we can't reliably figure out all of the type
// information we need from the LLVM::Type, so have to carry the
// ispc type in through this path..
assert(type != NULL);
assert(type != NULL && mask != NULL);
assert(llvm::isa<LLVM_TYPE_CONST llvm::ArrayType>(lvalue->getType()));
return gather(lvalue, type, name);
return gather(lvalue, mask, type, name);
}
}
llvm::Value *
FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
const char *name) {
FunctionEmitContext::gather(llvm::Value *lvalue, llvm::Value *mask,
const Type *type, const char *name) {
// We should have a varying lvalue if we get here...
assert(llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(lvalue->getType()));
@@ -1429,8 +1435,8 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
for (int i = 0; i < st->GetElementCount(); ++i) {
llvm::Value *eltPtrs = GetElementPtrInst(lvalue, 0, i);
// This in turn will be another gather
llvm::Value *eltValues = LoadInst(eltPtrs, st->GetElementType(i),
name);
llvm::Value *eltValues =
LoadInst(eltPtrs, mask, st->GetElementType(i), name);
retValue = InsertInst(retValue, eltValues, i, "set_value");
}
return retValue;
@@ -1451,7 +1457,8 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
for (int i = 0; i < vt->GetElementCount(); ++i) {
llvm::Value *eltPtrs = GetElementPtrInst(lvalue, 0, i);
llvm::Value *eltValues = LoadInst(eltPtrs, vt->GetBaseType(), name);
llvm::Value *eltValues = LoadInst(eltPtrs, mask, vt->GetBaseType(),
name);
retValue = InsertInst(retValue, eltValues, i, "set_value");
}
return retValue;
@@ -1463,7 +1470,8 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
llvm::Value *retValue = llvm::UndefValue::get(retType);
for (int i = 0; i < at->GetElementCount(); ++i) {
llvm::Value *eltPtrs = GetElementPtrInst(lvalue, 0, i);
llvm::Value *eltValues = LoadInst(eltPtrs, at->GetElementType(), name);
llvm::Value *eltValues = LoadInst(eltPtrs, mask,
at->GetElementType(), name);
retValue = InsertInst(retValue, eltValues, i, "set_value");
}
return retValue;
@@ -1473,7 +1481,6 @@ FunctionEmitContext::gather(llvm::Value *lvalue, const Type *type,
// do the actual gather
AddInstrumentationPoint("gather");
llvm::Value *mask = GetFullMask();
llvm::Function *gather = NULL;
// Figure out which gather function to call based on the size of
// the elements.
@@ -1935,17 +1942,16 @@ FunctionEmitContext::ReturnInst() {
// Add a sync call at the end of any function that launched tasks
SyncInst();
const Type *returnType = function->GetReturnType();
llvm::Instruction *rinst = NULL;
if (returnValuePtr != NULL) {
// We have value(s) to return; load them from their storage
// location
llvm::Value *retVal = LoadInst(returnValuePtr, returnType,
llvm::Value *retVal = LoadInst(returnValuePtr, NULL, NULL,
"return_value");
rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock);
}
else {
assert(returnType == AtomicType::Void);
assert(function->GetReturnType() == AtomicType::Void);
rinst = llvm::ReturnInst::Create(*g->ctx, bblock);
}
@@ -2016,7 +2022,7 @@ FunctionEmitContext::LaunchInst(llvm::Function *callee,
void
FunctionEmitContext::SyncInst() {
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr, NULL);
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr, NULL, NULL);
llvm::Value *nullPtrValue = llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp,
llvm::CmpInst::ICMP_NE,

18
ctx.h
View File

@@ -342,13 +342,13 @@ public:
llvm::Value *GetElementPtrInst(llvm::Value *basePtr, int v0, int v1,
const char *name = NULL);
/** Load from the memory location(s) given by lvalue. The lvalue may
be varying, in which case this corresponds to a gather from the
multiple memory locations given by the array of pointer values
given by the lvalue. If the lvalue is not varying, then the type
parameter may be NULL. */
llvm::Value *LoadInst(llvm::Value *lvalue, const Type *type,
const char *name = NULL);
/** Load from the memory location(s) given by lvalue, using the given
mask. The lvalue may be varying, in which case this corresponds to
a gather from the multiple memory locations given by the array of
pointer values given by the lvalue. If the lvalue is not varying,
then both the mask pointer and the type pointer may be NULL. */
llvm::Value *LoadInst(llvm::Value *lvalue, llvm::Value *mask,
const Type *type, const char *name = NULL);
/** Emits an alloca instruction to allocate stack storage for the given
type. If a non-zero alignment is specified, the object is also
@@ -521,8 +521,8 @@ private:
void scatter(llvm::Value *rvalue, llvm::Value *lvalue,
llvm::Value *maskPtr, const Type *rvalueType);
llvm::Value *gather(llvm::Value *lvalue, const Type *type,
const char *name);
llvm::Value *gather(llvm::Value *lvalue, llvm::Value *mask,
const Type *type, const char *name);
void maskedStore(llvm::Value *rvalue, llvm::Value *lvalue,
const Type *rvalueType, llvm::Value *maskPtr);
};

View File

@@ -457,6 +457,15 @@ lLLVMConstantValue(const Type *type, llvm::LLVMContext *ctx, double value) {
}
static llvm::Value *
lMaskForSymbol(Symbol *baseSym, FunctionEmitContext *ctx) {
llvm::Value *mask = (baseSym->parentFunction == ctx->GetFunction() &&
baseSym->storageClass != SC_STATIC) ?
ctx->GetInternalMask() : ctx->GetFullMask();
return mask;
}
/** Store the result of an assignment to the given location.
*/
static void
@@ -479,10 +488,7 @@ lStoreAssignResult(llvm::Value *rv, llvm::Value *lv, const Type *type,
ctx->StoreInst(rv, lv, LLVMMaskAllOn, type);
}
else {
llvm::Value *mask = (baseSym->parentFunction == ctx->GetFunction() &&
baseSym->storageClass != SC_STATIC) ?
ctx->GetInternalMask() : ctx->GetFullMask();
ctx->StoreInst(rv, lv, mask, type);
ctx->StoreInst(rv, lv, lMaskForSymbol(baseSym, ctx), type);
}
}
@@ -1540,7 +1546,8 @@ lEmitOpAssign(AssignExpr::Op op, Expr *arg0, Expr *arg1, const Type *type,
// operator and load the current value on the left-hand side.
llvm::Value *rvalue = arg1->GetValue(ctx);
ctx->SetDebugPos(pos);
llvm::Value *oldLHS = ctx->LoadInst(lv, type, "opassign_load");
llvm::Value *mask = lMaskForSymbol(baseSym, ctx);
llvm::Value *oldLHS = ctx->LoadInst(lv, mask, type, "opassign_load");
// Map the operator to the corresponding BinaryExpr::Op operator
BinaryExpr::Op basicop;
@@ -1794,7 +1801,7 @@ lEmitVaryingSelect(FunctionEmitContext *ctx, llvm::Value *test,
ctx->StoreInst(expr2, resultPtr);
// Use masking to conditionally store the expr1 values
ctx->StoreInst(expr1, resultPtr, test, type);
return ctx->LoadInst(resultPtr, type, "selectexpr_final");
return ctx->LoadInst(resultPtr, LLVMMaskAllOn, type, "selectexpr_final");
}
@@ -2573,7 +2580,8 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
const Type *type = rt->GetReferenceTarget();
llvm::Value *ptr = ctx->AllocaInst(type->LLVMType(g->ctx), "arg");
llvm::Value *val = ctx->LoadInst(argValue, type);
llvm::Value *mask = lMaskForSymbol(argExpr->GetBaseSymbol(), ctx);
llvm::Value *val = ctx->LoadInst(argValue, mask, type);
ctx->StoreInst(val, ptr);
storedArgValPtrs.push_back(ptr);
argValLValues.push_back(argValue);
@@ -2615,9 +2623,8 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const {
const ReferenceType *rt =
dynamic_cast<const ReferenceType *>(callargs[i]->GetType());
assert(rt != NULL);
llvm::Value *load = ctx->LoadInst(ptr, rt->GetReferenceTarget(),
llvm::Value *load = ctx->LoadInst(ptr, NULL, rt->GetReferenceTarget(),
"load_ref");
Symbol *baseSym = callargs[i]->GetBaseSymbol();
lStoreAssignResult(load, argValLValues[i], rt->GetReferenceTarget(),
ctx, baseSym);
@@ -2885,7 +2892,8 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const {
ctx->SetDebugPos(pos);
llvm::Value *lvalue = GetLValue(ctx);
if (!lvalue) {
llvm::Value *mask = NULL;
if (lvalue == NULL) {
// We may be indexing into a temporary that hasn't hit memory, so
// get the full value and stuff it into temporary alloca'd space so
// that we can index from there...
@@ -2900,10 +2908,16 @@ IndexExpr::GetValue(FunctionEmitContext *ctx) const {
ctx->StoreInst(val, ptr);
ptr = lCastUniformVectorBasePtr(ptr, ctx);
lvalue = ctx->GetElementPtrInst(ptr, LLVMInt32(0), index->GetValue(ctx));
mask = LLVMMaskAllOn;
}
else {
Symbol *baseSym = GetBaseSymbol();
assert(baseSym != NULL);
mask = lMaskForSymbol(baseSym, ctx);
}
ctx->SetDebugPos(pos);
return ctx->LoadInst(lvalue, GetType(), "index");
return ctx->LoadInst(lvalue, mask, GetType(), "index");
}
@@ -3172,6 +3186,7 @@ VectorMemberExpr::GetType() const {
}
}
llvm::Value*
VectorMemberExpr::GetLValue(FunctionEmitContext* ctx) const {
if (identifier.length() == 1) {
@@ -3181,6 +3196,7 @@ VectorMemberExpr::GetLValue(FunctionEmitContext* ctx) const {
}
}
llvm::Value*
VectorMemberExpr::GetValue(FunctionEmitContext* ctx) const {
if (identifier.length() == 1) {
@@ -3214,12 +3230,12 @@ VectorMemberExpr::GetValue(FunctionEmitContext* ctx) const {
ctx->GetElementPtrInst(basePtr , 0,
indices[i], "orig_offset");
llvm::Value *initValue =
ctx->LoadInst(initLValue, memberType->GetElementType(),
ctx->LoadInst(initLValue, NULL, memberType->GetElementType(),
"vec_element");
ctx->StoreInst(initValue, ptmp);
}
return ctx->LoadInst(ltmp, memberType, "swizzle_vec");
return ctx->LoadInst(ltmp, NULL, memberType, "swizzle_vec");
}
}
@@ -3349,7 +3365,8 @@ MemberExpr::GetValue(FunctionEmitContext *ctx) const {
return NULL;
llvm::Value *lvalue = GetLValue(ctx);
if (!lvalue) {
llvm::Value *mask = NULL;
if (lvalue == NULL) {
// As in the array case, this may be a temporary that hasn't hit
// memory; get the full value and stuff it into a temporary array
// so that we can index from there...
@@ -3368,10 +3385,16 @@ MemberExpr::GetValue(FunctionEmitContext *ctx) const {
if (elementNumber == -1)
return NULL;
lvalue = ctx->GetElementPtrInst(ptr, 0, elementNumber);
mask = LLVMMaskAllOn;
}
else {
Symbol *baseSym = GetBaseSymbol();
assert(baseSym != NULL);
mask = lMaskForSymbol(baseSym, ctx);
}
ctx->SetDebugPos(pos);
return ctx->LoadInst(lvalue, GetType(), "structelement");
return ctx->LoadInst(lvalue, mask, GetType(), "structelement");
}
@@ -5269,7 +5292,7 @@ DereferenceExpr::GetValue(FunctionEmitContext *ctx) const {
return NULL;
ctx->SetDebugPos(pos);
return ctx->LoadInst(ptr, type, "reference_load");
return ctx->LoadInst(ptr, NULL, type, "reference_load");
}
@@ -5347,7 +5370,7 @@ SymbolExpr::GetValue(FunctionEmitContext *ctx) const {
if (!symbol || !symbol->storagePtr)
return NULL;
ctx->SetDebugPos(pos);
return ctx->LoadInst(symbol->storagePtr, GetType(), symbol->name.c_str());
return ctx->LoadInst(symbol->storagePtr, NULL, NULL, symbol->name.c_str());
}

View File

@@ -418,7 +418,7 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol
// and copy the value from the struct and into the local alloca'ed
// memory
llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, sym->name.c_str());
llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, NULL, sym->name.c_str());
ctx->StoreInst(ptrval, sym->storagePtr);
ctx->EmitFunctionParameterDebugInfo(sym);
}
@@ -465,7 +465,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
// The mask is the last parameter in the argument structure
llvm::Value *ptr = ctx->GetElementPtrInst(structParamPtr, 0, nArgs,
"task_struct_mask");
llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, "mask");
llvm::Value *ptrval = ctx->LoadInst(ptr, NULL, NULL, "mask");
ctx->SetFunctionMask(ptrval);
// Copy threadIndex and threadCount into stack-allocated storage so