These make it easier to iterate over arbitrary numbers of data elements; specifically, they automatically handle the "ragged extra bits" that come up when the number of elements to be processed isn't evenly divisible by programCount. TODO: documentation
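Until that documentation is written, here is a minimal sketch in ispc syntax (the array name `a` and the element count are hypothetical) of what this handles: on an 8-wide target, iterating over 100 elements runs 12 full 8-wide iterations, and in the final iteration the compiler automatically masks off the lanes that would map past element 99.

    foreach (i = 0 ... 100) {
        // In the last iteration, only the lanes with i in 96..99 are
        // active; the remaining lanes are masked off automatically.
        a[i] *= 2;
    }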

/*
  Copyright (c) 2010-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are
  met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.


   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
   IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
   PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/** @file ctx.cpp
    @brief Implementation of the FunctionEmitContext class
*/

#include "ctx.h"
#include "util.h"
#include "func.h"
#include "llvmutil.h"
#include "type.h"
#include "stmt.h"
#include "expr.h"
#include "module.h"
#include "sym.h"
#include <map>
#include <llvm/DerivedTypes.h>
#include <llvm/Instructions.h>
#include <llvm/Support/Dwarf.h>
#include <llvm/Metadata.h>
#include <llvm/Module.h>


/** This is a small utility structure that records information related to one
    level of nested control flow.  It's mostly used in correctly restoring
    the mask and other state as we exit control flow nesting levels.
*/
struct CFInfo {
    /** Returns a new instance of the structure that represents entering an
        'if' statement */
    static CFInfo *GetIf(bool isUniform, llvm::Value *savedMask);

    /** Returns a new instance of the structure that represents entering a
        loop. */
    static CFInfo *GetLoop(bool isUniform, llvm::BasicBlock *breakTarget,
                           llvm::BasicBlock *continueTarget,
                           llvm::Value *savedBreakLanesPtr,
                           llvm::Value *savedContinueLanesPtr,
                           llvm::Value *savedMask, llvm::Value *savedLoopMask);

    static CFInfo *GetForeach(llvm::BasicBlock *breakTarget,
                              llvm::BasicBlock *continueTarget,
                              llvm::Value *savedBreakLanesPtr,
                              llvm::Value *savedContinueLanesPtr,
                              llvm::Value *savedMask, llvm::Value *savedLoopMask);

    bool IsIf() { return type == If; }
    bool IsLoop() { return type == Loop; }
    bool IsForeach() { return type == Foreach; }
    bool IsVaryingType() { return !isUniform; }
    bool IsUniform() { return isUniform; }

    enum CFType { If, Loop, Foreach };
    CFType type;
    bool isUniform;
    llvm::BasicBlock *savedBreakTarget, *savedContinueTarget;
    llvm::Value *savedBreakLanesPtr, *savedContinueLanesPtr;
    llvm::Value *savedMask, *savedLoopMask;

private:
    CFInfo(CFType t, bool uniformIf, llvm::Value *sm) {
        assert(t == If);
        type = t;
        isUniform = uniformIf;
        savedBreakTarget = savedContinueTarget = NULL;
        savedBreakLanesPtr = savedContinueLanesPtr = NULL;
        savedMask = savedLoopMask = sm;
    }
    CFInfo(CFType t, bool iu, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
           llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
           llvm::Value *lm) {
        assert(t == Loop);
        type = t;
        isUniform = iu;
        savedBreakTarget = bt;
        savedContinueTarget = ct;
        savedBreakLanesPtr = sb;
        savedContinueLanesPtr = sc;
        savedMask = sm;
        savedLoopMask = lm;
    }
    CFInfo(CFType t, llvm::BasicBlock *bt, llvm::BasicBlock *ct,
           llvm::Value *sb, llvm::Value *sc, llvm::Value *sm,
           llvm::Value *lm) {
        assert(t == Foreach);
        type = t;
        isUniform = false;
        savedBreakTarget = bt;
        savedContinueTarget = ct;
        savedBreakLanesPtr = sb;
        savedContinueLanesPtr = sc;
        savedMask = sm;
        savedLoopMask = lm;
    }
};
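

// Illustrative sketch (not from the original source): for ispc code like
//     for (...) { if (varyingCondition) { ... } }
// the FunctionEmitContext's controlFlowInfo stack would hold, from bottom
// to top, a CFInfo created by CFInfo::GetLoop(false, ...) and then one
// created by CFInfo::GetIf(false, savedMask), each recording the mask and
// break/continue state to restore when its scope ends.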


CFInfo *
CFInfo::GetIf(bool isUniform, llvm::Value *savedMask) {
    return new CFInfo(If, isUniform, savedMask);
}


CFInfo *
CFInfo::GetLoop(bool isUniform, llvm::BasicBlock *breakTarget,
                llvm::BasicBlock *continueTarget,
                llvm::Value *savedBreakLanesPtr,
                llvm::Value *savedContinueLanesPtr,
                llvm::Value *savedMask, llvm::Value *savedLoopMask) {
    return new CFInfo(Loop, isUniform, breakTarget, continueTarget,
                      savedBreakLanesPtr, savedContinueLanesPtr,
                      savedMask, savedLoopMask);
}


CFInfo *
CFInfo::GetForeach(llvm::BasicBlock *breakTarget,
                   llvm::BasicBlock *continueTarget,
                   llvm::Value *savedBreakLanesPtr,
                   llvm::Value *savedContinueLanesPtr,
                   llvm::Value *savedMask, llvm::Value *savedForeachMask) {
    return new CFInfo(Foreach, breakTarget, continueTarget,
                      savedBreakLanesPtr, savedContinueLanesPtr,
                      savedMask, savedForeachMask);
}


///////////////////////////////////////////////////////////////////////////

FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym,
                                         llvm::Function *llvmFunction,
                                         SourcePos firstStmtPos) {
    function = func;

    /* Create a new basic block to store all of the allocas */
    allocaBlock = llvm::BasicBlock::Create(*g->ctx, "allocas", llvmFunction, 0);
    bblock = llvm::BasicBlock::Create(*g->ctx, "entry", llvmFunction, 0);
    /* But jump from it immediately into the real entry block */
    llvm::BranchInst::Create(bblock, allocaBlock);

    funcStartPos = funSym->pos;

    internalMaskPointer = AllocaInst(LLVMTypes::MaskType, "internal_mask_memory");
    StoreInst(LLVMMaskAllOn, internalMaskPointer);
    functionMaskValue = LLVMMaskAllOn;
    fullMaskPointer = NULL;

    loopMask = NULL;
    breakLanesPtr = continueLanesPtr = NULL;
    breakTarget = continueTarget = NULL;

    returnedLanesPtr = AllocaInst(LLVMTypes::MaskType, "returned_lanes_memory");
    StoreInst(LLVMMaskAllOff, returnedLanesPtr);

    launchedTasks = false;
    launchGroupHandlePtr = AllocaInst(LLVMTypes::VoidPointerType, "launch_group_handle");
    StoreInst(llvm::Constant::getNullValue(LLVMTypes::VoidPointerType),
              launchGroupHandlePtr);

    const Type *returnType = function->GetReturnType();
    if (!returnType || returnType == AtomicType::Void)
        returnValuePtr = NULL;
    else {
        LLVM_TYPE_CONST llvm::Type *ftype = returnType->LLVMType(g->ctx);
        returnValuePtr = AllocaInst(ftype, "return_value_memory");
    }

    if (m->diBuilder) {
        /* If debugging is enabled, tell the debug information emission
           code about this new function */
        diFile = funcStartPos.GetDIFile();
        llvm::DIType retType = function->GetReturnType()->GetDIType(diFile);
        int flags = llvm::DIDescriptor::FlagPrototyped; // ??
        diFunction = m->diBuilder->createFunction(diFile, /* scope */
                                                  llvmFunction->getName(), // mangled
                                                  funSym->name,
                                                  diFile,
                                                  funcStartPos.first_line,
                                                  retType,
                                                  funSym->storageClass == SC_STATIC,
                                                  true, /* is definition */
                                                  flags,
                                                  g->opt.level > 0,
                                                  llvmFunction);
        /* And start a scope representing the initial function scope */
        StartScope();

        llvm::DIFile file = funcStartPos.GetDIFile();
        Symbol *programIndexSymbol = m->symbolTable->LookupVariable("programIndex");
        assert(programIndexSymbol && programIndexSymbol->storagePtr);
        m->diBuilder->createGlobalVariable(programIndexSymbol->name,
                                           file,
                                           funcStartPos.first_line,
                                           programIndexSymbol->type->GetDIType(file),
                                           true /* static */,
                                           programIndexSymbol->storagePtr);

        Symbol *programCountSymbol = m->symbolTable->LookupVariable("programCount");
        assert(programCountSymbol);
        m->diBuilder->createGlobalVariable(programCountSymbol->name,
                                           file,
                                           funcStartPos.first_line,
                                           programCountSymbol->type->GetDIType(file),
                                           true /* static */,
                                           programCountSymbol->storagePtr);
    }
}


FunctionEmitContext::~FunctionEmitContext() {
    assert(controlFlowInfo.size() == 0);
    assert(debugScopes.size() == (m->diBuilder ? 1 : 0));
}


const Function *
FunctionEmitContext::GetFunction() const {
    return function;
}


llvm::BasicBlock *
FunctionEmitContext::GetCurrentBasicBlock() {
    return bblock;
}


void
FunctionEmitContext::SetCurrentBasicBlock(llvm::BasicBlock *bb) {
    bblock = bb;
}


llvm::Value *
FunctionEmitContext::GetFunctionMask() {
    return functionMaskValue;
}


llvm::Value *
FunctionEmitContext::GetInternalMask() {
    if (VaryingCFDepth() == 0)
        return LLVMMaskAllOn;
    else
        return LoadInst(internalMaskPointer, "load_mask");
}


llvm::Value *
FunctionEmitContext::GetFullMask() {
    llvm::Value *internalMask = GetInternalMask();
    if (internalMask == LLVMMaskAllOn && functionMaskValue == LLVMMaskAllOn)
        return LLVMMaskAllOn;
    else
        return BinaryOperator(llvm::Instruction::And, GetInternalMask(),
                              functionMaskValue, "internal_mask&function_mask");
}


void
FunctionEmitContext::SetMaskPointer(llvm::Value *p) {
    fullMaskPointer = p;
}


void
FunctionEmitContext::SetFunctionMask(llvm::Value *value) {
    functionMaskValue = value;
    StoreInst(GetFullMask(), fullMaskPointer);
}


void
FunctionEmitContext::SetLoopMask(llvm::Value *value) {
    loopMask = value;
}


void
FunctionEmitContext::SetInternalMask(llvm::Value *value) {
    StoreInst(value, internalMaskPointer);
    // kludge so that __mask returns the right value in ispc code.
    StoreInst(GetFullMask(), fullMaskPointer);
}


void
FunctionEmitContext::SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *test) {
    llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask,
                                       test, "oldMask&test");
    SetInternalMask(mask);
}


void
FunctionEmitContext::SetInternalMaskAndNot(llvm::Value *oldMask, llvm::Value *test) {
    llvm::Value *notTest = BinaryOperator(llvm::Instruction::Xor, test, LLVMMaskAllOn,
                                          "~test");
    llvm::Value *mask = BinaryOperator(llvm::Instruction::And, oldMask, notTest,
                                       "oldMask&~test");
    SetInternalMask(mask);
}


void
FunctionEmitContext::BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
    assert(bblock != NULL);
    llvm::Value *any = Any(GetFullMask());
    BranchInst(btrue, bfalse, any);
    // It's illegal to add any additional instructions to the basic block
    // now that it's terminated, so set bblock to NULL to be safe
    bblock = NULL;
}


void
FunctionEmitContext::BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
    assert(bblock != NULL);
    llvm::Value *all = All(GetFullMask());
    BranchInst(btrue, bfalse, all);
    // It's illegal to add any additional instructions to the basic block
    // now that it's terminated, so set bblock to NULL to be safe
    bblock = NULL;
}


void
FunctionEmitContext::BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse) {
    assert(bblock != NULL);
    // switch sense of true/false bblocks
    BranchIfMaskAny(bfalse, btrue);
    // It's illegal to add any additional instructions to the basic block
    // now that it's terminated, so set bblock to NULL to be safe
    bblock = NULL;
}


void
FunctionEmitContext::StartUniformIf() {
    controlFlowInfo.push_back(CFInfo::GetIf(true, GetInternalMask()));
}


void
FunctionEmitContext::StartVaryingIf(llvm::Value *oldMask) {
    controlFlowInfo.push_back(CFInfo::GetIf(false, oldMask));
}


void
FunctionEmitContext::EndIf() {
    // Make sure we match up with a Start{Uniform,Varying}If().
    assert(controlFlowInfo.size() > 0 && controlFlowInfo.back()->IsIf());
    CFInfo *ci = controlFlowInfo.back();
    controlFlowInfo.pop_back();

    // 'uniform' ifs don't change the mask, so we only need to restore the
    // mask going into the if for 'varying' if statements
    if (!ci->IsUniform() && bblock != NULL) {
        // We can't just restore the mask as it was going into the 'if'
        // statement.  First we have to take into account any program
        // instances that have executed 'return' statements; the restored
        // mask must be off for those lanes.
        restoreMaskGivenReturns(ci->savedMask);

        // If the 'if' statement is inside a loop with a 'varying'
        // condition, we also need to account for any break or continue
        // statements that executed inside the 'if' statement; the lanes
        // for the program instances that ran those must also be left off
        // after we restore the mask following the 'if'.  The code below
        // ends up being optimized out in the case that there were no
        // break or continue statements (and breakLanesPtr and
        // continueLanesPtr have their initial 'all off' values), so we
        // don't need to check for that here.
        if (breakLanesPtr != NULL) {
            assert(continueLanesPtr != NULL);

            // newMask = (oldMask & ~(breakLanes | continueLanes))
            llvm::Value *oldMask = GetInternalMask();
            llvm::Value *breakLanes = LoadInst(breakLanesPtr, "break_lanes");
            llvm::Value *continueLanes = LoadInst(continueLanesPtr,
                                                  "continue_lanes");
            llvm::Value *breakOrContinueLanes =
                BinaryOperator(llvm::Instruction::Or, breakLanes, continueLanes,
                               "break|continue_lanes");
            llvm::Value *notBreakOrContinue = NotOperator(breakOrContinueLanes,
                                                          "!(break|continue)_lanes");
            llvm::Value *newMask =
                BinaryOperator(llvm::Instruction::And, oldMask, notBreakOrContinue,
                               "new_mask");
            SetInternalMask(newMask);
        }
    }
}
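

// Worked example (illustrative, assuming a hypothetical 4-wide target):
// suppose the mask saved on entry to a varying 'if' is {1,1,1,1}, lane 2
// executed 'return' inside the 'if' (returnedLanes = {0,0,1,0}), and lane
// 3 executed 'break' (breakLanes = {0,0,0,1}).  At EndIf(),
// restoreMaskGivenReturns() computes {1,1,1,1} & ~{0,0,1,0} = {1,1,0,1},
// and the break/continue fixup above then computes
// {1,1,0,1} & ~{0,0,0,1} = {1,1,0,0}, leaving only lanes 0 and 1 running.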


void
FunctionEmitContext::StartLoop(llvm::BasicBlock *bt, llvm::BasicBlock *ct,
                               bool uniformCF) {
    // Store the current values of various loop-related state so that we
    // can restore it when we exit this loop.
    llvm::Value *oldMask = GetInternalMask();
    controlFlowInfo.push_back(CFInfo::GetLoop(uniformCF, breakTarget,
                                              continueTarget, breakLanesPtr,
                                              continueLanesPtr, oldMask, loopMask));
    if (uniformCF)
        // If the loop has a uniform condition, we don't need to track
        // which lanes 'break' or 'continue'; all of the running ones go
        // together, so we just jump
        breakLanesPtr = continueLanesPtr = NULL;
    else {
        // For loops with varying conditions, allocate space to store masks
        // that record which lanes have done these
        continueLanesPtr = AllocaInst(LLVMTypes::MaskType, "continue_lanes_memory");
        StoreInst(LLVMMaskAllOff, continueLanesPtr);
        breakLanesPtr = AllocaInst(LLVMTypes::MaskType, "break_lanes_memory");
        StoreInst(LLVMMaskAllOff, breakLanesPtr);
    }

    breakTarget = bt;
    continueTarget = ct;
    loopMask = NULL; // this better be set by the loop!
}


void
FunctionEmitContext::EndLoop() {
    assert(controlFlowInfo.size() && controlFlowInfo.back()->IsLoop());
    CFInfo *ci = controlFlowInfo.back();
    controlFlowInfo.pop_back();

    // Restore the break/continue state information to what it was before
    // we went into this loop.
    breakTarget = ci->savedBreakTarget;
    continueTarget = ci->savedContinueTarget;
    breakLanesPtr = ci->savedBreakLanesPtr;
    continueLanesPtr = ci->savedContinueLanesPtr;
    loopMask = ci->savedLoopMask;

    if (!ci->IsUniform())
        // If the loop had a 'uniform' test, then it didn't make any
        // changes to the mask so there's nothing to restore.  If it had a
        // varying test, we need to restore the mask to what it was going
        // into the loop, but still leaving off any lanes that executed a
        // 'return' statement.
        restoreMaskGivenReturns(ci->savedMask);
}


void
FunctionEmitContext::StartForeach() {
    // Store the current values of various loop-related state so that we
    // can restore it when we exit this loop.
    llvm::Value *oldMask = GetInternalMask();
    controlFlowInfo.push_back(CFInfo::GetForeach(breakTarget, continueTarget, breakLanesPtr,
                                                 continueLanesPtr, oldMask, loopMask));
    continueLanesPtr = breakLanesPtr = NULL;
    breakTarget = NULL;
    continueTarget = NULL;
    loopMask = NULL;
}


void
FunctionEmitContext::EndForeach() {
    assert(controlFlowInfo.size() && controlFlowInfo.back()->IsForeach());
    CFInfo *ci = controlFlowInfo.back();
    controlFlowInfo.pop_back();

    // Restore the break/continue state information to what it was before
    // we went into this loop.
    breakTarget = ci->savedBreakTarget;
    continueTarget = ci->savedContinueTarget;
    breakLanesPtr = ci->savedBreakLanesPtr;
    continueLanesPtr = ci->savedContinueLanesPtr;
    loopMask = ci->savedLoopMask;
}


void
FunctionEmitContext::restoreMaskGivenReturns(llvm::Value *oldMask) {
    if (!bblock)
        return;

    // Restore the mask to the given old mask, but leave off any lanes that
    // executed a return statement.
    // newMask = (oldMask & ~returnedLanes)
    llvm::Value *returnedLanes = LoadInst(returnedLanesPtr,
                                          "returned_lanes");
    llvm::Value *notReturned = NotOperator(returnedLanes, "~returned_lanes");
    llvm::Value *newMask = BinaryOperator(llvm::Instruction::And,
                                          oldMask, notReturned, "new_mask");
    SetInternalMask(newMask);
}


void
FunctionEmitContext::Break(bool doCoherenceCheck) {
    if (breakTarget == NULL) {
        Error(currentPos, "\"break\" statement is illegal outside of for/while/do loops.");
        return;
    }

    // If all of the enclosing 'if' tests in the loop have uniform control
    // flow or if we can tell that the mask is all on, then we can just
    // jump to the break location.
    if (ifsInLoopAllUniform() || GetInternalMask() == LLVMMaskAllOn) {
        BranchInst(breakTarget);
        if (ifsInLoopAllUniform() && doCoherenceCheck)
            Warning(currentPos, "Coherent break statement not necessary in fully uniform "
                    "control flow.");
        // Set bblock to NULL since the jump has terminated the basic block
        bblock = NULL;
    }
    else {
        // Otherwise we need to update the mask of the lanes that have
        // executed a 'break' statement:
        // breakLanes = breakLanes | mask
        assert(breakLanesPtr != NULL);
        llvm::Value *mask = GetInternalMask();
        llvm::Value *breakMask = LoadInst(breakLanesPtr,
                                          "break_mask");
        llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or,
                                              mask, breakMask, "mask|break_mask");
        StoreInst(newMask, breakLanesPtr);

        // Set the current mask to be all off, just in case there are any
        // statements in the same scope after the 'break'.  Most of the
        // time this will be optimized away since we'll likely end the
        // scope of an 'if' statement and restore the mask then.
        SetInternalMask(LLVMMaskAllOff);

        if (doCoherenceCheck)
            // If the user has indicated that this is a 'coherent' break
            // statement, then check to see if the mask is all off.  If so,
            // we have to conservatively jump to the continueTarget, not
            // the breakTarget, since part of the reason the mask is all
            // off may be due to 'continue' statements that executed in the
            // current loop iteration.
            // FIXME: if the loop only has break statements and no
            // continues, we can jump to breakTarget in that case.
            jumpIfAllLoopLanesAreDone(continueTarget);
    }
}


void
FunctionEmitContext::Continue(bool doCoherenceCheck) {
    if (!continueTarget) {
        Error(currentPos, "\"continue\" statement illegal outside of for/while/do loops.");
        return;
    }

    if (ifsInLoopAllUniform() || GetInternalMask() == LLVMMaskAllOn) {
        // Similarly to 'break' statements, we can immediately jump to the
        // continue target if we're only in 'uniform' control flow within
        // the loop or if we can tell that the mask is all on.
        AddInstrumentationPoint("continue: uniform CF, jumped");
        if (ifsInLoopAllUniform() && doCoherenceCheck)
            Warning(currentPos, "Coherent continue statement not necessary in fully uniform "
                    "control flow.");
        BranchInst(continueTarget);
        bblock = NULL;
    }
    else {
        // Otherwise update the stored value of which lanes have 'continue'd.
        // continueLanes = continueLanes | mask
        assert(continueLanesPtr);
        llvm::Value *mask = GetInternalMask();
        llvm::Value *continueMask =
            LoadInst(continueLanesPtr, "continue_mask");
        llvm::Value *newMask = BinaryOperator(llvm::Instruction::Or,
                                              mask, continueMask, "mask|continueMask");
        StoreInst(newMask, continueLanesPtr);

        // And set the current mask to be all off in case there are any
        // statements in the same scope after the 'continue'
        SetInternalMask(LLVMMaskAllOff);

        if (doCoherenceCheck)
            // If this is a 'coherent continue' statement, then emit the
            // code to see if all of the lanes are now off due to
            // breaks/continues and jump to the continue target if so.
            jumpIfAllLoopLanesAreDone(continueTarget);
    }
}


/** This function checks to see if all of the 'if' statements (if any)
    between the current scope and the first enclosing loop have 'uniform'
    tests.
*/
bool
FunctionEmitContext::ifsInLoopAllUniform() const {
    assert(controlFlowInfo.size() > 0);
    // Go backwards through controlFlowInfo, since we add new nested scopes
    // to the back.  Stop once we come to the first enclosing loop.
    int i = controlFlowInfo.size() - 1;
    while (i >= 0 && controlFlowInfo[i]->type != CFInfo::Loop) {
        if (controlFlowInfo[i]->isUniform == false)
            // Found a scope due to an 'if' statement with a varying test
            return false;
        --i;
    }
    assert(i >= 0); // else we didn't find a loop!
    return true;
}


void
FunctionEmitContext::jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target) {
    // Check to see if (returned lanes | continued lanes | break lanes) is
    // equal to the value of mask at the start of the loop iteration.  If
    // so, everyone is done and we can jump to the given target
    llvm::Value *returned = LoadInst(returnedLanesPtr,
                                     "returned_lanes");
    llvm::Value *continued = LoadInst(continueLanesPtr,
                                      "continue_lanes");
    llvm::Value *breaked = LoadInst(breakLanesPtr, "break_lanes");
    llvm::Value *returnedOrContinued = BinaryOperator(llvm::Instruction::Or,
                                                      returned, continued,
                                                      "returned|continued");
    llvm::Value *returnedOrContinuedOrBreaked =
        BinaryOperator(llvm::Instruction::Or, returnedOrContinued,
                       breaked, "returned|continued|breaked");

    // Do we match the mask at loop entry?
    llvm::Value *allRCB = MasksAllEqual(returnedOrContinuedOrBreaked, loopMask);
    llvm::BasicBlock *bAll = CreateBasicBlock("all_continued_or_breaked");
    llvm::BasicBlock *bNotAll = CreateBasicBlock("not_all_continued_or_breaked");
    BranchInst(bAll, bNotAll, allRCB);

    // If so, have an extra basic block along the way to add
    // instrumentation, if the user asked for it.
    bblock = bAll;
    AddInstrumentationPoint("break/continue: all dynamically went");
    BranchInst(target);

    // And set the current basic block to a new one for future instructions
    // for the path where we weren't able to jump
    bblock = bNotAll;
    AddInstrumentationPoint("break/continue: not all went");
}
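

// Illustrative sketch (hypothetical 4-wide target): if the mask at loop
// entry was {1,1,1,0} and by this point returnedLanes = {0,1,0,0},
// continueLanes = {1,0,0,0}, and breakLanes = {0,0,1,0}, their OR equals
// the entry mask, so no lane has more work in this iteration and the code
// above jumps to 'target' instead of falling through with an all-off mask.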


void
FunctionEmitContext::RestoreContinuedLanes() {
    if (continueLanesPtr == NULL)
        return;

    // mask = mask | continueLanes
    llvm::Value *mask = GetInternalMask();
    llvm::Value *continueMask = LoadInst(continueLanesPtr,
                                         "continue_mask");
    llvm::Value *orMask = BinaryOperator(llvm::Instruction::Or,
                                         mask, continueMask, "mask|continue_mask");
    SetInternalMask(orMask);

    // continueLanes = 0
    StoreInst(LLVMMaskAllOff, continueLanesPtr);
}


int
FunctionEmitContext::VaryingCFDepth() const {
    int sum = 0;
    for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
        if (controlFlowInfo[i]->IsVaryingType())
            ++sum;
    return sum;
}


bool
FunctionEmitContext::InForeachLoop() const {
    for (unsigned int i = 0; i < controlFlowInfo.size(); ++i)
        if (controlFlowInfo[i]->IsForeach())
            return true;
    return false;
}


void
FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck) {
    const Type *returnType = function->GetReturnType();
    if (returnType == AtomicType::Void) {
        if (expr != NULL)
            Error(expr->pos, "Can't return non-void type \"%s\" from void function.",
                  expr->GetType()->GetString().c_str());
    }
    else {
        if (expr == NULL) {
            Error(funcStartPos, "Must provide return value for return "
                  "statement for non-void function.");
            return;
        }

        expr = TypeConvertExpr(expr, returnType, "return statement");
        if (expr != NULL) {
            llvm::Value *retVal = expr->GetValue(this);
            if (retVal != NULL)
                // Use a masked store to store the value of the expression
                // in the return value memory; this preserves the return
                // values from other lanes that may have executed return
                // statements previously.
                StoreInst(retVal, returnValuePtr, GetInternalMask(),
                          PointerType::GetUniform(returnType));
        }
    }

    if (VaryingCFDepth() == 0) {
        // If there is only uniform control flow between us and the
        // function entry, then it's guaranteed that all lanes are running,
        // so we can just emit a true return instruction
        AddInstrumentationPoint("return: uniform control flow");
        ReturnInst();
    }
    else {
        // Otherwise we update the returnedLanes value by ORing it with
        // the current lane mask.
        llvm::Value *oldReturnedLanes =
            LoadInst(returnedLanesPtr, "old_returned_lanes");
        llvm::Value *newReturnedLanes =
            BinaryOperator(llvm::Instruction::Or, oldReturnedLanes,
                           GetInternalMask(), "old_mask|returned_lanes");

        // For 'coherent' return statements, emit code to check if all
        // lanes have returned
        if (doCoherenceCheck) {
            // if newReturnedLanes == functionMaskValue, get out of here!
            llvm::Value *cmp = MasksAllEqual(functionMaskValue,
                                             newReturnedLanes);
            llvm::BasicBlock *bDoReturn = CreateBasicBlock("do_return");
            llvm::BasicBlock *bNoReturn = CreateBasicBlock("no_return");
            BranchInst(bDoReturn, bNoReturn, cmp);

            bblock = bDoReturn;
            AddInstrumentationPoint("return: all lanes have returned");
            ReturnInst();

            bblock = bNoReturn;
        }
        // Otherwise update returnedLanesPtr and turn off all of the lanes
        // in the current mask so that any subsequent statements in the
        // same scope after the return have no effect
        StoreInst(newReturnedLanes, returnedLanesPtr);
        AddInstrumentationPoint("return: some but not all lanes have returned");
        SetInternalMask(LLVMMaskAllOff);
    }
}
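

// Illustrative sketch (hypothetical 4-wide target): if lanes 0 and 1
// return inside 'if (x < 0)' while lanes 2 and 3 keep running, the masked
// store above writes only lanes 0 and 1 of return_value_memory,
// returnedLanes becomes {1,1,0,0}, and the current mask is set all-off so
// any statements after the 'return' in that scope have no effect for the
// returned lanes.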


llvm::Value *
FunctionEmitContext::Any(llvm::Value *mask) {
    llvm::Value *mmval = LaneMask(mask);
    return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, mmval,
                   LLVMInt32(0), "any_mm_cmp");
}


llvm::Value *
FunctionEmitContext::All(llvm::Value *mask) {
    llvm::Value *mmval = LaneMask(mask);
    return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mmval,
                   LLVMInt32((1<<g->target.vectorWidth)-1), "all_mm_cmp");
}


llvm::Value *
FunctionEmitContext::LaneMask(llvm::Value *v) {
    // Call the target-dependent movmsk function to turn the vector mask
    // into an i32 value
    std::vector<Symbol *> *mm = m->symbolTable->LookupFunction("__movmsk");
    // There should be one with signed int signature, one unsigned int.
    assert(mm && mm->size() == 2);
    llvm::Function *fmm = (*mm)[0]->function;
    return CallInst(fmm, NULL, v, "val_movmsk");
}
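

// Illustrative note (assuming an SSE-style 4-wide target): __movmsk
// collapses a vector mask to one bit per lane in an i32, so a mask with
// lanes {on, off, on, on} becomes 0b1101 = 13.  Any() above then tests
// movmsk != 0 and All() tests movmsk == (1 << vectorWidth) - 1 = 0b1111.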


llvm::Value *
FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
#if 0
    // Compare the two masks to get a vector of i1s
    llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
                               v1, v2, "v1==v2");
    // Turn that into a bool vector type (often i32s)
    cmp = I1VecToBoolVec(cmp);
    // And see if it's all on
    return All(cmp);
#else
    llvm::Value *mm1 = LaneMask(v1);
    llvm::Value *mm2 = LaneMask(v2);
    return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2,
                   "v1==v2");
#endif
}


llvm::Value *
FunctionEmitContext::GetStringPtr(const std::string &str) {
    llvm::Constant *lstr = llvm::ConstantArray::get(*g->ctx, str);
    llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage;
    llvm::Value *lstrPtr = new llvm::GlobalVariable(*m->module, lstr->getType(),
                                                    true /*isConst*/,
                                                    linkage, lstr, "__str");
    return new llvm::BitCastInst(lstrPtr, LLVMTypes::VoidPointerType,
                                 "str_void_ptr", bblock);
}


llvm::BasicBlock *
FunctionEmitContext::CreateBasicBlock(const char *name) {
    llvm::Function *function = bblock->getParent();
    return llvm::BasicBlock::Create(*g->ctx, name, function);
}


llvm::Value *
FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
    if (b == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    LLVM_TYPE_CONST llvm::ArrayType *at =
        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(b->getType());
    if (at) {
        // If we're given an array of vectors of i1s, then do the
        // conversion for each of the elements
        LLVM_TYPE_CONST llvm::Type *boolArrayType =
            llvm::ArrayType::get(LLVMTypes::BoolVectorType, at->getNumElements());
        llvm::Value *ret = llvm::UndefValue::get(boolArrayType);

        for (unsigned int i = 0; i < at->getNumElements(); ++i) {
            llvm::Value *elt = ExtractInst(b, i);
            llvm::Value *sext = SExtInst(elt, LLVMTypes::BoolVectorType,
                                         "val_to_boolvec32");
            ret = InsertInst(ret, sext, i);
        }
        return ret;
    }
    else
        return SExtInst(b, LLVMTypes::BoolVectorType, "val_to_boolvec32");
}


static llvm::Value *
lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) {
    llvm::Constant *sConstant = llvm::ConstantArray::get(*g->ctx, s);
    llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(),
                                                 true /* const */,
                                                 llvm::GlobalValue::InternalLinkage,
                                                 sConstant, s);
    llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(0) };
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
    llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
    return llvm::GetElementPtrInst::Create(sPtr, arrayRef, "sptr", bblock);
#else
    return llvm::GetElementPtrInst::Create(sPtr, &indices[0], &indices[2],
                                           "sptr", bblock);
#endif
}


void
FunctionEmitContext::AddInstrumentationPoint(const char *note) {
    assert(note != NULL);
    if (!g->emitInstrumentation)
        return;

    std::vector<llvm::Value *> args;
    // arg 1: filename as string
    args.push_back(lGetStringAsValue(bblock, currentPos.name));
    // arg 2: provided note
    args.push_back(lGetStringAsValue(bblock, note));
    // arg 3: line number
    args.push_back(LLVMInt32(currentPos.first_line));
    // arg 4: current mask, movmsk'ed down to an int32
    args.push_back(LaneMask(GetFullMask()));

    llvm::Function *finst = m->module->getFunction("ISPCInstrument");
    CallInst(finst, NULL, args, "");
}


void
FunctionEmitContext::SetDebugPos(SourcePos pos) {
    currentPos = pos;
}


SourcePos
FunctionEmitContext::GetDebugPos() const {
    return currentPos;
}


void
FunctionEmitContext::AddDebugPos(llvm::Value *value, const SourcePos *pos,
                                 llvm::DIScope *scope) {
    llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(value);
    if (inst != NULL && m->diBuilder) {
        SourcePos p = pos ? *pos : currentPos;
        if (p.first_line != 0)
            // If first_line == 0, then we're in the middle of setting up
            // the standard library or the like; don't add debug positions
            // for those functions
            inst->setDebugLoc(llvm::DebugLoc::get(p.first_line, p.first_column,
                                                  scope ? *scope : GetDIScope()));
    }
}


void
FunctionEmitContext::StartScope() {
    if (m->diBuilder != NULL) {
        llvm::DIScope parentScope;
        if (debugScopes.size() > 0)
            parentScope = debugScopes.back();
        else
            parentScope = diFunction;

        llvm::DILexicalBlock lexicalBlock =
            m->diBuilder->createLexicalBlock(parentScope, diFile,
                                             currentPos.first_line,
                                             currentPos.first_column);
        debugScopes.push_back(lexicalBlock);
    }
}


void
FunctionEmitContext::EndScope() {
    if (m->diBuilder != NULL) {
        assert(debugScopes.size() > 0);
        debugScopes.pop_back();
    }
}


llvm::DIScope
FunctionEmitContext::GetDIScope() const {
    assert(debugScopes.size() > 0);
    return debugScopes.back();
}


void
FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym) {
    if (m->diBuilder == NULL)
        return;

    llvm::DIScope scope = GetDIScope();
    llvm::DIVariable var =
        m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_auto_variable,
                                          scope,
                                          sym->name,
                                          sym->pos.GetDIFile(),
                                          sym->pos.first_line,
                                          sym->type->GetDIType(scope),
                                          true /* preserve through opts */);
    llvm::Instruction *declareInst =
        m->diBuilder->insertDeclare(sym->storagePtr, var, bblock);
    AddDebugPos(declareInst, &sym->pos, &scope);
}


void
FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym) {
    if (m->diBuilder == NULL)
        return;

    llvm::DIScope scope = diFunction;
    llvm::DIVariable var =
        m->diBuilder->createLocalVariable(llvm::dwarf::DW_TAG_arg_variable,
                                          scope,
                                          sym->name,
                                          sym->pos.GetDIFile(),
                                          sym->pos.first_line,
                                          sym->type->GetDIType(scope),
                                          true /* preserve through opts */);
    llvm::Instruction *declareInst =
        m->diBuilder->insertDeclare(sym->storagePtr, var, bblock);
    AddDebugPos(declareInst, &sym->pos, &scope);
}


/** If the given type is an array of vector types, then it's the
    representation of an ispc VectorType with varying elements.  If it is
    one of these, return the array size (i.e. the VectorType's size).
    Otherwise return zero.
*/
static int
lArrayVectorWidth(LLVM_TYPE_CONST llvm::Type *t) {
    LLVM_TYPE_CONST llvm::ArrayType *arrayType =
        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(t);
    if (arrayType == NULL)
        return 0;

    // We shouldn't be seeing arrays of anything but vectors being passed
    // to things like FunctionEmitContext::BinaryOperator() as operands.
    LLVM_TYPE_CONST llvm::VectorType *vectorElementType =
        llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(arrayType->getElementType());
    assert((vectorElementType != NULL &&
            (int)vectorElementType->getNumElements() == g->target.vectorWidth));

    return (int)arrayType->getNumElements();
}


llvm::Value *
FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst,
                                    llvm::Value *v0, llvm::Value *v1,
                                    const char *name) {
    if (v0 == NULL || v1 == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    assert(v0->getType() == v1->getType());
    LLVM_TYPE_CONST llvm::Type *type = v0->getType();
    int arraySize = lArrayVectorWidth(type);
    if (arraySize == 0) {
        llvm::Instruction *bop =
            llvm::BinaryOperator::Create(inst, v0, v1, name ? name : "", bblock);
        AddDebugPos(bop);
        return bop;
    }
    else {
        // If this is an ispc VectorType, apply the binary operator to each
        // of the elements of the array (which in turn should be either
        // scalar types or llvm::VectorTypes.)
        llvm::Value *ret = llvm::UndefValue::get(type);
        for (int i = 0; i < arraySize; ++i) {
            llvm::Value *a = ExtractInst(v0, i);
            llvm::Value *b = ExtractInst(v1, i);
            llvm::Value *op = BinaryOperator(inst, a, b);
            ret = InsertInst(ret, op, i);
        }
        return ret;
    }
}


llvm::Value *
FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) {
    if (v == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    // Similarly to BinaryOperator, do the operation on all the elements of
    // the array if we're given an array type; otherwise just do the
    // regular llvm operation.
    LLVM_TYPE_CONST llvm::Type *type = v->getType();
    int arraySize = lArrayVectorWidth(type);
    if (arraySize == 0) {
        llvm::Instruction *binst =
            llvm::BinaryOperator::CreateNot(v, name ? name : "not", bblock);
        AddDebugPos(binst);
        return binst;
    }
    else {
        llvm::Value *ret = llvm::UndefValue::get(type);
        for (int i = 0; i < arraySize; ++i) {
            llvm::Value *a = ExtractInst(v, i);
            llvm::Value *op =
                llvm::BinaryOperator::CreateNot(a, name ? name : "not", bblock);
            AddDebugPos(op);
            ret = InsertInst(ret, op, i);
        }
        return ret;
    }
}


// Given the llvm Type that represents an ispc VectorType, return an
// equally-shaped type with boolean elements.  (This is the type that will
// be returned from CmpInst with ispc VectorTypes).
static LLVM_TYPE_CONST llvm::Type *
lGetMatchingBoolVectorType(LLVM_TYPE_CONST llvm::Type *type) {
    LLVM_TYPE_CONST llvm::ArrayType *arrayType =
        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(type);
    assert(arrayType != NULL);

    LLVM_TYPE_CONST llvm::VectorType *vectorElementType =
        llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(arrayType->getElementType());
    assert(vectorElementType != NULL);
    assert((int)vectorElementType->getNumElements() == g->target.vectorWidth);

    LLVM_TYPE_CONST llvm::Type *base =
        llvm::VectorType::get(LLVMTypes::BoolType, g->target.vectorWidth);
    return llvm::ArrayType::get(base, arrayType->getNumElements());
}


llvm::Value *
FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst,
                             llvm::CmpInst::Predicate pred,
                             llvm::Value *v0, llvm::Value *v1,
                             const char *name) {
    if (v0 == NULL || v1 == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    assert(v0->getType() == v1->getType());
    LLVM_TYPE_CONST llvm::Type *type = v0->getType();
    int arraySize = lArrayVectorWidth(type);
    if (arraySize == 0) {
        llvm::Instruction *ci =
            llvm::CmpInst::Create(inst, pred, v0, v1, name ? name : "cmp",
                                  bblock);
        AddDebugPos(ci);
        return ci;
    }
    else {
        LLVM_TYPE_CONST llvm::Type *boolType = lGetMatchingBoolVectorType(type);
        llvm::Value *ret = llvm::UndefValue::get(boolType);
        for (int i = 0; i < arraySize; ++i) {
            llvm::Value *a = ExtractInst(v0, i);
            llvm::Value *b = ExtractInst(v1, i);
            llvm::Value *op = CmpInst(inst, pred, a, b, name);
            ret = InsertInst(ret, op, i);
        }
        return ret;
    }
}


llvm::Value *
FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) {
    if (value == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    llvm::Value *ret = NULL;
    LLVM_TYPE_CONST llvm::Type *eltType = value->getType();

    LLVM_TYPE_CONST llvm::PointerType *pt =
        llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(eltType);
    if (pt != NULL) {
        // Varying pointers are represented as vectors of i32/i64s
        ret = llvm::UndefValue::get(LLVMTypes::VoidPointerVectorType);
        value = PtrToIntInst(value);
    }
    else
        // All other varying types are represented as vectors of the
        // underlying type.
        ret = llvm::UndefValue::get(llvm::VectorType::get(eltType,
                                                          g->target.vectorWidth));

    for (int i = 0; i < g->target.vectorWidth; ++i) {
        llvm::Twine n = llvm::Twine("smear.") + llvm::Twine(name ? name : "") +
            llvm::Twine(i);
        ret = InsertInst(ret, value, i, n.str().c_str());
    }

    return ret;
}
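

// Illustrative note: on a hypothetical 4-wide target, smearing the uniform
// i32 value 7 builds <7, 7, 7, 7> via a chain of four insertelement
// instructions named smear.<name>0 .. smear.<name>3; a uniform pointer is
// first converted to an integer, so the result for pointers is a vector
// of pointer-sized ints rather than a vector of pointers.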


llvm::Value *
FunctionEmitContext::BitCastInst(llvm::Value *value,
                                 LLVM_TYPE_CONST llvm::Type *type,
                                 const char *name) {
    if (value == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    llvm::Instruction *inst =
        new llvm::BitCastInst(value, type, name ? name : "bitcast", bblock);
    AddDebugPos(inst);
    return inst;
}


llvm::Value *
FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
    if (value == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(value->getType()))
        // no-op for varying pointers; they're already vectors of ints
        return value;

    LLVM_TYPE_CONST llvm::Type *type = LLVMTypes::PointerIntType;
    llvm::Instruction *inst =
        new llvm::PtrToIntInst(value, type, name ? name : "ptr2int", bblock);
    AddDebugPos(inst);
    return inst;
}


llvm::Value *
FunctionEmitContext::PtrToIntInst(llvm::Value *value,
                                  LLVM_TYPE_CONST llvm::Type *toType,
                                  const char *name) {
    if (value == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    LLVM_TYPE_CONST llvm::Type *fromType = value->getType();
    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(fromType)) {
        // varying pointer
        if (fromType == toType)
            // already the right type--done
            return value;
        else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
            return TruncInst(value, toType, "ptr_to_int");
        else {
            assert(fromType->getScalarSizeInBits() <
                   toType->getScalarSizeInBits());
            return ZExtInst(value, toType, "ptr_to_int");
        }
    }

    llvm::Instruction *inst =
        new llvm::PtrToIntInst(value, toType, name ? name : "ptr2int", bblock);
    AddDebugPos(inst);
    return inst;
}


llvm::Value *
FunctionEmitContext::IntToPtrInst(llvm::Value *value,
                                  LLVM_TYPE_CONST llvm::Type *toType,
                                  const char *name) {
    if (value == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    LLVM_TYPE_CONST llvm::Type *fromType = value->getType();
    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(fromType)) {
        // varying pointer
        if (fromType == toType)
            // done
            return value;
        else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits())
            return TruncInst(value, toType, "int_to_ptr");
        else {
            assert(fromType->getScalarSizeInBits() <
                   toType->getScalarSizeInBits());
            return ZExtInst(value, toType, "int_to_ptr");
        }
    }

    llvm::Instruction *inst =
        new llvm::IntToPtrInst(value, toType, name ? name : "int2ptr", bblock);
    AddDebugPos(inst);
    return inst;
}


llvm::Instruction *
FunctionEmitContext::TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
                               const char *name) {
    if (value == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    // TODO: we should probably handle the array case as in
    // e.g. BitCastInst(), but we don't currently need that functionality
    llvm::Instruction *inst =
        new llvm::TruncInst(value, type, name ? name : "trunc", bblock);
    AddDebugPos(inst);
    return inst;
}


llvm::Instruction *
FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
                              LLVM_TYPE_CONST llvm::Type *type, const char *name) {
    if (value == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    // TODO: we should probably handle the array case as in
    // e.g. BitCastInst(), but we don't currently need that functionality
    llvm::Instruction *inst =
        llvm::CastInst::Create(op, value, type, name ? name : "cast", bblock);
    AddDebugPos(inst);
    return inst;
}


llvm::Instruction *
FunctionEmitContext::FPCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
                                const char *name) {
    if (value == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    // TODO: we should probably handle the array case as in
    // e.g. BitCastInst(), but we don't currently need that functionality
    llvm::Instruction *inst =
        llvm::CastInst::CreateFPCast(value, type, name ? name : "fpcast", bblock);
    AddDebugPos(inst);
    return inst;
}


llvm::Instruction *
FunctionEmitContext::SExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
                              const char *name) {
    if (value == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    // TODO: we should probably handle the array case as in
    // e.g. BitCastInst(), but we don't currently need that functionality
    llvm::Instruction *inst =
        new llvm::SExtInst(value, type, name ? name : "sext", bblock);
    AddDebugPos(inst);
    return inst;
}


llvm::Instruction *
FunctionEmitContext::ZExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
                              const char *name) {
    if (value == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    // TODO: we should probably handle the array case as in
    // e.g. BitCastInst(), but we don't currently need that functionality
    llvm::Instruction *inst =
        new llvm::ZExtInst(value, type, name ? name : "zext", bblock);
    AddDebugPos(inst);
    return inst;
}


/** Utility routine used by the GetElementPtrInst() methods; given a
    pointer to some type (either uniform or varying) and an index (also
    either uniform or varying), this returns the new pointer (varying if
    appropriate) given by offsetting the base pointer by the index times
    the size of the object that the pointer points to.
*/
llvm::Value *
FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Value *index,
                                     const Type *ptrType) {
    // Find the scale factor for the index (i.e. the size of the object
    // that the pointer(s) point(s) to).
    const Type *scaleType = ptrType->GetBaseType();
    llvm::Value *scale = g->target.SizeOf(scaleType->LLVMType(g->ctx));

    bool indexIsVarying =
        llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index->getType());
    llvm::Value *offset = NULL;
    if (indexIsVarying == false) {
        // Truncate or sign extend the index as appropriate to a 32 or
        // 64-bit type.
        if ((g->target.is32Bit || g->opt.force32BitAddressing) &&
            index->getType() == LLVMTypes::Int64Type)
            index = TruncInst(index, LLVMTypes::Int32Type, "trunc_index");
        else if ((!g->target.is32Bit && !g->opt.force32BitAddressing) &&
                 index->getType() == LLVMTypes::Int32Type)
            index = SExtInst(index, LLVMTypes::Int64Type, "sext_index");

        // Do a scalar multiply to get the offset as index * scale and then
        // smear the result out to be a vector; this is more efficient than
        // first promoting both the scale and the index to vectors and then
        // multiplying.
        offset = BinaryOperator(llvm::Instruction::Mul, scale, index);
        offset = SmearUniform(offset, "offset_smear");
    }
    else {
        // Similarly, truncate or sign extend the index to be a 32 or 64
        // bit vector type
        if ((g->target.is32Bit || g->opt.force32BitAddressing) &&
            index->getType() == LLVMTypes::Int64VectorType)
            index = TruncInst(index, LLVMTypes::Int32VectorType, "trunc_index");
        else if ((!g->target.is32Bit && !g->opt.force32BitAddressing) &&
                 index->getType() == LLVMTypes::Int32VectorType)
            index = SExtInst(index, LLVMTypes::Int64VectorType, "sext_index");

        scale = SmearUniform(scale, "scale_smear");

        // offset = index * scale
        offset = BinaryOperator(llvm::Instruction::Mul, scale, index, "offset");
    }

    // For 64-bit targets, if we've been doing our offset calculations in
    // 32 bits, we still have to convert to a 64-bit value before we
    // actually add the offset to the pointer.
    if (g->target.is32Bit == false && g->opt.force32BitAddressing == true)
        offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");

    // Smear out the pointer to be varying; either the base pointer or the
    // index must be varying for this method to be called.
    bool baseIsUniform =
        (llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(basePtr->getType()));
    assert(baseIsUniform == false || indexIsVarying == true);
    llvm::Value *varyingPtr = baseIsUniform ?
        SmearUniform(basePtr, "ptr_smear") : basePtr;

    // newPtr = ptr + offset
    return BinaryOperator(llvm::Instruction::Add, varyingPtr, offset, "new_ptr");
}
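

// Worked example (illustrative): indexing a uniform float* p with a
// varying index <2, 5, 1, 3> on a hypothetical 4-wide 32-bit target
// computes scale = sizeof(float) = 4, smears it to <4, 4, 4, 4>,
// multiplies to get offsets <8, 20, 4, 12>, smears p itself to a vector
// of four identical pointer-sized ints, and returns their sum -- one
// per-lane address, ready for the gather/scatter machinery to consume.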


llvm::Value *
FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index,
                                       const Type *ptrType, const char *name) {
    if (basePtr == NULL || index == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    if (dynamic_cast<const ReferenceType *>(ptrType) != NULL)
        ptrType = PointerType::GetUniform(ptrType->GetReferenceTarget());
    assert(dynamic_cast<const PointerType *>(ptrType) != NULL);

    bool indexIsVaryingType =
        llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index->getType());

    if (indexIsVaryingType == false && ptrType->IsUniformType() == true) {
        // The easy case: both the base pointer and the indices are
        // uniform, so just emit the regular LLVM GEP instruction
        llvm::Value *ind[1] = { index };
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
        llvm::ArrayRef<llvm::Value *> arrayRef(&ind[0], &ind[1]);
        llvm::Instruction *inst =
            llvm::GetElementPtrInst::Create(basePtr, arrayRef,
                                            name ? name : "gep", bblock);
#else
        llvm::Instruction *inst =
            llvm::GetElementPtrInst::Create(basePtr, &ind[0], &ind[1],
                                            name ? name : "gep", bblock);
#endif
        AddDebugPos(inst);
        return inst;
    }
    else
        return applyVaryingGEP(basePtr, index, ptrType);
}


llvm::Value *
FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0,
                                       llvm::Value *index1, const Type *ptrType,
                                       const char *name) {
    if (basePtr == NULL || index0 == NULL || index1 == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    if (dynamic_cast<const ReferenceType *>(ptrType) != NULL)
        ptrType = PointerType::GetUniform(ptrType->GetReferenceTarget());
    assert(dynamic_cast<const PointerType *>(ptrType) != NULL);

    bool index0IsVaryingType =
        llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index0->getType());
    bool index1IsVaryingType =
        llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(index1->getType());

    if (index0IsVaryingType == false && index1IsVaryingType == false &&
        ptrType->IsUniformType() == true) {
        // The easy case: both the base pointer and the indices are
        // uniform, so just emit the regular LLVM GEP instruction
        llvm::Value *indices[2] = { index0, index1 };
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
        llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
        llvm::Instruction *inst =
            llvm::GetElementPtrInst::Create(basePtr, arrayRef,
                                            name ? name : "gep", bblock);
#else
        llvm::Instruction *inst =
            llvm::GetElementPtrInst::Create(basePtr, &indices[0], &indices[2],
                                            name ? name : "gep", bblock);
#endif
        AddDebugPos(inst);
        return inst;
    }
    else {
        // Handle the first dimension with index0
        llvm::Value *ptr0 = GetElementPtrInst(basePtr, index0, ptrType);

        // Now index into the second dimension with index1.  First figure
        // out the type of ptr0.
        const Type *baseType = ptrType->GetBaseType();
        const SequentialType *st = dynamic_cast<const SequentialType *>(baseType);
        assert(st != NULL);

        bool ptr0IsUniform =
            llvm::isa<LLVM_TYPE_CONST llvm::PointerType>(ptr0->getType());
        const Type *ptr0BaseType = st->GetElementType();
        const Type *ptr0Type = ptr0IsUniform ?
            PointerType::GetUniform(ptr0BaseType) :
            PointerType::GetVarying(ptr0BaseType);

        return applyVaryingGEP(ptr0, index1, ptr0Type);
    }
}


llvm::Value *
FunctionEmitContext::AddElementOffset(llvm::Value *basePtr, int elementNum,
                                      const Type *ptrType, const char *name) {
    if (ptrType == NULL || ptrType->IsUniformType() ||
        dynamic_cast<const ReferenceType *>(ptrType) != NULL) {
        // If the pointer is uniform or we have a reference (which is a
        // uniform pointer in the end), we can use the regular LLVM GEP.
        llvm::Value *offsets[2] = { LLVMInt32(0), LLVMInt32(elementNum) };
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
        llvm::ArrayRef<llvm::Value *> arrayRef(&offsets[0], &offsets[2]);
        return llvm::GetElementPtrInst::Create(basePtr, arrayRef,
                                               name ? name : "struct_offset", bblock);
#else
        return llvm::GetElementPtrInst::Create(basePtr, &offsets[0], &offsets[2],
                                               name ? name : "struct_offset", bblock);
#endif
    }

    if (dynamic_cast<const ReferenceType *>(ptrType) != NULL)
        ptrType = PointerType::GetUniform(ptrType->GetReferenceTarget());
    assert(dynamic_cast<const PointerType *>(ptrType) != NULL);

    // Otherwise do the math to find the offset and add it to the given
    // varying pointers
    const StructType *st =
        dynamic_cast<const StructType *>(ptrType->GetBaseType());
    llvm::Value *offset = NULL;
    if (st != NULL)
        // If the pointer is to a structure, Target::StructOffset() gives
        // us the offset in bytes to the given element of the structure
        offset = g->target.StructOffset(st->LLVMType(g->ctx), elementNum);
    else {
        // Otherwise we should have a vector here and the offset is given
        // by the element number times the size of the element type of the
        // vector.
        const VectorType *vt =
            dynamic_cast<const VectorType *>(ptrType->GetBaseType());
        assert(vt != NULL);
        llvm::Value *size =
            g->target.SizeOf(vt->GetElementType()->LLVMType(g->ctx));
        llvm::Value *scale = (g->target.is32Bit || g->opt.force32BitAddressing) ?
            LLVMInt32(elementNum) : LLVMInt64(elementNum);
        offset = BinaryOperator(llvm::Instruction::Mul, size, scale);
    }

    offset = SmearUniform(offset, "offset_smear");

    if (g->target.is32Bit == false && g->opt.force32BitAddressing == true)
        // If we're doing 32 bit addressing with a 64 bit target, although
        // we did the math above in 32 bit, we need to go to 64 bit before
        // we add the offset to the varying pointers.
        offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");

    return BinaryOperator(llvm::Instruction::Add, basePtr, offset,
                          "struct_ptr_offset");
}
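

// Illustrative sketch: for a varying pointer to struct { float a; int b; }
// (a hypothetical type), AddElementOffset(ptr, 1, ...) asks
// Target::StructOffset() for the byte offset of 'b' (4 here), smears it
// across the lanes, and adds it to each lane's pointer, yielding per-lane
// addresses of the 'b' members.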


llvm::Value *
FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
    if (ptr == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    LLVM_TYPE_CONST llvm::PointerType *pt =
        llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(ptr->getType());
    assert(pt != NULL);

    // FIXME: it's not clear to me that we generate unaligned vector loads
    // of varying stuff out of the front-end any more.  (Only by the
    // optimization passes that lower gathers to vector loads, I think..)
    // So remove this??
    int align = 0;
    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(pt->getElementType()))
        align = 1;
    llvm::Instruction *inst = new llvm::LoadInst(ptr, name ? name : "load",
                                                 false /* not volatile */,
                                                 align, bblock);
    AddDebugPos(inst);
    return inst;
}
llvm::Value *
|
|
FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask,
|
|
const Type *ptrType, const char *name) {
|
|
if (ptr == NULL) {
|
|
assert(m->errorCount > 0);
|
|
return NULL;
|
|
}
|
|
|
|
assert(ptrType != NULL && mask != NULL);
|
|
|
|
if (dynamic_cast<const ReferenceType *>(ptrType) != NULL)
|
|
ptrType = PointerType::GetUniform(ptrType->GetReferenceTarget());
|
|
|
|
assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
|
|
|
|
if (ptrType->IsUniformType()) {
|
|
// FIXME: same issue as above load inst regarding alignment...
|
|
//
|
|
// If the ptr is a straight up regular pointer, then just issue
|
|
// a regular load. First figure out the alignment; in general we
|
|
// can just assume the natural alignment (0 here), but for varying
|
|
// atomic types, we need to make sure that the compiler emits
|
|
// unaligned vector loads, so we specify a reduced alignment here.
|
|
int align = 0;
|
|
const AtomicType *atomicType =
|
|
dynamic_cast<const AtomicType *>(ptrType->GetBaseType());
|
|
if (atomicType != NULL && atomicType->IsVaryingType())
|
|
// We actually just want to align to the vector element
|
|
// alignment, but can't easily get that here, so just tell LLVM
|
|
// it's totally unaligned. (This shouldn't make any difference
|
|
// vs the proper alignment in practice.)
|
|
align = 1;
|
|
llvm::Instruction *inst = new llvm::LoadInst(ptr, name ? name : "load",
|
|
false /* not volatile */,
|
|
align, bblock);
|
|
AddDebugPos(inst);
|
|
return inst;
|
|
}
|
|
else {
|
|
// Otherwise we should have a varying ptr and it's time for a
|
|
// gather.
|
|
return gather(ptr, ptrType, mask, name);
|
|
}
|
|
}


llvm::Value *
FunctionEmitContext::gather(llvm::Value *ptr, const Type *ptrType,
                            llvm::Value *mask, const char *name) {
    // We should have a varying lvalue if we get here...
    assert(ptrType->IsVaryingType() &&
           ptr->getType() == LLVMTypes::VoidPointerVectorType);

    const Type *returnType = ptrType->GetBaseType()->GetAsVaryingType();
    LLVM_TYPE_CONST llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);

    const CollectionType *collectionType =
        dynamic_cast<const CollectionType *>(ptrType->GetBaseType());
    if (collectionType != NULL) {
        // For collections, recursively gather element-wise to find the
        // result.
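        // For example (illustrative): gathering a varying
        // struct { float a; int b; } issues one 32-bit gather for each of
        // the two members and reassembles the result with InsertInst().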
        llvm::Value *retValue = llvm::UndefValue::get(llvmReturnType);
        for (int i = 0; i < collectionType->GetElementCount(); ++i) {
            llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType);
            const Type *eltPtrType =
                PointerType::GetVarying(collectionType->GetElementType(i));
            eltPtr = addVaryingOffsetsIfNeeded(eltPtr, eltPtrType);

            // This in turn will be another gather
            llvm::Value *eltValues = LoadInst(eltPtr, mask, eltPtrType, name);

            retValue = InsertInst(retValue, eltValues, i, "set_value");
        }
        return retValue;
    }

    // Otherwise we should just have a basic scalar or pointer type and we
    // can go and do the actual gather
    AddInstrumentationPoint("gather");

    // Figure out which gather function to call based on the size of
    // the elements.
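    // (The naming convention is __pseudo_gather<pointer bits>_<element bits>;
    // e.g., on a 32-bit target a varying float is fetched with
    // __pseudo_gather32_32.)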
    const PointerType *pt = dynamic_cast<const PointerType *>(returnType);
    const char *funcName = NULL;
    if (pt != NULL)
        funcName = g->target.is32Bit ? "__pseudo_gather32_32" :
            "__pseudo_gather64_64";
    else if (llvmReturnType == LLVMTypes::DoubleVectorType ||
             llvmReturnType == LLVMTypes::Int64VectorType)
        funcName = g->target.is32Bit ? "__pseudo_gather32_64" :
            "__pseudo_gather64_64";
    else if (llvmReturnType == LLVMTypes::FloatVectorType ||
             llvmReturnType == LLVMTypes::Int32VectorType)
        funcName = g->target.is32Bit ? "__pseudo_gather32_32" :
            "__pseudo_gather64_32";
    else if (llvmReturnType == LLVMTypes::Int16VectorType)
        funcName = g->target.is32Bit ? "__pseudo_gather32_16" :
            "__pseudo_gather64_16";
    else {
        assert(llvmReturnType == LLVMTypes::Int8VectorType);
        funcName = g->target.is32Bit ? "__pseudo_gather32_8" :
            "__pseudo_gather64_8";
    }

    llvm::Function *gatherFunc = m->module->getFunction(funcName);
    assert(gatherFunc != NULL);

    llvm::Value *call = CallInst(gatherFunc, NULL, ptr, mask, name);

    // Add metadata about the source file location so that the
    // optimization passes can print useful performance warnings if we
    // can't optimize out this gather
    addGSMetadata(call, currentPos);

    return BitCastInst(call, llvmReturnType, "gather_bitcast");
}


/** Add metadata to the given instruction to encode the current source file
    position.  This data is used in the lGetSourcePosFromMetadata()
    function in opt.cpp.
*/
void
FunctionEmitContext::addGSMetadata(llvm::Value *v, SourcePos pos) {
    llvm::Instruction *inst = llvm::dyn_cast<llvm::Instruction>(v);
    if (inst == NULL)
        return;

    llvm::Value *str = llvm::MDString::get(*g->ctx, pos.name);
    llvm::MDNode *md = llvm::MDNode::get(*g->ctx, str);
    inst->setMetadata("filename", md);

    llvm::Value *first_line = LLVMInt32(pos.first_line);
    md = llvm::MDNode::get(*g->ctx, first_line);
    inst->setMetadata("first_line", md);

    llvm::Value *first_column = LLVMInt32(pos.first_column);
    md = llvm::MDNode::get(*g->ctx, first_column);
    inst->setMetadata("first_column", md);

    llvm::Value *last_line = LLVMInt32(pos.last_line);
    md = llvm::MDNode::get(*g->ctx, last_line);
    inst->setMetadata("last_line", md);

    llvm::Value *last_column = LLVMInt32(pos.last_column);
    md = llvm::MDNode::get(*g->ctx, last_column);
    inst->setMetadata("last_column", md);
}


llvm::Value *
FunctionEmitContext::AllocaInst(LLVM_TYPE_CONST llvm::Type *llvmType,
                                const char *name, int align,
                                bool atEntryBlock) {
    llvm::AllocaInst *inst = NULL;
    if (atEntryBlock) {
        // We usually insert it right before the jump instruction at the
        // end of allocaBlock
        llvm::Instruction *retInst = allocaBlock->getTerminator();
        assert(retInst);
        inst = new llvm::AllocaInst(llvmType, name ? name : "", retInst);
    }
    else
        // Unless the caller overrode the default and wants it in the
        // current basic block
        inst = new llvm::AllocaInst(llvmType, name ? name : "", bblock);

    // If no alignment was specified but we have an array of a uniform
    // type, then align it to 4 * the native vector width; it's not
    // unlikely that this array will be loaded into varying variables with
    // what will be aligned accesses if the uniform -> varying load is done
    // in regular chunks.
    LLVM_TYPE_CONST llvm::ArrayType *arrayType =
        llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(llvmType);
    if (align == 0 && arrayType != NULL &&
        !llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(arrayType->getElementType()))
        align = 4 * g->target.nativeVectorWidth;
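    // (For example: on an 8-wide target this gives 4 * 8 = 32-byte
    // alignment, which is the size of a full vector of 32-bit elements.)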

    if (align != 0)
        inst->setAlignment(align);
    // Don't add debugging info to alloca instructions
    return inst;
}


/** Code to store the given varying value to the given location, only
    storing the elements that correspond to active program instances as
    given by the provided mask value.  Note that the lvalue is only a
    single pointer, not a varying lvalue of one pointer per program
    instance (that case is handled by scatters).
*/
void
FunctionEmitContext::maskedStore(llvm::Value *value, llvm::Value *ptr,
                                 const Type *ptrType, llvm::Value *mask) {
    if (value == NULL || ptr == NULL) {
        assert(m->errorCount > 0);
        return;
    }

    assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
    assert(ptrType->IsUniformType());

    const Type *valueType = ptrType->GetBaseType();
    const CollectionType *collectionType =
        dynamic_cast<const CollectionType *>(valueType);
    if (collectionType != NULL) {
        // Assigning a structure / array / vector.  Handle each element
        // individually with what turns into a recursive call to
        // maskedStore()
        for (int i = 0; i < collectionType->GetElementCount(); ++i) {
            llvm::Value *eltValue = ExtractInst(value, i, "value_member");
            llvm::Value *eltPtr =
                AddElementOffset(ptr, i, ptrType, "struct_ptr_ptr");
            const Type *eltPtrType =
                PointerType::GetUniform(collectionType->GetElementType(i));
            StoreInst(eltValue, eltPtr, mask, eltPtrType);
        }
        return;
    }

    // We must have a regular atomic, enumerator, or pointer type at this
    // point.
    assert(dynamic_cast<const AtomicType *>(valueType) != NULL ||
           dynamic_cast<const EnumType *>(valueType) != NULL ||
           dynamic_cast<const PointerType *>(valueType) != NULL);
    valueType = valueType->GetAsNonConstType();

    llvm::Function *maskedStoreFunc = NULL;
    // Figure out if we need an 8, 16, 32 or 64-bit masked store.
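    // (Conceptually the masked store is: for each lane i,
    // if (mask[i]) ptr[i] = value[i].  The __pseudo_masked_store_* calls
    // below are placeholders that the optimization passes later lower to
    // actual masked-store code.)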
    if (dynamic_cast<const PointerType *>(valueType) != NULL) {
        if (g->target.is32Bit)
            maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_32");
        else
            maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_64");
    }
    else if (valueType == AtomicType::VaryingDouble ||
             valueType == AtomicType::VaryingInt64 ||
             valueType == AtomicType::VaryingUInt64) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_64");
        ptr = BitCastInst(ptr, LLVMTypes::Int64VectorPointerType,
                          "ptr_to_int64vecptr");
        value = BitCastInst(value, LLVMTypes::Int64VectorType,
                            "value_to_int64");
    }
    else if (valueType == AtomicType::VaryingFloat ||
             valueType == AtomicType::VaryingBool ||
             valueType == AtomicType::VaryingInt32 ||
             valueType == AtomicType::VaryingUInt32 ||
             dynamic_cast<const EnumType *>(valueType) != NULL) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_32");
        ptr = BitCastInst(ptr, LLVMTypes::Int32VectorPointerType,
                          "ptr_to_int32vecptr");
        if (valueType == AtomicType::VaryingFloat)
            value = BitCastInst(value, LLVMTypes::Int32VectorType,
                                "value_to_int32");
    }
    else if (valueType == AtomicType::VaryingInt16 ||
             valueType == AtomicType::VaryingUInt16) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_16");
        ptr = BitCastInst(ptr, LLVMTypes::Int16VectorPointerType,
                          "ptr_to_int16vecptr");
    }
    else if (valueType == AtomicType::VaryingInt8 ||
             valueType == AtomicType::VaryingUInt8) {
        maskedStoreFunc = m->module->getFunction("__pseudo_masked_store_8");
        ptr = BitCastInst(ptr, LLVMTypes::Int8VectorPointerType,
                          "ptr_to_int8vecptr");
    }
    assert(maskedStoreFunc != NULL);

    std::vector<llvm::Value *> args;
    args.push_back(ptr);
    args.push_back(value);
    args.push_back(mask);
    CallInst(maskedStoreFunc, NULL, args);
}


/** Scatter the given varying value to the locations given by the varying
    lvalue (which should be an array of pointers with size equal to the
    target's vector width).  We want to store each rvalue element at the
    corresponding pointer's location, *if* the mask for the corresponding
    program instance is on.  If it's off, don't do anything.
*/
void
FunctionEmitContext::scatter(llvm::Value *value, llvm::Value *ptr,
                             const Type *ptrType, llvm::Value *mask) {
    assert(dynamic_cast<const PointerType *>(ptrType) != NULL);
    assert(ptrType->IsVaryingType());

    const Type *valueType = ptrType->GetBaseType();

    // I think this should be impossible
    assert(dynamic_cast<const ArrayType *>(valueType) == NULL);

    const CollectionType *collectionType =
        dynamic_cast<const CollectionType *>(valueType);
    if (collectionType != NULL) {
        // Scatter the collection elements individually
        for (int i = 0; i < collectionType->GetElementCount(); ++i) {
            llvm::Value *eltPtr = AddElementOffset(ptr, i, ptrType);
            llvm::Value *eltValue = ExtractInst(value, i);
            const Type *eltPtrType =
                PointerType::GetVarying(collectionType->GetElementType(i));
            eltPtr = addVaryingOffsetsIfNeeded(eltPtr, eltPtrType);
            scatter(eltValue, eltPtr, eltPtrType, mask);
        }
        return;
    }

    const PointerType *pt = dynamic_cast<const PointerType *>(valueType);

    // And everything should be a pointer or atomic from here on out...
    assert(pt != NULL ||
           dynamic_cast<const AtomicType *>(valueType) != NULL);

    LLVM_TYPE_CONST llvm::Type *type = value->getType();
    const char *funcName = NULL;
    if (pt != NULL)
        funcName = g->target.is32Bit ? "__pseudo_scatter32_32" :
            "__pseudo_scatter64_64";
    else if (type == LLVMTypes::DoubleVectorType ||
             type == LLVMTypes::Int64VectorType) {
        funcName = g->target.is32Bit ? "__pseudo_scatter32_64" :
            "__pseudo_scatter64_64";
        value = BitCastInst(value, LLVMTypes::Int64VectorType, "value2int");
    }
    else if (type == LLVMTypes::FloatVectorType ||
             type == LLVMTypes::Int32VectorType) {
        funcName = g->target.is32Bit ? "__pseudo_scatter32_32" :
            "__pseudo_scatter64_32";
        value = BitCastInst(value, LLVMTypes::Int32VectorType, "value2int");
    }
    else if (type == LLVMTypes::Int16VectorType)
        funcName = g->target.is32Bit ? "__pseudo_scatter32_16" :
            "__pseudo_scatter64_16";
    else {
        // As in gather(), 8-bit elements are the only remaining
        // possibility; assert that rather than leaving funcName NULL.
        assert(type == LLVMTypes::Int8VectorType);
        funcName = g->target.is32Bit ? "__pseudo_scatter32_8" :
            "__pseudo_scatter64_8";
    }

    llvm::Function *scatterFunc = m->module->getFunction(funcName);
    assert(scatterFunc != NULL);

    AddInstrumentationPoint("scatter");

    std::vector<llvm::Value *> args;
    args.push_back(ptr);
    args.push_back(value);
    args.push_back(mask);
    llvm::Value *inst = CallInst(scatterFunc, NULL, args);
    addGSMetadata(inst, currentPos);
}


void
FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) {
    if (value == NULL || ptr == NULL) {
        // may happen due to an error elsewhere
        assert(m->errorCount > 0);
        return;
    }

    llvm::Instruction *inst;
    if (llvm::isa<llvm::VectorType>(value->getType()))
        // FIXME: same as for load--do we still need/want this??
        // Specify an unaligned store, since we don't know that the ptr
        // will in fact be aligned to a vector width here.  (It actually
        // should be aligned to the alignment of the vector element type...)
        inst = new llvm::StoreInst(value, ptr, false /* not volatile */,
                                   1, bblock);
    else
        inst = new llvm::StoreInst(value, ptr, bblock);

    AddDebugPos(inst);
}


void
FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr,
                               llvm::Value *mask, const Type *ptrType) {
    if (value == NULL || ptr == NULL) {
        // may happen due to an error elsewhere
        assert(m->errorCount > 0);
        return;
    }

    if (dynamic_cast<const ReferenceType *>(ptrType) != NULL)
        ptrType = PointerType::GetUniform(ptrType->GetReferenceTarget());

    // Figure out what kind of store we're doing here
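    // (In short: uniform pointer to a uniform value -> plain store;
    // uniform pointer to a varying value -> masked store, unless the mask
    // is known to be all on; varying pointer -> scatter.)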
    if (ptrType->IsUniformType()) {
        if (ptrType->GetBaseType()->IsUniformType())
            // the easy case
            StoreInst(value, ptr);
        else if (mask == LLVMMaskAllOn)
            // Otherwise it's a masked store, unless we can determine that
            // the mask is all on...  (It's unclear whether this check is
            // actually useful.)
            StoreInst(value, ptr);
        else
            maskedStore(value, ptr, ptrType, mask);
    }
    else {
        assert(ptrType->IsVaryingType());
        // We have a varying ptr (an array of pointers), so it's time to
        // scatter
        scatter(value, ptr, ptrType, mask);
    }
}


void
FunctionEmitContext::BranchInst(llvm::BasicBlock *dest) {
    llvm::Instruction *b = llvm::BranchInst::Create(dest, bblock);
    AddDebugPos(b);
}


void
FunctionEmitContext::BranchInst(llvm::BasicBlock *trueBlock,
                                llvm::BasicBlock *falseBlock,
                                llvm::Value *test) {
    if (test == NULL) {
        assert(m->errorCount > 0);
        return;
    }

    llvm::Instruction *b =
        llvm::BranchInst::Create(trueBlock, falseBlock, test, bblock);
    AddDebugPos(b);
}


llvm::Value *
FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const char *name) {
    if (v == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    llvm::Instruction *ei = NULL;
    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(v->getType()))
        ei = llvm::ExtractElementInst::Create(v, LLVMInt32(elt),
                                              name ? name : "extract", bblock);
    else
        ei = llvm::ExtractValueInst::Create(v, elt, name ? name : "extract",
                                            bblock);
    AddDebugPos(ei);
    return ei;
}


llvm::Value *
FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
                                const char *name) {
    if (v == NULL || eltVal == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    llvm::Instruction *ii = NULL;
    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(v->getType()))
        ii = llvm::InsertElementInst::Create(v, eltVal, LLVMInt32(elt),
                                             name ? name : "insert", bblock);
    else
        ii = llvm::InsertValueInst::Create(v, eltVal, elt,
                                           name ? name : "insert", bblock);
    AddDebugPos(ii);
    return ii;
}


llvm::PHINode *
FunctionEmitContext::PhiNode(LLVM_TYPE_CONST llvm::Type *type, int count,
                             const char *name) {
    llvm::PHINode *pn = llvm::PHINode::Create(type,
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
                                              count,
#endif // LLVM_3_0
                                              name ? name : "phi", bblock);
    AddDebugPos(pn);
    return pn;
}


llvm::Instruction *
FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0,
                                llvm::Value *val1, const char *name) {
    if (test == NULL || val0 == NULL || val1 == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    llvm::Instruction *inst =
        llvm::SelectInst::Create(test, val0, val1, name ? name : "select",
                                 bblock);
    AddDebugPos(inst);
    return inst;
}


/** Given a value representing a function to be called or a possibly-varying
    pointer to a function to be called, figure out how many arguments the
    function has. */
static unsigned int
lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) {
    LLVM_TYPE_CONST llvm::FunctionType *ft =
        llvm::dyn_cast<LLVM_TYPE_CONST llvm::FunctionType>(callee->getType());

    if (ft == NULL) {
        LLVM_TYPE_CONST llvm::PointerType *pt =
            llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(callee->getType());
        if (pt == NULL) {
            // varying function pointer--in this case, it must be the
            // version of the function that takes a mask
            return funcType->GetNumParameters() + 1;
        }
        ft = llvm::dyn_cast<LLVM_TYPE_CONST llvm::FunctionType>(pt->getElementType());
    }

    assert(ft != NULL);
    return ft->getNumParams();
}


llvm::Value *
FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
                              const std::vector<llvm::Value *> &args,
                              const char *name) {
    if (func == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    std::vector<llvm::Value *> argVals = args;
    // Most of the time, the mask is passed as the last argument.  This
    // isn't the case for things like intrinsics, builtins, and extern "C"
    // functions from the application.  Add the mask if it's needed.
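    // (For example--illustrative, not the exact IR: an ispc function
    // declared as float f(float x) is emitted to take (float x, mask), so
    // a call site with one user argument passes two values here, while an
    // extern "C" function keeps its declared arity.)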
    unsigned int calleeArgCount = lCalleeArgCount(func, funcType);
    assert(argVals.size() + 1 == calleeArgCount ||
           argVals.size() == calleeArgCount);
    if (argVals.size() + 1 == calleeArgCount)
        argVals.push_back(GetFullMask());

    if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(func->getType()) == false) {
        // Regular 'uniform' function call--just one function or function
        // pointer, so just emit the IR directly.
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
        llvm::Instruction *ci =
            llvm::CallInst::Create(func, argVals, name ? name : "", bblock);
#else
        llvm::Instruction *ci =
            llvm::CallInst::Create(func, argVals.begin(), argVals.end(),
                                   name ? name : "", bblock);
#endif
        AddDebugPos(ci);
        return ci;
    }
    else {
        // Emit the code for a varying function call, where we have a
        // vector of function pointers, one for each program instance.  The
        // basic strategy is that we go through the function pointers, and
        // for the executing program instances, for each unique function
        // pointer that's in the vector, call that function with a mask
        // equal to the set of active program instances that also have that
        // function pointer.  When all unique function pointers have been
        // called, we're done.
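        //
        // Roughly (pseudocode for the loop emitted below):
        //     remaining = current mask
        //     while (any(remaining)) {
        //         f = funcPtrs[index of first active lane in remaining]
        //         callMask = remaining & (funcPtrs == f)
        //         result (under callMask) = f(args..., callMask)
        //         remaining &= ~callMask
        //     }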

        llvm::BasicBlock *bbTest = CreateBasicBlock("varying_funcall_test");
        llvm::BasicBlock *bbCall = CreateBasicBlock("varying_funcall_call");
        llvm::BasicBlock *bbDone = CreateBasicBlock("varying_funcall_done");

        // Get the current mask value so we can restore it later
        llvm::Value *origMask = GetInternalMask();

        // First allocate memory to accumulate the various program
        // instances' return values...
        const Type *returnType = funcType->GetReturnType();
        LLVM_TYPE_CONST llvm::Type *llvmReturnType = returnType->LLVMType(g->ctx);
        llvm::Value *resultPtr = NULL;
        if (llvmReturnType->isVoidTy() == false)
            resultPtr = AllocaInst(llvmReturnType);

        // The memory pointed to by maskPtr tracks the set of program
        // instances for which we still need to call the function they are
        // pointing to.  It starts out initialized with the mask of
        // currently running program instances.
        llvm::Value *maskPtr = AllocaInst(LLVMTypes::MaskType);
        StoreInst(GetFullMask(), maskPtr);

        // And now we branch to the test to see if there's more work to be
        // done.
        BranchInst(bbTest);

        // bbTest: are any lanes of the mask still on?  If so, jump to
        // bbCall.
        SetCurrentBasicBlock(bbTest); {
            llvm::Value *maskLoad = LoadInst(maskPtr);
            llvm::Value *any = Any(maskLoad);
            BranchInst(bbCall, bbDone, any);
        }

        // bbCall: this is the body of the loop that calls out to one of
        // the active function pointer values.
        SetCurrentBasicBlock(bbCall); {
            // Figure out the first lane that still needs its function
            // pointer to be called.
            llvm::Value *currentMask = LoadInst(maskPtr);
            llvm::Function *cttz =
                m->module->getFunction("__count_trailing_zeros_i32");
            assert(cttz != NULL);
            llvm::Value *firstLane = CallInst(cttz, NULL, LaneMask(currentMask),
                                              "first_lane");

            // Get the pointer to the function we're going to call this
            // time through: fptr = func[firstLane]
            llvm::Value *fptr =
                llvm::ExtractElementInst::Create(func, firstLane,
                                                 "extract_fptr", bblock);

            // Smear it out into a vector of function pointers
            llvm::Value *fptrSmear = SmearUniform(fptr, "func_ptr");

            // fpOverlap = (fpSmearAsVec == fpOrigAsVec).  This gives us a
            // mask for the set of program instances that have the same
            // value for their function pointer.
            llvm::Value *fpOverlap =
                CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
                        fptrSmear, func);
            fpOverlap = I1VecToBoolVec(fpOverlap);

            // Figure out the mask to use when calling the function
            // pointer: we need to AND with the current execution mask to
            // handle the case of any non-running program instances that
            // happen to have this function pointer value.
            // callMask = (currentMask & fpOverlap)
            llvm::Value *callMask =
                BinaryOperator(llvm::Instruction::And, currentMask, fpOverlap,
                               "call_mask");

            // Set the mask
            SetInternalMask(callMask);

            // bitcast the i32/64 function pointer to the actual function
            // pointer type (the variant that includes a mask).
            LLVM_TYPE_CONST llvm::Type *llvmFuncType =
                funcType->LLVMFunctionType(g->ctx, true);
            LLVM_TYPE_CONST llvm::Type *llvmFPtrType =
                llvm::PointerType::get(llvmFuncType, 0);
            llvm::Value *fptrCast = IntToPtrInst(fptr, llvmFPtrType);

            // Call the function: callResult = call fptr(args..., callMask)
            llvm::Value *callResult = CallInst(fptrCast, funcType, args, name);

            // Now, do a masked store into the memory allocated to
            // accumulate the result, using the call mask.
            if (callResult != NULL) {
                assert(resultPtr != NULL);
                StoreInst(callResult, resultPtr, callMask,
                          PointerType::GetUniform(returnType));
            }
            else
                assert(resultPtr == NULL);

            // Update the mask to turn off the program instances for which
            // we just called the function.
            // currentMask = currentMask & ~callMask
            llvm::Value *notCallMask =
                BinaryOperator(llvm::Instruction::Xor, callMask, LLVMMaskAllOn,
                               "~callMask");
            currentMask = BinaryOperator(llvm::Instruction::And, currentMask,
                                         notCallMask, "currentMask&~callMask");
            StoreInst(currentMask, maskPtr);

            // And go back to the test to see if we need to do another
            // call.
            BranchInst(bbTest);
        }

        // bbDone: we're all done; clean up and return the result we've
        // accumulated in the result memory.
        SetCurrentBasicBlock(bbDone);
        SetInternalMask(origMask);
        return resultPtr ? LoadInst(resultPtr) : NULL;
    }
}


llvm::Value *
FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
                              llvm::Value *arg, const char *name) {
    std::vector<llvm::Value *> args;
    args.push_back(arg);
    return CallInst(func, funcType, args, name);
}


llvm::Value *
FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
                              llvm::Value *arg0, llvm::Value *arg1,
                              const char *name) {
    std::vector<llvm::Value *> args;
    args.push_back(arg0);
    args.push_back(arg1);
    return CallInst(func, funcType, args, name);
}


llvm::Instruction *
FunctionEmitContext::ReturnInst() {
    if (launchedTasks)
        // Add a sync call at the end of any function that launched tasks
        SyncInst();

    llvm::Instruction *rinst = NULL;
    if (returnValuePtr != NULL) {
        // We have value(s) to return; load them from their storage
        // location
        llvm::Value *retVal = LoadInst(returnValuePtr, "return_value");
        rinst = llvm::ReturnInst::Create(*g->ctx, retVal, bblock);
    }
    else {
        assert(function->GetReturnType() == AtomicType::Void);
        rinst = llvm::ReturnInst::Create(*g->ctx, bblock);
    }

    AddDebugPos(rinst);
    bblock = NULL;
    return rinst;
}


llvm::Value *
FunctionEmitContext::LaunchInst(llvm::Value *callee,
                                std::vector<llvm::Value *> &argVals,
                                llvm::Value *launchCount) {
    if (callee == NULL) {
        assert(m->errorCount > 0);
        return NULL;
    }

    launchedTasks = true;

    assert(llvm::isa<llvm::Function>(callee));
    LLVM_TYPE_CONST llvm::Type *argType =
        (llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
    assert(llvm::PointerType::classof(argType));
    LLVM_TYPE_CONST llvm::PointerType *pt =
        llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(argType);
    assert(llvm::StructType::classof(pt->getElementType()));
    LLVM_TYPE_CONST llvm::StructType *argStructType =
        static_cast<LLVM_TYPE_CONST llvm::StructType *>(pt->getElementType());
    assert(argStructType->getNumElements() == argVals.size() + 1);
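    // (The task function's single parameter is thus a pointer to a struct
    // holding all of the user-supplied arguments, with the execution mask
    // appended as the final member--illustratively:
    // struct Args { <user args...>; MaskType mask; }.)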

    llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
    assert(falloc != NULL);
    llvm::Value *structSize = g->target.SizeOf(argStructType);
    if (structSize->getType() != LLVMTypes::Int64Type)
        // ISPCAlloc expects the size as a uint64_t, but on 32-bit
        // targets, SizeOf returns a 32-bit value
        structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
                              "struct_size_to_64");
    int align = 4 * RoundUpPow2(g->target.nativeVectorWidth);

    std::vector<llvm::Value *> allocArgs;
    allocArgs.push_back(launchGroupHandlePtr);
    allocArgs.push_back(structSize);
    allocArgs.push_back(LLVMInt32(align));
    llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
    llvm::Value *argmem = BitCastInst(voidmem, pt);

    // Copy the values of the parameters into the appropriate place in
    // the argument block
    for (unsigned int i = 0; i < argVals.size(); ++i) {
        llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
        // don't need to do a masked store here, I think
        StoreInst(argVals[i], ptr);
    }

    // copy in the mask
    llvm::Value *mask = GetFullMask();
    llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
                                        "funarg_mask");
    StoreInst(mask, ptr);

    // And emit the call to the user-supplied task launch function, passing
    // a pointer to the task function being called and a pointer to the
    // argument block we just filled in
    llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
    llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
    assert(flaunch != NULL);
    std::vector<llvm::Value *> args;
    args.push_back(launchGroupHandlePtr);
    args.push_back(fptr);
    args.push_back(voidmem);
    args.push_back(launchCount);
    return CallInst(flaunch, NULL, args, "");
}


void
FunctionEmitContext::SyncInst() {
    llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
    llvm::Value *nullPtrValue =
        llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
    llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp,
                                   llvm::CmpInst::ICMP_NE,
                                   launchGroupHandle, nullPtrValue);
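    // In effect, the code emitted below is:
    //     if (launchGroupHandle != NULL) {
    //         ISPCSync(launchGroupHandle);
    //         launchGroupHandle = NULL;
    //     }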
    llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
    llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
    BranchInst(bSync, bPostSync, nonNull);

    SetCurrentBasicBlock(bSync);
    llvm::Function *fsync = m->module->getFunction("ISPCSync");
    if (fsync == NULL)
        FATAL("Couldn't find ISPCSync declaration?!");
    CallInst(fsync, NULL, launchGroupHandle, "");

    // zero out the handle so that if ISPCLaunch is called again in this
    // function, it knows it's starting out from scratch
    StoreInst(nullPtrValue, launchGroupHandlePtr);

    BranchInst(bPostSync);

    SetCurrentBasicBlock(bPostSync);
}


/** When gathering from or scattering to a varying atomic type, we need
    to add an appropriate offset to the final address for each lane right
    before we use it.  Given a varying pointer we're about to use and its
    type, this function determines whether these offsets are needed and
    returns an updated pointer that incorporates these offsets if needed.
*/
llvm::Value *
FunctionEmitContext::addVaryingOffsetsIfNeeded(llvm::Value *ptr,
                                               const Type *ptrType) {
    // This should only be called for varying pointers
    const PointerType *pt = dynamic_cast<const PointerType *>(ptrType);
    assert(pt && pt->IsVaryingType());

    const Type *baseType = ptrType->GetBaseType();
    assert(dynamic_cast<const AtomicType *>(baseType) != NULL ||
           dynamic_cast<const EnumType *>(baseType) != NULL ||
           dynamic_cast<const PointerType *>(baseType) != NULL);
    if (baseType->IsUniformType())
        return ptr;

    // Find the size of a uniform element of the varying type
    LLVM_TYPE_CONST llvm::Type *llvmBaseUniformType =
        baseType->GetAsUniformType()->LLVMType(g->ctx);
    llvm::Value *unifSize = g->target.SizeOf(llvmBaseUniformType);
    unifSize = SmearUniform(unifSize);

    // Compute offset = <0, 1, ...> * unifSize
    llvm::Value *varyingOffsets = llvm::UndefValue::get(unifSize->getType());
    for (int i = 0; i < g->target.vectorWidth; ++i) {
        llvm::Value *iValue = (g->target.is32Bit || g->opt.force32BitAddressing) ?
            LLVMInt32(i) : LLVMInt64(i);
        varyingOffsets = InsertInst(varyingOffsets, iValue, i, "varying_delta");
    }
    llvm::Value *offset = BinaryOperator(llvm::Instruction::Mul, unifSize,
                                         varyingOffsets);
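    // For example (illustrative): for a varying float on a 4-wide target
    // with 32-bit addressing, unifSize smears to <4, 4, 4, 4> and the
    // per-lane byte offsets come out as <0, 4, 8, 12>.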

    if (g->opt.force32BitAddressing == true && g->target.is32Bit == false)
        // On 64-bit targets where we're doing 32-bit addressing
        // calculations, we need to convert to an i64 vector before adding
        // to the pointer
        offset = SExtInst(offset, LLVMTypes::Int64VectorType, "offset_to_64");

    return BinaryOperator(llvm::Instruction::Add, ptr, offset);
}