added #ifdef ISPC_NVPTX_ENALED ... #endif guards

This commit is contained in:
evghenii
2014-07-09 12:32:18 +02:00
parent 44c74728bc
commit b3c5a9c4d6
16 changed files with 726 additions and 553 deletions

700
stmt.cpp
View File

@@ -142,6 +142,7 @@ lHasUnsizedArrays(const Type *type) {
return lHasUnsizedArrays(at->GetElementType());
}
#ifdef ISPC_NVPTX_ENABLED
static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value, const SourcePos &currentPos, const bool variable = false)
{
if (!value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX)
@@ -198,6 +199,7 @@ static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *
return value;
}
#endif /* ISPC_NVPTX_ENABLED */
void
DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
@@ -261,9 +263,8 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
return;
}
if (sym->storageClass == SC_STATIC) {
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX && !sym->type->IsConstType())
{
Error(sym->pos,
@@ -279,7 +280,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
PerformanceWarning(sym->pos,
"\"const static uniform\" variable ""\"%s\" is stored in __constant address space with ""\"nvptx\" target.",
sym->name.c_str());
#endif /* ISPC_NVPTX_ENABLED */
// For static variables, we need a compile-time constant value
// for its initializer; if there's no initializer, we use a
// zero value.
@@ -307,28 +308,38 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
if (cinit == NULL)
cinit = llvm::Constant::getNullValue(llvmType);
// Allocate space for the static variable in global scope, so
// that it persists across function calls
#ifdef ISPC_NVPTX_ENABLED
int addressSpace = 0;
if (g->target->getISA() == Target::NVPTX &&
sym->type->IsConstType() &&
sym->type->IsUniformType())
addressSpace = 4;
// Allocate space for the static variable in global scope, so
// that it persists across function calls
sym->storagePtr =
new llvm::GlobalVariable(*m->module, llvmType,
sym->type->IsConstType(),
llvm::GlobalValue::InternalLinkage, cinit,
llvm::Twine("static_") +
llvm::Twine("static.") +
llvm::Twine(sym->pos.first_line) +
llvm::Twine("_") + sym->name.c_str(),
llvm::Twine(".") + sym->name.c_str(),
NULL,
llvm::GlobalVariable::NotThreadLocal,
addressSpace);
sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
#else /* ISPC_NVPTX_ENABLED */
sym->storagePtr =
new llvm::GlobalVariable(*m->module, llvmType,
sym->type->IsConstType(),
llvm::GlobalValue::InternalLinkage, cinit,
llvm::Twine("static.") +
llvm::Twine(sym->pos.first_line) +
llvm::Twine(".") + sym->name.c_str());
#endif /* ISPC_NVPTX_ENABLED */
// Tell the FunctionEmitContext about the variable
ctx->EmitVariableDebugInfo(sym);
}
#ifdef ISPC_NVPTX_ENABLED
else if ((sym->type->IsUniformType() || sym->type->IsSOAType()) &&
/* NVPTX:
* only non-constant uniform data types are stored in shared memory
@@ -396,6 +407,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
sym->parentFunction = ctx->GetFunction();
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
}
#endif /* ISPC_NVPTX_ENABLED */
else
{
// For non-static variables, allocate storage on the stack
@@ -404,7 +416,6 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
// Tell the FunctionEmitContext about the variable; must do
// this before the initializer stuff.
ctx->EmitVariableDebugInfo(sym);
if (initExpr == 0 && sym->type->IsConstType())
Error(sym->pos, "Missing initializer for const variable "
"\"%s\".", sym->name.c_str());
@@ -412,7 +423,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
// And then get it initialized...
sym->parentFunction = ctx->GetFunction();
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
}
}
}
}
@@ -571,7 +582,7 @@ IfStmt::EmitCode(FunctionEmitContext *ctx) const {
if (testValue == NULL)
return;
#ifdef ISPC_NVPTX_ENABLED
#if 0
if (!isUniform && g->target->getISA() == Target::NVPTX)
{
@@ -582,7 +593,7 @@ IfStmt::EmitCode(FunctionEmitContext *ctx) const {
isUniform = true;
}
#endif
#endif /* ISPC_NVPTX_ENABLED */
if (isUniform) {
ctx->StartUniformIf();
@@ -865,11 +876,17 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
// Do any of the program instances want to run the 'true'
// block? If not, jump ahead to bNext.
#if 1
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
#else
#ifdef ISPC_NVPTX_ENABLED
#if 0
llvm::Value *maskAnyTrueQ = ctx->ExtractInst(ctx->GetFullMask(),0);
#else
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
#endif
#else /* ISPC_NVPTX_ENABLED */
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
#endif /* ISPC_NVPTX_ENABLED */
ctx->BranchInst(bRunTrue, bNext, maskAnyTrueQ);
// Emit statements for true
@@ -886,11 +903,16 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
// Similarly, check to see if any of the instances want to
// run the 'false' block...
#if 1
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
#else
#ifdef ISPC_NVPTX_ENABLED
#if 0
llvm::Value *maskAnyFalseQ = ctx->ExtractInst(ctx->GetFullMask(),0);
#else
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
#endif
#else /* ISPC_NVPTX_ENABLED */
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
#endif /* ISPC_NVPTX_ENABLED */
ctx->BranchInst(bRunFalse, bDone, maskAnyFalseQ);
// Emit code for false
@@ -1450,10 +1472,96 @@ static llvm::Value *
lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
llvm::Value *uniformCounterPtr,
llvm::Value *varyingCounterPtr,
const std::vector<int> &spans)
{
if (g->target->getISA() != Target::NVPTX)
{
const std::vector<int> &spans) {
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
// Smear the uniform counter value out to be varying
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter = ctx->BroadcastValue(
counter, LLVMTypes::Int32VectorType, "smear_counter");
// Figure out the offsets; this is a little bit tricky. As an example,
// consider a 2D tiled foreach loop, where we're running 8-wide and
// where the inner dimension has a stride of 4 and the outer dimension
// has a stride of 2. For the inner dimension, we want the offsets
// (0,1,2,3,0,1,2,3), and for the outer dimension we want
// (0,0,0,0,1,1,1,1).
int32_t delta[ISPC_MAX_NVEC];
const int vecWidth = 32;
std::vector<llvm::Constant*> constDeltaList;
for (int i = 0; i < vecWidth; ++i)
{
int d = i;
// First, account for the effect of any dimensions at deeper
// nesting levels than the current one.
int prevDimSpanCount = 1;
for (int j = dim; j < nDims-1; ++j)
prevDimSpanCount *= spans[j+1];
d /= prevDimSpanCount;
// And now with what's left, figure out our own offset
delta[i] = d % spans[dim];
constDeltaList.push_back(LLVMInt8(delta[i]));
}
llvm::ArrayType* ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int8Type, 32);
// llvm::PointerType::get(ArrayDelta, 4); /* constant memory */
llvm::GlobalVariable* globalDelta = new llvm::GlobalVariable(
/*Module=*/*m->module,
/*Type=*/ArrayDelta,
/*isConstant=*/true,
/*Linkage=*/llvm::GlobalValue::PrivateLinkage,
/*Initializer=*/0, // has initializer, specified below
/*Name=*/"constDeltaForeach");
#if 0
/*ThreadLocalMode=*/llvm::GlobalVariable::NotThreadLocal,
/*unsigned AddressSpace=*/4 /*constant*/);
#endif
llvm::Constant* constDelta = llvm::ConstantArray::get(ArrayDelta, constDeltaList);
globalDelta->setInitializer(constDelta);
llvm::Function *func_program_index = m->module->getFunction("__program_index");
llvm::Value *laneIdx = ctx->CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__programIndex");
std::vector<llvm::Value*> ptr_arrayidx_indices;
ptr_arrayidx_indices.push_back(LLVMInt32(0));
ptr_arrayidx_indices.push_back(laneIdx);
#if 1
llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
llvm::LoadInst* int8_39 = new llvm::LoadInst(ptr_arrayidx, "", false, ctx->GetCurrentBasicBlock());
llvm::Value * int32_39 = ctx->ZExtInst(int8_39, LLVMTypes::Int32Type);
llvm::VectorType* VectorTy_2 = llvm::VectorType::get(llvm::IntegerType::get(*g->ctx, 32), 1);
llvm::UndefValue* const_packed_41 = llvm::UndefValue::get(VectorTy_2);
llvm::InsertElementInst* packed_43 = llvm::InsertElementInst::Create(
// llvm::UndefValue(LLVMInt32Vector),
const_packed_41,
int32_39, LLVMInt32(0), "", ctx->GetCurrentBasicBlock());
#endif
// Add the deltas to compute the varying counter values; store the
// result to memory and then return it directly as well.
#if 0
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
LLVMInt32Vector(delta), "iter_val");
#else
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
packed_43, "iter_val");
#endif
ctx->StoreInst(varyingCounter, varyingCounterPtr);
return varyingCounter;
}
#endif /* ISPC_NVPTX_ENABLED */
// Smear the uniform counter value out to be varying
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter = ctx->BroadcastValue(
@@ -1486,93 +1594,6 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
LLVMInt32Vector(delta), "iter_val");
ctx->StoreInst(varyingCounter, varyingCounterPtr);
return varyingCounter;
}
else /* NVPTX == true */
{
// Smear the uniform counter value out to be varying
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter = ctx->BroadcastValue(
counter, LLVMTypes::Int32VectorType, "smear_counter");
// Figure out the offsets; this is a little bit tricky. As an example,
// consider a 2D tiled foreach loop, where we're running 8-wide and
// where the inner dimension has a stride of 4 and the outer dimension
// has a stride of 2. For the inner dimension, we want the offsets
// (0,1,2,3,0,1,2,3), and for the outer dimension we want
// (0,0,0,0,1,1,1,1).
int32_t delta[ISPC_MAX_NVEC];
const int vecWidth = 32;
std::vector<llvm::Constant*> constDeltaList;
for (int i = 0; i < vecWidth; ++i)
{
int d = i;
// First, account for the effect of any dimensions at deeper
// nesting levels than the current one.
int prevDimSpanCount = 1;
for (int j = dim; j < nDims-1; ++j)
prevDimSpanCount *= spans[j+1];
d /= prevDimSpanCount;
// And now with what's left, figure out our own offset
delta[i] = d % spans[dim];
constDeltaList.push_back(LLVMInt8(delta[i]));
}
llvm::ArrayType* ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int8Type, 32);
// llvm::PointerType::get(ArrayDelta, 4); /* constant memory */
llvm::GlobalVariable* globalDelta = new llvm::GlobalVariable(
/*Module=*/*m->module,
/*Type=*/ArrayDelta,
/*isConstant=*/true,
/*Linkage=*/llvm::GlobalValue::PrivateLinkage,
/*Initializer=*/0, // has initializer, specified below
/*Name=*/"constDeltaForeach");
#if 0
/*ThreadLocalMode=*/llvm::GlobalVariable::NotThreadLocal,
/*unsigned AddressSpace=*/4 /*constant*/);
#endif
llvm::Constant* constDelta = llvm::ConstantArray::get(ArrayDelta, constDeltaList);
globalDelta->setInitializer(constDelta);
llvm::Function *func_program_index = m->module->getFunction("__program_index");
llvm::Value *laneIdx = ctx->CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__programIndex");
std::vector<llvm::Value*> ptr_arrayidx_indices;
ptr_arrayidx_indices.push_back(LLVMInt32(0));
ptr_arrayidx_indices.push_back(laneIdx);
#if 1
llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
llvm::LoadInst* int8_39 = new llvm::LoadInst(ptr_arrayidx, "", false, ctx->GetCurrentBasicBlock());
llvm::Value * int32_39 = ctx->ZExtInst(int8_39, LLVMTypes::Int32Type);
llvm::VectorType* VectorTy_2 = llvm::VectorType::get(llvm::IntegerType::get(*g->ctx, 32), 1);
llvm::UndefValue* const_packed_41 = llvm::UndefValue::get(VectorTy_2);
llvm::InsertElementInst* packed_43 = llvm::InsertElementInst::Create(
// llvm::UndefValue(LLVMInt32Vector),
const_packed_41,
int32_39, LLVMInt32(0), "", ctx->GetCurrentBasicBlock());
#endif
// Add the deltas to compute the varying counter values; store the
// result to memory and then return it directly as well.
#if 0
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
LLVMInt32Vector(delta), "iter_val");
#else
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
packed_43, "iter_val");
#endif
ctx->StoreInst(varyingCounter, varyingCounterPtr);
return varyingCounter;
}
}
@@ -1650,7 +1671,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// This should be caught during typechecking
AssertPos(pos, startExprs.size() == dimVariables.size() &&
endExprs.size() == dimVariables.size());
endExprs.size() == dimVariables.size());
int nDims = (int)dimVariables.size();
///////////////////////////////////////////////////////////////////////
@@ -1661,66 +1682,70 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs;
std::vector<int> span(nDims, 0);
#ifdef ISPC_NVPTX_ENABLED
const int vectorWidth =
g->target->getISA() == Target::NVPTX ? 32 : g->target->getVectorWidth();
lGetSpans(nDims-1, nDims, vectorWidth, isTiled, &span[0]);
#else /* ISPC_NVPTX_ENABLED */
lGetSpans(nDims-1, nDims, g->target->getVectorWidth(), isTiled, &span[0]);
#endif /* ISPC_NVPTX_ENABLED */
for (int i = 0; i < nDims; ++i) {
// Basic blocks that we'll fill in later with the looping logic for
// this dimension.
bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
if (i < nDims-1)
// stepping for the innermost dimension is handled specially
bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
// Basic blocks that we'll fill in later with the looping logic for
// this dimension.
bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
if (i < nDims-1)
// stepping for the innermost dimension is handled specially
bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
// Start and end value for this loop dimension
llvm::Value *sv = startExprs[i]->GetValue(ctx);
llvm::Value *ev = endExprs[i]->GetValue(ctx);
if (sv == NULL || ev == NULL)
return;
startVals.push_back(sv);
endVals.push_back(ev);
// Start and end value for this loop dimension
llvm::Value *sv = startExprs[i]->GetValue(ctx);
llvm::Value *ev = endExprs[i]->GetValue(ctx);
if (sv == NULL || ev == NULL)
return;
startVals.push_back(sv);
endVals.push_back(ev);
// nItems = endVal - startVal
llvm::Value *nItems =
ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems");
// nItems = endVal - startVal
llvm::Value *nItems =
ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems");
// nExtras = nItems % (span for this dimension)
// This gives us the number of extra elements we need to deal with
// at the end of the loop for this dimension that don't fit cleanly
// into a vector width.
nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems,
LLVMInt32(span[i]), "nextras"));
// nExtras = nItems % (span for this dimension)
// This gives us the number of extra elements we need to deal with
// at the end of the loop for this dimension that don't fit cleanly
// into a vector width.
nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems,
LLVMInt32(span[i]), "nextras"));
// alignedEnd = endVal - nExtras
alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
nExtras[i], "aligned_end"));
// alignedEnd = endVal - nExtras
alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
nExtras[i], "aligned_end"));
///////////////////////////////////////////////////////////////////////
// Each dimension has a loop counter that is a uniform value that
// goes from startVal to endVal, in steps of the span for this
// dimension. Its value is only used internally here for looping
// logic and isn't directly available in the user's program code.
uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
"counter"));
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
///////////////////////////////////////////////////////////////////////
// Each dimension has a loop counter that is a uniform value that
// goes from startVal to endVal, in steps of the span for this
// dimension. Its value is only used internally here for looping
// logic and isn't directly available in the user's program code.
uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
"counter"));
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
// There is also a varying variable that holds the set of index
// values for each dimension in the current loop iteration; this is
// the value that is program-visible.
dimVariables[i]->storagePtr =
ctx->AllocaInst(LLVMTypes::Int32VectorType,
dimVariables[i]->name.c_str());
dimVariables[i]->parentFunction = ctx->GetFunction();
ctx->EmitVariableDebugInfo(dimVariables[i]);
// There is also a varying variable that holds the set of index
// values for each dimension in the current loop iteration; this is
// the value that is program-visible.
dimVariables[i]->storagePtr =
ctx->AllocaInst(LLVMTypes::Int32VectorType,
dimVariables[i]->name.c_str());
dimVariables[i]->parentFunction = ctx->GetFunction();
ctx->EmitVariableDebugInfo(dimVariables[i]);
// Each dimension also maintains a mask that represents which of
// the varying elements in the current iteration should be
// processed. (i.e. this is used to disable the lanes that have
// out-of-bounds offsets.)
extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
// Each dimension also maintains a mask that represents which of
// the varying elements in the current iteration should be
// processed. (i.e. this is used to disable the lanes that have
// out-of-bounds offsets.)
extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
}
ctx->StartForeach(FunctionEmitContext::FOREACH_REGULAR);
@@ -1733,14 +1758,14 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// a given dimension in preparation for running through its loop again,
// after the enclosing level advances its counter.
for (int i = 0; i < nDims; ++i) {
ctx->SetCurrentBasicBlock(bbReset[i]);
if (i == 0)
ctx->BranchInst(bbExit);
else {
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
ctx->BranchInst(bbStep[i-1]);
}
ctx->SetCurrentBasicBlock(bbReset[i]);
if (i == 0)
ctx->BranchInst(bbExit);
else {
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
ctx->BranchInst(bbStep[i-1]);
}
}
///////////////////////////////////////////////////////////////////////////
@@ -1750,67 +1775,67 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// this for the innermost dimension, which has a more complex stepping
// structure..
for (int i = 0; i < nDims-1; ++i) {
ctx->SetCurrentBasicBlock(bbStep[i]);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[i]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
ctx->BranchInst(bbTest[i]);
ctx->SetCurrentBasicBlock(bbStep[i]);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[i]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
ctx->BranchInst(bbTest[i]);
}
///////////////////////////////////////////////////////////////////////////
// foreach_test (for all dimensions other than the innermost...)
std::vector<llvm::Value *> inExtras;
for (int i = 0; i < nDims-1; ++i) {
ctx->SetCurrentBasicBlock(bbTest[i]);
ctx->SetCurrentBasicBlock(bbTest[i]);
llvm::Value *haveExtras =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
endVals[i], alignedEnd[i], "have_extras");
llvm::Value *haveExtras =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
endVals[i], alignedEnd[i], "have_extras");
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
llvm::Value *atAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
counter, alignedEnd[i], "at_aligned_end");
llvm::Value *inEx =
ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
atAlignedEnd, "in_extras");
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
llvm::Value *atAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
counter, alignedEnd[i], "at_aligned_end");
llvm::Value *inEx =
ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
atAlignedEnd, "in_extras");
if (i == 0)
inExtras.push_back(inEx);
else
inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
inExtras[i-1], "in_extras_all"));
if (i == 0)
inExtras.push_back(inEx);
else
inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
inExtras[i-1], "in_extras_all"));
llvm::Value *varyingCounter =
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
dimVariables[i]->storagePtr, span);
llvm::Value *varyingCounter =
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
dimVariables[i]->storagePtr, span);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[i], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[i], LLVMTypes::Int32VectorType, "smear_end");
// Do a vector compare of its value to the end value to generate a
// mask for this last bit of work.
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
// Do a vector compare of its value to the end value to generate a
// mask for this last bit of work.
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
if (i == 0)
ctx->StoreInst(emask, extrasMaskPtrs[i]);
else {
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask");
ctx->StoreInst(newMask, extrasMaskPtrs[i]);
}
if (i == 0)
ctx->StoreInst(emask, extrasMaskPtrs[i]);
else {
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask");
ctx->StoreInst(newMask, extrasMaskPtrs[i]);
}
llvm::Value *notAtEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[i]);
ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
llvm::Value *notAtEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[i]);
ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
}
///////////////////////////////////////////////////////////////////////////
@@ -1847,18 +1872,18 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// (i.e. processing extra elements that don't exactly fit into a
// vector).
llvm::BasicBlock *bbOuterInExtras =
ctx->CreateBasicBlock("outer_in_extras");
ctx->CreateBasicBlock("outer_in_extras");
llvm::BasicBlock *bbOuterNotInExtras =
ctx->CreateBasicBlock("outer_not_in_extras");
ctx->CreateBasicBlock("outer_not_in_extras");
ctx->SetCurrentBasicBlock(bbTest[nDims-1]);
if (inExtras.size())
ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras,
inExtras.back());
ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras,
inExtras.back());
else
// for a 1D iteration domain, we certainly don't have any enclosing
// dimensions that are processing extra elements.
ctx->BranchInst(bbOuterNotInExtras);
// for a 1D iteration domain, we certainly don't have any enclosing
// dimensions that are processing extra elements.
ctx->BranchInst(bbOuterNotInExtras);
///////////////////////////////////////////////////////////////////////////
// One or more outer dimensions in extras, so we need to mask for the loop
@@ -1873,21 +1898,21 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// // run loop body with mask
// }
llvm::BasicBlock *bbAllInnerPartialOuter =
ctx->CreateBasicBlock("all_inner_partial_outer");
ctx->CreateBasicBlock("all_inner_partial_outer");
llvm::BasicBlock *bbPartial =
ctx->CreateBasicBlock("both_partial");
ctx->CreateBasicBlock("both_partial");
ctx->SetCurrentBasicBlock(bbOuterInExtras); {
// Update the varying counter value here, since all subsequent
// blocks along this path need it.
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
// Update the varying counter value here, since all subsequent
// blocks along this path need it.
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
// here we just check to see if counter < alignedEnd
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, alignedEnd[nDims-1], "before_aligned_end");
ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd);
// here we just check to see if counter < alignedEnd
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, alignedEnd[nDims-1], "before_aligned_end");
ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd);
}
// Below we have a basic block that runs the loop body code for the
@@ -1906,53 +1931,53 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// should step the loop counter for the next enclosing dimension
// instead.
llvm::Value *stepIndexAfterMaskedBodyPtr =
ctx->AllocaInst(LLVMTypes::BoolType, "step_index");
ctx->AllocaInst(LLVMTypes::BoolType, "step_index");
///////////////////////////////////////////////////////////////////////////
// We're in the inner loop part where the only masking is due to outer
// dimensions but the innermost dimension fits fully into a vector's
// width. Set the mask and jump to the masked loop body.
ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); {
llvm::Value *mask;
if (nDims == 1)
// 1D loop; we shouldn't ever get here anyway
mask = LLVMMaskAllOff;
else
mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
llvm::Value *mask;
if (nDims == 1)
// 1D loop; we shouldn't ever get here anyway
mask = LLVMMaskAllOff;
else
mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
ctx->SetInternalMask(mask);
ctx->SetInternalMask(mask);
ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
}
///////////////////////////////////////////////////////////////////////////
// We need to include the effect of the innermost dimension in the mask
// for the final bits here
ctx->SetCurrentBasicBlock(bbPartial); {
llvm::Value *varyingCounter =
ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *varyingCounter =
ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
if (nDims == 1) {
ctx->SetInternalMask(emask);
}
else {
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask");
ctx->SetInternalMask(newMask);
}
if (nDims == 1) {
ctx->SetInternalMask(emask);
}
else {
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask");
ctx->SetInternalMask(newMask);
}
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
}
///////////////////////////////////////////////////////////////////////////
@@ -1968,14 +1993,14 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// // run loop body with mask
// }
llvm::BasicBlock *bbPartialInnerAllOuter =
ctx->CreateBasicBlock("partial_inner_all_outer");
ctx->CreateBasicBlock("partial_inner_all_outer");
ctx->SetCurrentBasicBlock(bbOuterNotInExtras); {
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, alignedEnd[nDims-1], "before_aligned_end");
ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter,
beforeAlignedEnd);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, alignedEnd[nDims-1], "before_aligned_end");
ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter,
beforeAlignedEnd);
}
///////////////////////////////////////////////////////////////////////////
@@ -1985,26 +2010,26 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// value of the varying loop counter and have the statements in the
// loop body emit their code.
llvm::BasicBlock *bbFullBodyContinue =
ctx->CreateBasicBlock("foreach_full_continue");
ctx->CreateBasicBlock("foreach_full_continue");
ctx->SetCurrentBasicBlock(bbFullBody); {
ctx->SetInternalMask(LLVMMaskAllOn);
ctx->SetBlockEntryMask(LLVMMaskAllOn);
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
ctx->SetContinueTarget(bbFullBodyContinue);
ctx->AddInstrumentationPoint("foreach loop body (all on)");
stmts->EmitCode(ctx);
AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
ctx->BranchInst(bbFullBodyContinue);
ctx->SetInternalMask(LLVMMaskAllOn);
ctx->SetBlockEntryMask(LLVMMaskAllOn);
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
ctx->SetContinueTarget(bbFullBodyContinue);
ctx->AddInstrumentationPoint("foreach loop body (all on)");
stmts->EmitCode(ctx);
AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
ctx->BranchInst(bbFullBodyContinue);
}
ctx->SetCurrentBasicBlock(bbFullBodyContinue); {
ctx->RestoreContinuedLanes();
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[nDims-1]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
ctx->BranchInst(bbOuterNotInExtras);
ctx->RestoreContinuedLanes();
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[nDims-1]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
ctx->BranchInst(bbOuterNotInExtras);
}
///////////////////////////////////////////////////////////////////////////
@@ -2012,33 +2037,33 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// less than the end value, in which case we need to run the body one
// more time to get the extra bits.
llvm::BasicBlock *bbSetInnerMask =
ctx->CreateBasicBlock("partial_inner_only");
ctx->CreateBasicBlock("partial_inner_only");
ctx->SetCurrentBasicBlock(bbPartialInnerAllOuter); {
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeFullEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[nDims-1], "before_full_end");
ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeFullEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[nDims-1], "before_full_end");
ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd);
}
///////////////////////////////////////////////////////////////////////////
// The outer dimensions are all on, so the mask is just given by the
// mask for the innermost dimension
ctx->SetCurrentBasicBlock(bbSetInnerMask); {
llvm::Value *varyingCounter =
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
ctx->SetInternalMask(emask);
ctx->SetBlockEntryMask(emask);
llvm::Value *varyingCounter =
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
ctx->SetInternalMask(emask);
ctx->SetBlockEntryMask(emask);
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
}
///////////////////////////////////////////////////////////////////////////
@@ -2048,34 +2073,34 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// mask known to be all-on, which in turn leads to more efficient code
// for that case.
llvm::BasicBlock *bbStepInnerIndex =
ctx->CreateBasicBlock("step_inner_index");
ctx->CreateBasicBlock("step_inner_index");
llvm::BasicBlock *bbMaskedBodyContinue =
ctx->CreateBasicBlock("foreach_masked_continue");
ctx->CreateBasicBlock("foreach_masked_continue");
ctx->SetCurrentBasicBlock(bbMaskedBody); {
ctx->AddInstrumentationPoint("foreach loop body (masked)");
ctx->SetContinueTarget(bbMaskedBodyContinue);
ctx->DisableGatherScatterWarnings();
ctx->SetBlockEntryMask(ctx->GetFullMask());
stmts->EmitCode(ctx);
ctx->EnableGatherScatterWarnings();
ctx->BranchInst(bbMaskedBodyContinue);
ctx->AddInstrumentationPoint("foreach loop body (masked)");
ctx->SetContinueTarget(bbMaskedBodyContinue);
ctx->DisableGatherScatterWarnings();
ctx->SetBlockEntryMask(ctx->GetFullMask());
stmts->EmitCode(ctx);
ctx->EnableGatherScatterWarnings();
ctx->BranchInst(bbMaskedBodyContinue);
}
ctx->SetCurrentBasicBlock(bbMaskedBodyContinue); {
ctx->RestoreContinuedLanes();
llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex);
ctx->RestoreContinuedLanes();
llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex);
}
///////////////////////////////////////////////////////////////////////////
// step the innermost index, for the case where we're doing the
// innermost for loop over full vectors.
ctx->SetCurrentBasicBlock(bbStepInnerIndex); {
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[nDims-1]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
ctx->BranchInst(bbOuterInExtras);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[nDims-1]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
ctx->BranchInst(bbOuterInExtras);
}
///////////////////////////////////////////////////////////////////////////
@@ -2262,8 +2287,12 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const {
// math...)
// Get the "program index" vector value
#ifdef ISPC_NVPTX_ENABLED
llvm::Value *programIndex = g->target->getISA() == Target::NVPTX ?
ctx->ProgramIndexVectorPTX() : ctx->ProgramIndexVector();
#else /* ISPC_NVPTX_ENABLED */
llvm::Value *programIndex = ctx->ProgramIndexVector();
#endif /* ISPC_NVPTX_ENABLED */
// And smear the current lane out to a vector
llvm::Value *firstSet32 =
@@ -2460,19 +2489,22 @@ ForeachUniqueStmt::EmitCode(FunctionEmitContext *ctx) const {
// And load the corresponding element value from the temporary
// memory storing the value of the varying expr.
llvm::Value *uniqueValue;
if (g->target->getISA() != Target::NVPTX)
{
llvm::Value *uniqueValuePtr =
ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType,
"unique_index_ptr");
uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value");
}
else /* in case of PTX target, use __shfl PTX intrinsics via __insert/__extract function */
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
llvm::Value *firstSet32 = ctx->TruncInst(firstSet, LLVMTypes::Int32Type);
uniqueValue = ctx->Extract(exprValue, firstSet32);
}
else
{
#endif /* ISPC_NVPTX_ENABLED */
llvm::Value *uniqueValuePtr =
ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType,
"unique_index_ptr");
uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value");
#ifdef ISPC_NVPTX_ENABLED
}
#endif /* ISPC_NVPTX_ENABLED */
// If it's a varying pointer type, need to convert from the int
// type we store in the vector to the actual pointer type
if (llvm::dyn_cast<llvm::PointerType>(symType) != NULL)
@@ -3379,8 +3411,12 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
}
// Now we can emit code to call __do_print()
#ifdef ISPC_NVPTX_ENABLED
llvm::Function *printFunc = g->target->getISA() != Target::NVPTX ?
m->module->getFunction("__do_print") : m->module->getFunction("__do_print_nvptx");
#else /* ISPC_NVPTX_ENABLED */
llvm::Function *printFunc = m->module->getFunction("__do_print");
#endif /* ISPC_NVPTX_ENABLED */
AssertPos(pos, printFunc);
llvm::Value *mask = ctx->GetFullMask();