generating proper tasking function for nvptx
This commit is contained in:
124
func.cpp
124
func.cpp
@@ -240,78 +240,82 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
// thread index, and the thread count variables.
|
// thread index, and the thread count variables.
|
||||||
llvm::Function::arg_iterator argIter = function->arg_begin();
|
llvm::Function::arg_iterator argIter = function->arg_begin();
|
||||||
llvm::Value *structParamPtr = argIter++;
|
llvm::Value *structParamPtr = argIter++;
|
||||||
llvm::Value *threadIndex = argIter++;
|
|
||||||
llvm::Value *threadCount = argIter++;
|
|
||||||
llvm::Value *taskIndex = argIter++;
|
|
||||||
llvm::Value *taskCount = argIter++;
|
|
||||||
llvm::Value *taskIndex0 = argIter++;
|
|
||||||
llvm::Value *taskIndex1 = argIter++;
|
|
||||||
llvm::Value *taskIndex2 = argIter++;
|
|
||||||
llvm::Value *taskCount0 = argIter++;
|
|
||||||
llvm::Value *taskCount1 = argIter++;
|
|
||||||
llvm::Value *taskCount2 = argIter++;
|
|
||||||
|
|
||||||
// Copy the function parameter values from the structure into local
|
// Copy the function parameter values from the structure into local
|
||||||
// storage
|
// storage
|
||||||
for (unsigned int i = 0; i < args.size(); ++i)
|
for (unsigned int i = 0; i < args.size(); ++i)
|
||||||
lCopyInTaskParameter(i, structParamPtr, args, ctx);
|
lCopyInTaskParameter(i, structParamPtr, args, ctx);
|
||||||
|
|
||||||
if (type->isUnmasked == false) {
|
if (type->isUnmasked == false) {
|
||||||
// Copy in the mask as well.
|
// Copy in the mask as well.
|
||||||
int nArgs = (int)args.size();
|
int nArgs = (int)args.size();
|
||||||
// The mask is the last parameter in the argument structure
|
// The mask is the last parameter in the argument structure
|
||||||
llvm::Value *ptr = ctx->AddElementOffset(structParamPtr, nArgs, NULL,
|
llvm::Value *ptr = ctx->AddElementOffset(structParamPtr, nArgs, NULL,
|
||||||
"task_struct_mask");
|
"task_struct_mask");
|
||||||
llvm::Value *ptrval = ctx->LoadInst(ptr, "mask");
|
llvm::Value *ptrval = ctx->LoadInst(ptr, "mask");
|
||||||
ctx->SetFunctionMask(ptrval);
|
ctx->SetFunctionMask(ptrval);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy threadIndex and threadCount into stack-allocated storage so
|
if (g->target->getISA() != Target::NVPTX64)
|
||||||
// that their symbols point to something reasonable.
|
{
|
||||||
threadIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadIndex");
|
llvm::Value *threadIndex = argIter++;
|
||||||
ctx->StoreInst(threadIndex, threadIndexSym->storagePtr);
|
llvm::Value *threadCount = argIter++;
|
||||||
|
llvm::Value *taskIndex = argIter++;
|
||||||
|
llvm::Value *taskCount = argIter++;
|
||||||
|
llvm::Value *taskIndex0 = argIter++;
|
||||||
|
llvm::Value *taskIndex1 = argIter++;
|
||||||
|
llvm::Value *taskIndex2 = argIter++;
|
||||||
|
llvm::Value *taskCount0 = argIter++;
|
||||||
|
llvm::Value *taskCount1 = argIter++;
|
||||||
|
llvm::Value *taskCount2 = argIter++;
|
||||||
|
|
||||||
threadCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadCount");
|
// Copy threadIndex and threadCount into stack-allocated storage so
|
||||||
ctx->StoreInst(threadCount, threadCountSym->storagePtr);
|
// that their symbols point to something reasonable.
|
||||||
|
threadIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadIndex");
|
||||||
|
ctx->StoreInst(threadIndex, threadIndexSym->storagePtr);
|
||||||
|
|
||||||
// Copy taskIndex and taskCount into stack-allocated storage so
|
threadCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadCount");
|
||||||
// that their symbols point to something reasonable.
|
ctx->StoreInst(threadCount, threadCountSym->storagePtr);
|
||||||
taskIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex");
|
|
||||||
ctx->StoreInst(taskIndex, taskIndexSym->storagePtr);
|
|
||||||
|
|
||||||
taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
|
// Copy taskIndex and taskCount into stack-allocated storage so
|
||||||
ctx->StoreInst(taskCount, taskCountSym->storagePtr);
|
// that their symbols point to something reasonable.
|
||||||
|
taskIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex");
|
||||||
|
ctx->StoreInst(taskIndex, taskIndexSym->storagePtr);
|
||||||
|
|
||||||
/* nvptx map:
|
taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
|
||||||
* programCount : llvm.nvvm.read.ptx.sreg.warpsize
|
ctx->StoreInst(taskCount, taskCountSym->storagePtr);
|
||||||
* programIndex : llvm.ptx.read.laneid _or_ llvm.nvvm.read.ptx.sreg.tid.x & (programCount - 1)
|
|
||||||
* taskIndex0 : llvm.nvvm.read.ptx.sreg.ctaid.x
|
/* nvptx map:
|
||||||
* taskIndex1 : llvm.nvvm.read.ptx.sreg.ctaid.y
|
* programCount : llvm.nvvm.read.ptx.sreg.warpsize
|
||||||
* taskIndex2 : llvm.nvvm.read.ptx.sreg.ctaid.z
|
* programIndex : llvm.ptx.read.laneid _or_ llvm.nvvm.read.ptx.sreg.tid.x & (programCount - 1)
|
||||||
* taskCount0 : llvm.nvvm.read.ptx.sreg.nctaid.x
|
* taskIndex0 : llvm.nvvm.read.ptx.sreg.ctaid.x
|
||||||
* taskCount1 : llvm.nvvm.read.ptx.sreg.nctaid.y
|
* taskIndex1 : llvm.nvvm.read.ptx.sreg.ctaid.y
|
||||||
* taskCount2 : llvm.nvvm.read.ptx.sreg.nctaid.z
|
* taskIndex2 : llvm.nvvm.read.ptx.sreg.ctaid.z
|
||||||
*/
|
* taskCount0 : llvm.nvvm.read.ptx.sreg.nctaid.x
|
||||||
|
* taskCount1 : llvm.nvvm.read.ptx.sreg.nctaid.y
|
||||||
// llvm.nvvm.read.ptx.sreg.ctaid.x
|
* taskCount2 : llvm.nvvm.read.ptx.sreg.nctaid.z
|
||||||
taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0");
|
*/
|
||||||
ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
|
|
||||||
// llvm.nvvm.read.ptx.sreg.ctaid.y
|
// llvm.nvvm.read.ptx.sreg.ctaid.x
|
||||||
taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1");
|
taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0");
|
||||||
ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr);
|
ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
|
||||||
// llvm.nvvm.read.ptx.sreg.ctaid.z
|
// llvm.nvvm.read.ptx.sreg.ctaid.y
|
||||||
taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2");
|
taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1");
|
||||||
ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr);
|
ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr);
|
||||||
|
// llvm.nvvm.read.ptx.sreg.ctaid.z
|
||||||
// llvm.nvvm.read.ptx.sreg.nctaid.x
|
taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2");
|
||||||
taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0");
|
ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr);
|
||||||
ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
|
|
||||||
// llvm.nvvm.read.ptx.sreg.nctaid.y
|
// llvm.nvvm.read.ptx.sreg.nctaid.x
|
||||||
taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1");
|
taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0");
|
||||||
ctx->StoreInst(taskCount1, taskCountSym1->storagePtr);
|
ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
|
||||||
// llvm.nvvm.read.ptx.sreg.nctaid.z
|
// llvm.nvvm.read.ptx.sreg.nctaid.y
|
||||||
taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2");
|
taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1");
|
||||||
ctx->StoreInst(taskCount2, taskCountSym2->storagePtr);
|
ctx->StoreInst(taskCount1, taskCountSym1->storagePtr);
|
||||||
|
// llvm.nvvm.read.ptx.sreg.nctaid.z
|
||||||
|
taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2");
|
||||||
|
ctx->StoreInst(taskCount2, taskCountSym2->storagePtr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// Regular, non-task function
|
// Regular, non-task function
|
||||||
|
|||||||
23
type.cpp
23
type.cpp
@@ -2957,16 +2957,19 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
|
|||||||
// hold them until the task actually runs.)
|
// hold them until the task actually runs.)
|
||||||
llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes);
|
llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes);
|
||||||
callTypes.push_back(llvm::PointerType::getUnqual(st));
|
callTypes.push_back(llvm::PointerType::getUnqual(st));
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // threadIndex
|
if (g->target->getISA() != Target::NVPTX64)
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // threadCount
|
{
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex
|
callTypes.push_back(LLVMTypes::Int32Type); // threadIndex
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount
|
callTypes.push_back(LLVMTypes::Int32Type); // threadCount
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex0
|
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1
|
callTypes.push_back(LLVMTypes::Int32Type); // taskCount
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2
|
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex0
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount0
|
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount1
|
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount2
|
callTypes.push_back(LLVMTypes::Int32Type); // taskCount0
|
||||||
|
callTypes.push_back(LLVMTypes::Int32Type); // taskCount1
|
||||||
|
callTypes.push_back(LLVMTypes::Int32Type); // taskCount2
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
// Otherwise we already have the types of the arguments
|
// Otherwise we already have the types of the arguments
|
||||||
|
|||||||
Reference in New Issue
Block a user