generating proper tasking function for nvptx

This commit is contained in:
Evghenii
2013-10-28 11:36:08 +01:00
parent 8391d05697
commit b68a751f4e
2 changed files with 77 additions and 70 deletions

124
func.cpp
View File

@@ -240,78 +240,82 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
// thread index, and the thread count variables. // thread index, and the thread count variables.
llvm::Function::arg_iterator argIter = function->arg_begin(); llvm::Function::arg_iterator argIter = function->arg_begin();
llvm::Value *structParamPtr = argIter++; llvm::Value *structParamPtr = argIter++;
llvm::Value *threadIndex = argIter++;
llvm::Value *threadCount = argIter++;
llvm::Value *taskIndex = argIter++;
llvm::Value *taskCount = argIter++;
llvm::Value *taskIndex0 = argIter++;
llvm::Value *taskIndex1 = argIter++;
llvm::Value *taskIndex2 = argIter++;
llvm::Value *taskCount0 = argIter++;
llvm::Value *taskCount1 = argIter++;
llvm::Value *taskCount2 = argIter++;
// Copy the function parameter values from the structure into local // Copy the function parameter values from the structure into local
// storage // storage
for (unsigned int i = 0; i < args.size(); ++i) for (unsigned int i = 0; i < args.size(); ++i)
lCopyInTaskParameter(i, structParamPtr, args, ctx); lCopyInTaskParameter(i, structParamPtr, args, ctx);
if (type->isUnmasked == false) { if (type->isUnmasked == false) {
// Copy in the mask as well. // Copy in the mask as well.
int nArgs = (int)args.size(); int nArgs = (int)args.size();
// The mask is the last parameter in the argument structure // The mask is the last parameter in the argument structure
llvm::Value *ptr = ctx->AddElementOffset(structParamPtr, nArgs, NULL, llvm::Value *ptr = ctx->AddElementOffset(structParamPtr, nArgs, NULL,
"task_struct_mask"); "task_struct_mask");
llvm::Value *ptrval = ctx->LoadInst(ptr, "mask"); llvm::Value *ptrval = ctx->LoadInst(ptr, "mask");
ctx->SetFunctionMask(ptrval); ctx->SetFunctionMask(ptrval);
} }
// Copy threadIndex and threadCount into stack-allocated storage so if (g->target->getISA() != Target::NVPTX64)
// that their symbols point to something reasonable. {
threadIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadIndex"); llvm::Value *threadIndex = argIter++;
ctx->StoreInst(threadIndex, threadIndexSym->storagePtr); llvm::Value *threadCount = argIter++;
llvm::Value *taskIndex = argIter++;
llvm::Value *taskCount = argIter++;
llvm::Value *taskIndex0 = argIter++;
llvm::Value *taskIndex1 = argIter++;
llvm::Value *taskIndex2 = argIter++;
llvm::Value *taskCount0 = argIter++;
llvm::Value *taskCount1 = argIter++;
llvm::Value *taskCount2 = argIter++;
threadCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadCount"); // Copy threadIndex and threadCount into stack-allocated storage so
ctx->StoreInst(threadCount, threadCountSym->storagePtr); // that their symbols point to something reasonable.
threadIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadIndex");
ctx->StoreInst(threadIndex, threadIndexSym->storagePtr);
// Copy taskIndex and taskCount into stack-allocated storage so threadCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadCount");
// that their symbols point to something reasonable. ctx->StoreInst(threadCount, threadCountSym->storagePtr);
taskIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex");
ctx->StoreInst(taskIndex, taskIndexSym->storagePtr);
taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount"); // Copy taskIndex and taskCount into stack-allocated storage so
ctx->StoreInst(taskCount, taskCountSym->storagePtr); // that their symbols point to something reasonable.
taskIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex");
ctx->StoreInst(taskIndex, taskIndexSym->storagePtr);
/* nvptx map: taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
* programCount : llvm.nvvm.read.ptx.sreg.warpsize ctx->StoreInst(taskCount, taskCountSym->storagePtr);
* programIndex : llvm.ptx.read.laneid _or_ llvm.nvvm.read.ptx.sreg.tid.x & (programCount-1)
* taskIndex0 : llvm.nvvm.read.ptx.sreg.ctaid.x /* nvptx map:
* taskIndex1 : llvm.nvvm.read.ptx.sreg.ctaid.y * programCount : llvm.nvvm.read.ptx.sreg.warpsize
* taskIndex2 : llvm.nvvm.read.ptx.sreg.ctaid.z * programIndex : llvm.ptx.read.laneid _or_ llvm.nvvm.read.ptx.sreg.tid.x & (programCount-1)
* taskCount0 : llvm.nvvm.read.ptx.sreg.nctaid.x * taskIndex0 : llvm.nvvm.read.ptx.sreg.ctaid.x
* taskCount1 : llvm.nvvm.read.ptx.sreg.nctaid.y * taskIndex1 : llvm.nvvm.read.ptx.sreg.ctaid.y
* taskCount2 : llvm.nvvm.read.ptx.sreg.nctaid.z * taskIndex2 : llvm.nvvm.read.ptx.sreg.ctaid.z
*/ * taskCount0 : llvm.nvvm.read.ptx.sreg.nctaid.x
* taskCount1 : llvm.nvvm.read.ptx.sreg.nctaid.y
// llvm.nvvm.read.ptx.sreg.ctaid.x * taskCount2 : llvm.nvvm.read.ptx.sreg.nctaid.z
taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0"); */
ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
// llvm.nvvm.read.ptx.sreg.ctaid.y // llvm.nvvm.read.ptx.sreg.ctaid.x
taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1"); taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0");
ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr); ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
// llvm.nvvm.read.ptx.sreg.ctaid.z // llvm.nvvm.read.ptx.sreg.ctaid.y
taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2"); taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1");
ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr); ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr);
// llvm.nvvm.read.ptx.sreg.ctaid.z
// llvm.nvvm.read.ptx.sreg.nctaid.x taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2");
taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0"); ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr);
ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
// llvm.nvvm.read.ptx.sreg.nctaid.y // llvm.nvvm.read.ptx.sreg.nctaid.x
taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1"); taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0");
ctx->StoreInst(taskCount1, taskCountSym1->storagePtr); ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
// llvm.nvvm.read.ptx.sreg.nctaid.z // llvm.nvvm.read.ptx.sreg.nctaid.y
taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2"); taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1");
ctx->StoreInst(taskCount2, taskCountSym2->storagePtr); ctx->StoreInst(taskCount1, taskCountSym1->storagePtr);
// llvm.nvvm.read.ptx.sreg.nctaid.z
taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2");
ctx->StoreInst(taskCount2, taskCountSym2->storagePtr);
}
} }
else { else {
// Regular, non-task function // Regular, non-task function

View File

@@ -2957,16 +2957,19 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
// hold them until the task actually runs.) // hold them until the task actually runs.)
llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes); llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes);
callTypes.push_back(llvm::PointerType::getUnqual(st)); callTypes.push_back(llvm::PointerType::getUnqual(st));
callTypes.push_back(LLVMTypes::Int32Type); // threadIndex if (g->target->getISA() != Target::NVPTX64)
callTypes.push_back(LLVMTypes::Int32Type); // threadCount {
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex callTypes.push_back(LLVMTypes::Int32Type); // threadIndex
callTypes.push_back(LLVMTypes::Int32Type); // taskCount callTypes.push_back(LLVMTypes::Int32Type); // threadCount
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex0 callTypes.push_back(LLVMTypes::Int32Type); // taskIndex
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1 callTypes.push_back(LLVMTypes::Int32Type); // taskCount
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2 callTypes.push_back(LLVMTypes::Int32Type); // taskIndex0
callTypes.push_back(LLVMTypes::Int32Type); // taskCount0 callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1
callTypes.push_back(LLVMTypes::Int32Type); // taskCount1 callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2
callTypes.push_back(LLVMTypes::Int32Type); // taskCount2 callTypes.push_back(LLVMTypes::Int32Type); // taskCount0
callTypes.push_back(LLVMTypes::Int32Type); // taskCount1
callTypes.push_back(LLVMTypes::Int32Type); // taskCount2
}
} }
else else
// Otherwise we already have the types of the arguments // Otherwise we already have the types of the arguments