added blockIndex computations

This commit is contained in:
Evghenii
2013-10-28 10:18:30 +01:00
parent ac095dbf3e
commit 8391d05697
4 changed files with 133 additions and 0 deletions

View File

@@ -611,6 +611,14 @@ lSetInternalFunctions(llvm::Module *module) {
"__vec4_add_int32",
"__vselect_float",
"__vselect_i32",
"__tid_x",
"__ctaid_x",
"__ctaid_y",
"__ctaid_z",
"__nctaid_x",
"__nctaid_y",
"__nctaid_z",
"__warpsize"
};
int count = sizeof(names) / sizeof(names[0]);

View File

@@ -56,6 +56,66 @@ gen_scatter(i64)
gen_scatter(double)
;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;
;; NVVM intrinsics that read PTX special registers; each one is wrapped by
;; a same-named __<reg> helper defined below.

;; tid.x and warpsize
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone
declare i32 @llvm.nvvm.read.ptx.sreg.warpsize() nounwind readnone

;; ctaid.{x,y,z}
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() nounwind readnone
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() nounwind readnone

;; nctaid.{x,y,z}
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() nounwind readnone
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() nounwind readnone
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() nounwind readnone
;; __tid_x: returns the i32 produced by the tid.x special-register intrinsic.
define i32 @__tid_x() nounwind readnone alwaysinline
{
entry:
  %tid_x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  ret i32 %tid_x
}
;; __warpsize: returns the i32 produced by the warpsize special-register intrinsic.
define i32 @__warpsize() nounwind readnone alwaysinline
{
entry:
  %warp_size = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
  ret i32 %warp_size
}
;; __ctaid_x: returns the i32 produced by the ctaid.x special-register intrinsic.
define i32 @__ctaid_x() nounwind readnone alwaysinline
{
entry:
  %ctaid_x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
  ret i32 %ctaid_x
}
;; __ctaid_y: returns the i32 produced by the ctaid.y special-register intrinsic.
define i32 @__ctaid_y() nounwind readnone alwaysinline
{
entry:
  %ctaid_y = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
  ret i32 %ctaid_y
}
;; __ctaid_z: returns the i32 produced by the ctaid.z special-register intrinsic.
define i32 @__ctaid_z() nounwind readnone alwaysinline
{
entry:
  %ctaid_z = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
  ret i32 %ctaid_z
}
;; __nctaid_x: returns the i32 produced by the nctaid.x special-register intrinsic.
define i32 @__nctaid_x() nounwind readnone alwaysinline
{
entry:
  %nctaid_x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
  ret i32 %nctaid_x
}
;; __nctaid_y: returns the i32 produced by the nctaid.y special-register intrinsic.
define i32 @__nctaid_y() nounwind readnone alwaysinline
{
entry:
  %nctaid_y = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
  ret i32 %nctaid_y
}
;; __nctaid_z: returns the i32 produced by the nctaid.z special-register intrinsic.
define i32 @__nctaid_z() nounwind readnone alwaysinline
{
entry:
  %nctaid_z = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
  ret i32 %nctaid_z
}
;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;
define <1 x i8> @__vselect_i8(<1 x i8>, <1 x i8> ,
<1 x i32> %mask) nounwind readnone alwaysinline {
; %mv = trunc <1 x i32> %mask to <1 x i8>

View File

@@ -281,18 +281,35 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
ctx->StoreInst(taskCount, taskCountSym->storagePtr);
/* nvptx map:
* programCount : llvm.nvvm.read.ptx.sreg.warpsize
* programIndex : llvm.ptx.read.laneid _or_ llvm.nvvm.read.ptx.sreg.tid.x & (programCount-1)
* taskIndex0 : llvm.nvvm.read.ptx.sreg.ctaid.x
* taskIndex1 : llvm.nvvm.read.ptx.sreg.ctaid.y
* taskIndex2 : llvm.nvvm.read.ptx.sreg.ctaid.z
* taskCount0 : llvm.nvvm.read.ptx.sreg.nctaid.x
* taskCount1 : llvm.nvvm.read.ptx.sreg.nctaid.y
* taskCount2 : llvm.nvvm.read.ptx.sreg.nctaid.z
*/
// llvm.nvvm.read.ptx.sreg.ctaid.x
taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0");
ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
// llvm.nvvm.read.ptx.sreg.ctaid.y
taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1");
ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr);
// llvm.nvvm.read.ptx.sreg.ctaid.z
taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2");
ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr);
// llvm.nvvm.read.ptx.sreg.nctaid.x
taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0");
ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
// llvm.nvvm.read.ptx.sreg.nctaid.y
taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1");
ctx->StoreInst(taskCount1, taskCountSym1->storagePtr);
// llvm.nvvm.read.ptx.sreg.nctaid.z
taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2");
ctx->StoreInst(taskCount2, taskCountSym2->storagePtr);
}

View File

@@ -57,6 +57,54 @@
#error Unknown value of ISPC_MASK_BITS
#endif
///////////////////////////////////////////////////////////////////////////
// CUDA Specific primitives
//
// NOTE(review): not referenced by any of the primitives visible below;
// presumably the number of CUDA threads per block -- confirm against users.
#define CUDABLOCKSIZE 128
// Returns the value of the __ctaid_x() builtin unchanged.
__declspec(safe,cost0)
static inline uniform int blockIndex0()
{
    const uniform int ctaIdX = __ctaid_x();
    return ctaIdX;
}
// Returns the value of the __ctaid_y() builtin unchanged.
__declspec(safe,cost0)
static inline uniform int blockIndex1()
{
    const uniform int ctaIdY = __ctaid_y();
    return ctaIdY;
}
// Returns the value of the __ctaid_z() builtin unchanged.
// The third index must read the z register: the previous body returned
// __ctaid_y(), duplicating blockIndex1() and leaving __ctaid_z() unused,
// whereas blockCount2() correctly pairs with __nctaid_z().
__declspec(safe,cost0)
static inline uniform int blockIndex2()
{
    return __ctaid_z();
}
// Returns the value of the __nctaid_x() builtin unchanged.
__declspec(safe,cost0)
static inline uniform int blockCount0()
{
    const uniform int nCtaX = __nctaid_x();
    return nCtaX;
}
// Returns the value of the __nctaid_y() builtin unchanged.
__declspec(safe,cost0)
static inline uniform int blockCount1()
{
    const uniform int nCtaY = __nctaid_y();
    return nCtaY;
}
// Returns the value of the __nctaid_z() builtin unchanged.
__declspec(safe,cost0)
static inline uniform int blockCount2()
{
    const uniform int nCtaZ = __nctaid_z();
    return nCtaZ;
}
// Returns the value of the __warpsize() builtin unchanged.
__declspec(safe,cost0)
static inline uniform int warpSize()
{
    const uniform int lanesPerWarp = __warpsize();
    return lanesPerWarp;
}
// Returns __tid_x() masked with (warpSize() - 1).
// NOTE(review): the mask equals "tid.x modulo warp size" only when
// warpSize() is a power of two -- confirm this holds for all targets.
__declspec(safe,cost0)
static inline uniform int laneIndex()
{
    const uniform int laneMask = warpSize() - 1;
    return __tid_x() & laneMask;
}
///////////////////////////////////////////////////////////////////////////
// Low level primitives