added blockIndex computations
This commit is contained in:
@@ -611,6 +611,14 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__vec4_add_int32",
|
||||
"__vselect_float",
|
||||
"__vselect_i32",
|
||||
"__tid_x",
|
||||
"__ctaid_x",
|
||||
"__ctaid_y",
|
||||
"__ctaid_z",
|
||||
"__nctaid_x",
|
||||
"__nctaid_y",
|
||||
"__nctaid_z",
|
||||
"__warpsize"
|
||||
};
|
||||
|
||||
int count = sizeof(names) / sizeof(names[0]);
|
||||
|
||||
@@ -56,6 +56,66 @@ gen_scatter(i64)
|
||||
gen_scatter(double)
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() nounwind readnone
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() nounwind readnone
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() nounwind readnone
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() nounwind readnone
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() nounwind readnone
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.warpsize() nounwind readnone
|
||||
|
||||
;; Thin wrapper: returns the result of the llvm.nvvm.read.ptx.sreg.tid.x
;; intrinsic (the PTX %tid.x special register) unchanged.
define i32 @__tid_x() nounwind readnone alwaysinline {
  %thread_x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  ret i32 %thread_x
}
|
||||
;; Thin wrapper: returns the result of the llvm.nvvm.read.ptx.sreg.warpsize
;; intrinsic unchanged.
define i32 @__warpsize() nounwind readnone alwaysinline {
  %warp_size = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
  ret i32 %warp_size
}
|
||||
|
||||
|
||||
;; Thin wrapper: returns the result of the llvm.nvvm.read.ptx.sreg.ctaid.x
;; intrinsic (the PTX %ctaid.x special register) unchanged.
define i32 @__ctaid_x() nounwind readnone alwaysinline {
  %cta_x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
  ret i32 %cta_x
}
|
||||
;; Thin wrapper: returns the result of the llvm.nvvm.read.ptx.sreg.ctaid.y
;; intrinsic (the PTX %ctaid.y special register) unchanged.
define i32 @__ctaid_y() nounwind readnone alwaysinline {
  %cta_y = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
  ret i32 %cta_y
}
|
||||
;; Thin wrapper: returns the result of the llvm.nvvm.read.ptx.sreg.ctaid.z
;; intrinsic (the PTX %ctaid.z special register) unchanged.
define i32 @__ctaid_z() nounwind readnone alwaysinline {
  %cta_z = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
  ret i32 %cta_z
}
|
||||
|
||||
;; Thin wrapper: returns the result of the llvm.nvvm.read.ptx.sreg.nctaid.x
;; intrinsic (the PTX %nctaid.x special register) unchanged.
define i32 @__nctaid_x() nounwind readnone alwaysinline {
  %num_cta_x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
  ret i32 %num_cta_x
}
|
||||
;; Thin wrapper: returns the result of the llvm.nvvm.read.ptx.sreg.nctaid.y
;; intrinsic (the PTX %nctaid.y special register) unchanged.
define i32 @__nctaid_y() nounwind readnone alwaysinline {
  %num_cta_y = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
  ret i32 %num_cta_y
}
|
||||
;; Thin wrapper: returns the result of the llvm.nvvm.read.ptx.sreg.nctaid.z
;; intrinsic (the PTX %nctaid.z special register) unchanged.
define i32 @__nctaid_z() nounwind readnone alwaysinline {
  %num_cta_z = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
  ret i32 %num_cta_z
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
define <1 x i8> @__vselect_i8(<1 x i8>, <1 x i8> ,
|
||||
<1 x i32> %mask) nounwind readnone alwaysinline {
|
||||
; %mv = trunc <1 x i32> %mask to <1 x i8>
|
||||
|
||||
17
func.cpp
17
func.cpp
@@ -281,18 +281,35 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
|
||||
taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
|
||||
ctx->StoreInst(taskCount, taskCountSym->storagePtr);
|
||||
|
||||
/* nvptx map:
|
||||
* programCount : llvm.nvvm.read.ptx.sreg.warpsize
|
||||
* programIndex : llvm.ptx.read.laneid _or_ llvm.nvvm.read.ptx.sreg.tid.x & (programCount-1)
|
||||
* taskIndex0 : llvm.nvvm.read.ptx.sreg.ctaid.x
|
||||
* taskIndex1 : llvm.nvvm.read.ptx.sreg.ctaid.y
|
||||
* taskIndex2 : llvm.nvvm.read.ptx.sreg.ctaid.z
|
||||
* taskCount0 : llvm.nvvm.read.ptx.sreg.nctaid.x
|
||||
* taskCount1 : llvm.nvvm.read.ptx.sreg.nctaid.y
|
||||
* taskCount2 : llvm.nvvm.read.ptx.sreg.nctaid.z
|
||||
*/
|
||||
|
||||
// llvm.nvvm.read.ptx.sreg.ctaid.x
|
||||
taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0");
|
||||
ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
|
||||
// llvm.nvvm.read.ptx.sreg.ctaid.y
|
||||
taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1");
|
||||
ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr);
|
||||
// llvm.nvvm.read.ptx.sreg.ctaid.z
|
||||
taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2");
|
||||
ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr);
|
||||
|
||||
// llvm.nvvm.read.ptx.sreg.nctaid.x
|
||||
taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0");
|
||||
ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
|
||||
// llvm.nvvm.read.ptx.sreg.nctaid.y
|
||||
taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1");
|
||||
ctx->StoreInst(taskCount1, taskCountSym1->storagePtr);
|
||||
// llvm.nvvm.read.ptx.sreg.nctaid.z
|
||||
taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2");
|
||||
ctx->StoreInst(taskCount2, taskCountSym2->storagePtr);
|
||||
}
|
||||
|
||||
48
stdlib.ispc
48
stdlib.ispc
@@ -57,6 +57,54 @@
|
||||
#error Unknown value of ISPC_MASK_BITS
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// CUDA Specific primitives
|
||||
//
|
||||
#define CUDABLOCKSIZE 128
|
||||
// Returns the value of the __ctaid_x() builtin: the x component of the
// current CUDA block (CTA) index.
__declspec(safe,cost0)
static inline uniform int blockIndex0() {
    const uniform int ctaX = __ctaid_x();
    return ctaX;
}
|
||||
// Returns the value of the __ctaid_y() builtin: the y component of the
// current CUDA block (CTA) index.
__declspec(safe,cost0)
static inline uniform int blockIndex1() {
    const uniform int ctaY = __ctaid_y();
    return ctaY;
}
|
||||
// Returns the value of the __ctaid_z() builtin: the z component of the
// current CUDA block (CTA) index.
//
// Fix: this previously returned __ctaid_y() (copy-paste from blockIndex1),
// so the third block-index dimension silently aliased the second one.
// blockCount2() already reads the z register via __nctaid_z().
__declspec(safe,cost0)
static inline uniform int blockIndex2()
{
    return __ctaid_z();
}
|
||||
|
||||
// Returns the value of the __nctaid_x() builtin: the number of CUDA blocks
// (CTAs) along the x dimension of the grid.
__declspec(safe,cost0)
static inline uniform int blockCount0() {
    const uniform int numCtaX = __nctaid_x();
    return numCtaX;
}
|
||||
// Returns the value of the __nctaid_y() builtin: the number of CUDA blocks
// (CTAs) along the y dimension of the grid.
__declspec(safe,cost0)
static inline uniform int blockCount1() {
    const uniform int numCtaY = __nctaid_y();
    return numCtaY;
}
|
||||
// Returns the value of the __nctaid_z() builtin: the number of CUDA blocks
// (CTAs) along the z dimension of the grid.
__declspec(safe,cost0)
static inline uniform int blockCount2() {
    const uniform int numCtaZ = __nctaid_z();
    return numCtaZ;
}
|
||||
// Returns the value of the __warpsize() builtin, which reads the NVPTX
// warp-size special register.
__declspec(safe,cost0)
static inline uniform int warpSize() {
    const uniform int lanesPerWarp = __warpsize();
    return lanesPerWarp;
}
|
||||
// Returns __tid_x() masked with (warpSize() - 1).
// NOTE(review): the mask equals "tid.x modulo warp size" only when the warp
// size is a power of two -- confirm that holds for all supported targets.
__declspec(safe,cost0)
static inline uniform int laneIndex() {
    const uniform int laneMask = warpSize() - 1;
    return __tid_x() & laneMask;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Low level primitives
|
||||
|
||||
|
||||
Reference in New Issue
Block a user