added blockIndex computations
This commit is contained in:
@@ -611,6 +611,14 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__vec4_add_int32",
|
"__vec4_add_int32",
|
||||||
"__vselect_float",
|
"__vselect_float",
|
||||||
"__vselect_i32",
|
"__vselect_i32",
|
||||||
|
"__tid_x",
|
||||||
|
"__ctaid_x",
|
||||||
|
"__ctaid_y",
|
||||||
|
"__ctaid_z",
|
||||||
|
"__nctaid_x",
|
||||||
|
"__nctaid_y",
|
||||||
|
"__nctaid_z",
|
||||||
|
"__warpsize"
|
||||||
};
|
};
|
||||||
|
|
||||||
int count = sizeof(names) / sizeof(names[0]);
|
int count = sizeof(names) / sizeof(names[0]);
|
||||||
|
|||||||
@@ -56,6 +56,66 @@ gen_scatter(i64)
|
|||||||
gen_scatter(double)
|
gen_scatter(double)
|
||||||
|
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone
|
||||||
|
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone
|
||||||
|
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() nounwind readnone
|
||||||
|
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() nounwind readnone
|
||||||
|
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() nounwind readnone
|
||||||
|
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() nounwind readnone
|
||||||
|
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() nounwind readnone
|
||||||
|
declare i32 @llvm.nvvm.read.ptx.sreg.warpsize() nounwind readnone
|
||||||
|
|
||||||
|
;; __tid_x: thin wrapper that returns the value read from the
;; tid.x PTX special register through the NVVM intrinsic.
define i32 @__tid_x() nounwind readnone alwaysinline {
  %tid_x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  ret i32 %tid_x
}
|
||||||
|
;; __warpsize: thin wrapper that returns the value read from the
;; warpsize PTX special register through the NVVM intrinsic.
define i32 @__warpsize() nounwind readnone alwaysinline {
  %warp_size = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
  ret i32 %warp_size
}
|
||||||
|
|
||||||
|
|
||||||
|
;; __ctaid_x: thin wrapper that returns the value read from the
;; ctaid.x PTX special register through the NVVM intrinsic.
define i32 @__ctaid_x() nounwind readnone alwaysinline {
  %ctaid_x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
  ret i32 %ctaid_x
}
|
||||||
|
;; __ctaid_y: thin wrapper that returns the value read from the
;; ctaid.y PTX special register through the NVVM intrinsic.
define i32 @__ctaid_y() nounwind readnone alwaysinline {
  %ctaid_y = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
  ret i32 %ctaid_y
}
|
||||||
|
;; __ctaid_z: thin wrapper that returns the value read from the
;; ctaid.z PTX special register through the NVVM intrinsic.
define i32 @__ctaid_z() nounwind readnone alwaysinline {
  %ctaid_z = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
  ret i32 %ctaid_z
}
|
||||||
|
|
||||||
|
;; __nctaid_x: thin wrapper that returns the value read from the
;; nctaid.x PTX special register through the NVVM intrinsic.
define i32 @__nctaid_x() nounwind readnone alwaysinline {
  %nctaid_x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
  ret i32 %nctaid_x
}
|
||||||
|
;; __nctaid_y: thin wrapper that returns the value read from the
;; nctaid.y PTX special register through the NVVM intrinsic.
define i32 @__nctaid_y() nounwind readnone alwaysinline {
  %nctaid_y = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
  ret i32 %nctaid_y
}
|
||||||
|
;; __nctaid_z: thin wrapper that returns the value read from the
;; nctaid.z PTX special register through the NVVM intrinsic.
define i32 @__nctaid_z() nounwind readnone alwaysinline {
  %nctaid_z = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
  ret i32 %nctaid_z
}
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
define <1 x i8> @__vselect_i8(<1 x i8>, <1 x i8> ,
|
define <1 x i8> @__vselect_i8(<1 x i8>, <1 x i8> ,
|
||||||
<1 x i32> %mask) nounwind readnone alwaysinline {
|
<1 x i32> %mask) nounwind readnone alwaysinline {
|
||||||
; %mv = trunc <1 x i32> %mask to <1 x i8>
|
; %mv = trunc <1 x i32> %mask to <1 x i8>
|
||||||
|
|||||||
17
func.cpp
17
func.cpp
@@ -282,17 +282,34 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
|
taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
|
||||||
ctx->StoreInst(taskCount, taskCountSym->storagePtr);
|
ctx->StoreInst(taskCount, taskCountSym->storagePtr);
|
||||||
|
|
||||||
|
/* nvptx map:
|
||||||
|
* programCount : llvm.nvvm.read.ptx.sreg.warpsize
|
||||||
|
* programIndex : llvm.ptx.read.laneid _or_ llvm.nvvm.read.ptx.sreg.tid.x & (programCount - 1)
|
||||||
|
* taskIndex0 : llvm.nvvm.read.ptx.sreg.ctaid.x
|
||||||
|
* taskIndex1 : llvm.nvvm.read.ptx.sreg.ctaid.y
|
||||||
|
* taskIndex2 : llvm.nvvm.read.ptx.sreg.ctaid.z
|
||||||
|
* taskCount0 : llvm.nvvm.read.ptx.sreg.nctaid.x
|
||||||
|
* taskCount1 : llvm.nvvm.read.ptx.sreg.nctaid.y
|
||||||
|
* taskCount2 : llvm.nvvm.read.ptx.sreg.nctaid.z
|
||||||
|
*/
|
||||||
|
|
||||||
|
// llvm.nvvm.read.ptx.sreg.ctaid.x
|
||||||
taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0");
|
taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0");
|
||||||
ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
|
ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
|
||||||
|
// llvm.nvvm.read.ptx.sreg.ctaid.y
|
||||||
taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1");
|
taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1");
|
||||||
ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr);
|
ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr);
|
||||||
|
// llvm.nvvm.read.ptx.sreg.ctaid.z
|
||||||
taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2");
|
taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2");
|
||||||
ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr);
|
ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr);
|
||||||
|
|
||||||
|
// llvm.nvvm.read.ptx.sreg.nctaid.x
|
||||||
taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0");
|
taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0");
|
||||||
ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
|
ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
|
||||||
|
// llvm.nvvm.read.ptx.sreg.nctaid.y
|
||||||
taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1");
|
taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1");
|
||||||
ctx->StoreInst(taskCount1, taskCountSym1->storagePtr);
|
ctx->StoreInst(taskCount1, taskCountSym1->storagePtr);
|
||||||
|
// llvm.nvvm.read.ptx.sreg.nctaid.z
|
||||||
taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2");
|
taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2");
|
||||||
ctx->StoreInst(taskCount2, taskCountSym2->storagePtr);
|
ctx->StoreInst(taskCount2, taskCountSym2->storagePtr);
|
||||||
}
|
}
|
||||||
|
|||||||
48
stdlib.ispc
48
stdlib.ispc
@@ -57,6 +57,54 @@
|
|||||||
#error Unknown value of ISPC_MASK_BITS
|
#error Unknown value of ISPC_MASK_BITS
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
// CUDA Specific primitives
|
||||||
|
//
|
||||||
|
#define CUDABLOCKSIZE 128
|
||||||
|
// Returns the value of the __ctaid_x() builtin (block index, dimension 0).
__declspec(safe,cost0)
static inline uniform int blockIndex0() {
    const uniform int ctaX = __ctaid_x();
    return ctaX;
}
|
||||||
|
// Returns the value of the __ctaid_y() builtin (block index, dimension 1).
__declspec(safe,cost0)
static inline uniform int blockIndex1() {
    const uniform int ctaY = __ctaid_y();
    return ctaY;
}
|
||||||
|
// Returns the value of the __ctaid_z() builtin (block index, dimension 2).
//
// Fix: this previously returned __ctaid_y(), a copy-paste slip from
// blockIndex1() that made dimension 2 report the dimension 1 index.
__declspec(safe,cost0)
static inline uniform int blockIndex2()
{
    return __ctaid_z();
}
|
||||||
|
|
||||||
|
// Returns the value of the __nctaid_x() builtin (block count, dimension 0).
__declspec(safe,cost0)
static inline uniform int blockCount0() {
    const uniform int nctaX = __nctaid_x();
    return nctaX;
}
|
||||||
|
// Returns the value of the __nctaid_y() builtin (block count, dimension 1).
__declspec(safe,cost0)
static inline uniform int blockCount1() {
    const uniform int nctaY = __nctaid_y();
    return nctaY;
}
|
||||||
|
// Returns the value of the __nctaid_z() builtin (block count, dimension 2).
__declspec(safe,cost0)
static inline uniform int blockCount2() {
    const uniform int nctaZ = __nctaid_z();
    return nctaZ;
}
|
||||||
|
// Returns the value of the __warpsize() builtin.
__declspec(safe,cost0)
static inline uniform int warpSize() {
    const uniform int lanesPerWarp = __warpsize();
    return lanesPerWarp;
}
|
||||||
|
// Returns __tid_x() masked with (warpSize() - 1).
// NOTE(review): the mask only yields "thread index modulo warp size" when
// warpSize() is a power of two -- confirm for all supported targets.
__declspec(safe,cost0)
static inline uniform int laneIndex() {
    const uniform int laneMask = warpSize() - 1;
    return __tid_x() & laneMask;
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// Low level primitives
|
// Low level primitives
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user