improvements

This commit is contained in:
Evghenii
2014-02-05 12:04:36 +01:00
parent 048da693c5
commit 686c1d676d
8 changed files with 142 additions and 306 deletions

View File

@@ -62,57 +62,25 @@
///////////////////////////////////////////////////////////////////////////
// CUDA-specific primitives
//
#define CUDABLOCKSIZE 128
#define WARPSIZE2 5
#define WARPSIZE (1<<WARPSIZE2)
/***************/
__declspec(safe,cost0)
static inline uniform int warpSize()
{
return WARPSIZE; //__warpsize();
}
__declspec(safe,cost0) static inline varying int __programIndex() { return __program_index(); }
__declspec(safe,cost0) static inline uniform int __programCount() { return __program_count(); }
__declspec(safe,cost0) static inline uniform int __warpIndex() { return __warp_index(); }
/***************/
__declspec(safe,cost0)
static inline varying int laneIndex()
{
return __tid_x() & (WARPSIZE-1) ; //& (warpSize()-1);
}
__declspec(safe,cost0) static inline uniform int __taskIndex0() { return __task_index0(); }
__declspec(safe,cost0) static inline uniform int __taskIndex1() { return __task_index1(); }
__declspec(safe,cost0) static inline uniform int __taskIndex2() { return __task_index2(); }
__declspec(safe,cost0) static inline uniform int __taskIndex () { return __task_index (); }
/***************/
__declspec(safe,cost0)
static inline uniform int blockIndex0()
{
return (__ctaid_x() * (CUDABLOCKSIZE >> WARPSIZE2)) + (__tid_x() >> WARPSIZE2);
}
/***************/
__declspec(safe,cost0)
static inline uniform int blockIndex1()
{
return __ctaid_y();
}
/***************/
__declspec(safe,cost0)
static inline uniform int blockIndex2()
{
return __ctaid_z();
}
/***************/
__declspec(safe,cost0)
static inline uniform int blockCount0()
{
return __nctaid_x() * (CUDABLOCKSIZE >> WARPSIZE2);
}
/***************/
__declspec(safe,cost0)
static inline uniform int blockCount1()
{
return __nctaid_y();
}
/***************/
__declspec(safe,cost0)
static inline uniform int blockCount2()
{
return __nctaid_z();
}
__declspec(safe,cost0) static inline uniform int __taskCount0() { return __task_count0(); }
__declspec(safe,cost0) static inline uniform int __taskCount1() { return __task_count1(); }
__declspec(safe,cost0) static inline uniform int __taskCount2() { return __task_count2(); }
__declspec(safe,cost0) static inline uniform int __taskCount () { return __task_count (); }
///////////////////////////////////////////////////////////////////////////
// Low level primitives