merged with nvptx
This commit is contained in:
130
builtins/__do_print_nvptx.cu
Normal file
130
builtins/__do_print_nvptx.cu
Normal file
@@ -0,0 +1,130 @@
|
||||
#include <cstdio>
|
||||
|
||||
#define PRINT_BUF_SIZE 4096
|
||||
#define uint64_t unsigned long long
|
||||
|
||||
/* Device-side replacement for strlen(): counts the characters that precede
 * the terminating NUL of `str`. `str` must be NUL-terminated. */
static __device__ size_t d_strlen(const char *str)
{
    size_t count = 0;

    while (str[count] != '\0')
        ++count;
    return count;
}
|
||||
|
||||
/* Device-side replacement for strncat(): appends at most `n` characters of
 * `src` to the NUL-terminated string in `dest`, then writes a terminating NUL.
 * `dest` must therefore have room for its current contents plus up to `n`
 * characters plus the terminator. Returns `dest`. */
static __device__ char* d_strncat(char *dest, const char *src, size_t n)
{
    /* Start writing at the current terminator of dest. */
    char *out = dest + d_strlen(dest);
    size_t copied = 0;

    while (copied < n && src[copied] != '\0') {
        *out++ = src[copied];
        ++copied;
    }
    *out = '\0';

    return dest;
}
|
||||
|
||||
/* Appends the NUL-terminated string `str` at `bufp` inside `printString`,
 * truncating it to the space left before printString[PRINT_BUF_SIZE], and
 * advances `bufp` by the number of characters that were actually written.
 * Advancing by the copied length (not by the length of `str`) keeps `bufp`
 * inside the buffer when the text had to be truncated. Jumps to the enclosing
 * function's `done` label once the buffer is full.
 * Expects `bufp`, `printString` and a `done:` label in the expanding scope. */
#define APPEND(str)                                             \
    do {                                                        \
        int offset = bufp - &printString[0];                    \
        *bufp = '\0';                                           \
        d_strncat(bufp, str, PRINT_BUF_SIZE-offset);            \
        /* bufp now starts the freshly appended text */         \
        bufp += d_strlen(bufp);                                 \
        if (bufp >= &printString[PRINT_BUF_SIZE])               \
            goto done;                                          \
    } while (0) /* eat semicolon */
|
||||
|
||||
|
||||
/* Body of a switch case: formats the single value of `type` that `ptr` points
 * to using `fmt`, appends it to the output buffer and leaves the switch.
 * The formatting is bounded by the size of `tmpBuf` so that long expansions
 * (e.g. a very large double printed with "%f") are truncated instead of
 * overrunning the scratch buffer.
 * Expects `ptr` and the array `tmpBuf` in the expanding scope. */
#define PRINT_SCALAR(fmt, type)                                 \
    snprintf(tmpBuf, sizeof(tmpBuf), fmt, *((type *)ptr));      \
    APPEND(tmpBuf);                                             \
    break
|
||||
|
||||
/* Body of a switch case: appends a bracketed, comma-separated list with one
 * entry per lane (`width` entries). Lanes whose bit is set in `mask` print the
 * value formatted with `fmt`; the others print the "(( * )) " placeholder.
 * Formatting is bounded by the size of `tmpBuf` so an oversized expansion is
 * truncated instead of overrunning the scratch buffer.
 * NOTE(review): every lane prints the value read from *ptr; the lane number
 * is not used to select an element -- confirm this is intended.
 * Expects `ptr`, `width`, `mask`, `bufp`, `printString` and the array
 * `tmpBuf` in the expanding scope. */
#define PRINT_VECTOR(fmt, type)                                 \
    *bufp++ = '[';                                              \
    if (bufp == &printString[PRINT_BUF_SIZE]) break;            \
    for (int i = 0; i < width; ++i) {                           \
        /* only print the value if the current lane is executing */ \
        type val0 = *((type*)ptr);                              \
        type val = val0;                                        \
        if (mask & (1ull<<i))                                   \
            snprintf(tmpBuf, sizeof(tmpBuf), fmt, val);         \
        else                                                    \
            snprintf(tmpBuf, sizeof(tmpBuf), "(( * )) ");       \
        APPEND(tmpBuf);                                         \
        *bufp++ = (i != width-1 ? ',' : ']');                   \
    }                                                           \
    break
|
||||
|
||||
/**
 * Formats a print request into a local character buffer.
 *
 * format - text to emit; every '%' consumes the next entry of `types`/`args`,
 *          any other character is copied verbatim.
 * types  - one code character per '%'. Lowercase codes take the scalar path,
 *          uppercase codes the bracketed per-lane path:
 *          b/B bool, i/I int, u/U unsigned int, f/F float, l/L long long,
 *          v/V unsigned long long, d/D double, p/P pointer.
 * width  - number of lanes listed by the per-lane cases.
 * mask   - bit i set means lane i is printed; other lanes print a placeholder.
 * args   - array of pointers to the argument values, consumed in order.
 *
 * Output longer than PRINT_BUF_SIZE characters is truncated.
 *
 * NOTE(review): the assembled string is not handed to any output routine in
 * the code visible here -- confirm where it is meant to be emitted.
 */
extern "C"
__device__ void __do_print_nvptx(const char *format, const char *types, int width, uint64_t mask,
                                 void **args) {
    char printString[PRINT_BUF_SIZE+1]; // +1 for trailing NUL
    char *bufp = &printString[0];
    char tmpBuf[256];
    const char trueBuf[] = "true";
    const char falseBuf[] = "false";

    int argCount = 0;
    while (*format && bufp < &printString[PRINT_BUF_SIZE]) {
        // Format strings are just single percent signs.
        if (*format != '%') {
            *bufp++ = *format;
        }
        else {
            if (*types) {
                void *ptr = args[argCount++];
                // Based on the encoding in the types string, cast the
                // value appropriately and print it with a reasonable
                // printf() formatting string.
                switch (*types) {
                case 'b': {
                    const char *tmpBuf1 = *((bool *)ptr) ? trueBuf : falseBuf;
                    APPEND(tmpBuf1);
                    break;
                }
                case 'B': {
                    *bufp++ = '[';
                    if (bufp == &printString[PRINT_BUF_SIZE])
                        break;
                    for (int i = 0; i < width; ++i) {
                        // NOTE(review): every lane reads *ptr; the lane
                        // number does not select an element -- confirm.
                        bool val0 = *((bool*)ptr);
                        bool val = val0;
                        if (mask & (1ull << i)) {
                            const char *tmpBuf1 = val ? trueBuf : falseBuf;
                            APPEND(tmpBuf1);
                        }
                        else
                            APPEND("_________");
                        *bufp++ = (i != width-1) ? ',' : ']';
                    }
                    break;
                }
                case 'i': PRINT_SCALAR("%d", int);
                case 'I': PRINT_VECTOR("%d", int);
                case 'u': PRINT_SCALAR("%u", unsigned int);
                case 'U': PRINT_VECTOR("%u", unsigned int);
                case 'f': PRINT_SCALAR("%f", float);
                case 'F': PRINT_VECTOR("%f", float);
                case 'l': PRINT_SCALAR("%lld", long long);
                case 'L': PRINT_VECTOR("%lld", long long);
                case 'v': PRINT_SCALAR("%llu", unsigned long long);
                case 'V': PRINT_VECTOR("%llu", unsigned long long);
                case 'd': PRINT_SCALAR("%f", double);
                case 'D': PRINT_VECTOR("%f", double);
                case 'p': PRINT_SCALAR("%p", void *);
                case 'P': PRINT_VECTOR("%p", void *);
                default:
                    APPEND("UNKNOWN TYPE ");
                    *bufp++ = *types;
                }
                ++types;
            }
        }
        ++format;
    }

done:
    // The newline and the NUL need two slots. When the buffer is full, step
    // back so both writes stay inside printString (the last payload
    // character is overwritten by the newline in that case).
    if (bufp > &printString[PRINT_BUF_SIZE-1])
        bufp = &printString[PRINT_BUF_SIZE-1];
    *bufp++ = '\n';
    *bufp = '\0';
}
|
||||
@@ -185,6 +185,81 @@ void __do_print(const char *format, const char *types, int width, uint64_t mask,
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
/* Print entry point used for the PTX target only. */
|
||||
int __puts_nvptx(const char *);
|
||||
/* Host-side print entry point for the PTX target.
 *
 * The formatting implementation is compiled out (#if 0); the active branch
 * ignores all arguments and passes a fixed notice to __puts_nvptx().
 *
 * The disabled code takes:
 *   format - text with one '%' per argument,
 *   types  - one type-code character per '%',
 *   width  - number of lanes for the per-lane (uppercase) codes,
 *   mask   - bit i set means lane i is printed,
 *   args   - array of pointers to the argument values.
 */
void __do_print_nvptx(const char *format, const char *types, int width, uint64_t mask,
                      void **args) {
#if 0
    char printString[PRINT_BUF_SIZE+1]; // +1 for trailing NUL
    char *bufp = &printString[0];
    char tmpBuf[256];

    int argCount = 0;
    while (*format && bufp < &printString[PRINT_BUF_SIZE]) {
        // Format strings are just single percent signs.
        if (*format != '%') {
            *bufp++ = *format;
        }
        else {
            if (*types) {
                void *ptr = args[argCount++];
                // Based on the encoding in the types string, cast the
                // value appropriately and print it with a reasonable
                // printf() formatting string.
                switch (*types) {
                case 'b': {
                    sprintf(tmpBuf, "%s", *((Bool *)ptr) ? "true" : "false");
                    APPEND(tmpBuf);
                    break;
                }
                case 'B': {
                    *bufp++ = '[';
                    if (bufp == &printString[PRINT_BUF_SIZE])
                        break;
                    for (int i = 0; i < width; ++i) {
                        if (mask & (1ull << i)) {
                            sprintf(tmpBuf, "%s", ((Bool *)ptr)[i] ? "true" : "false");
                            APPEND(tmpBuf);
                        }
                        else
                            APPEND("_________");
                        *bufp++ = (i != width-1) ? ',' : ']';
                    }
                    break;
                }
                case 'i': PRINT_SCALAR("%d", int);
                case 'I': PRINT_VECTOR("%d", int);
                case 'u': PRINT_SCALAR("%u", unsigned int);
                case 'U': PRINT_VECTOR("%u", unsigned int);
                case 'f': PRINT_SCALAR("%f", float);
                case 'F': PRINT_VECTOR("%f", float);
                case 'l': PRINT_SCALAR("%lld", long long);
                case 'L': PRINT_VECTOR("%lld", long long);
                case 'v': PRINT_SCALAR("%llu", unsigned long long);
                case 'V': PRINT_VECTOR("%llu", unsigned long long);
                case 'd': PRINT_SCALAR("%f", double);
                case 'D': PRINT_VECTOR("%f", double);
                case 'p': PRINT_SCALAR("%p", void *);
                case 'P': PRINT_VECTOR("%p", void *);
                default:
                    APPEND("UNKNOWN TYPE ");
                    *bufp++ = *types;
                }
                ++types;
            }
        }
        ++format;
    }

done:
    // The newline and the NUL need two slots; step back when the buffer is
    // full so that both writes stay inside printString.
    if (bufp > &printString[PRINT_BUF_SIZE-1])
        bufp = &printString[PRINT_BUF_SIZE-1];
    *bufp++ = '\n';
    *bufp = '\0';
    __puts_nvptx(printString);
#else
    /* The arguments are intentionally unused while formatting is disabled. */
    (void)format;
    (void)types;
    (void)width;
    (void)mask;
    (void)args;
    __puts_nvptx("---nvptx printing is not support---\n");
#endif
}
|
||||
|
||||
|
||||
int __num_cores() {
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
|
||||
@@ -288,4 +288,5 @@ define i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
|
||||
;; int8/int16 builtins
|
||||
|
||||
define_avgs()
|
||||
declare_nvptx()
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
aossoa()
|
||||
declare_nvptx()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
@@ -392,4 +392,4 @@ declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind
|
||||
;; int8/int16 builtins
|
||||
|
||||
define_avgs()
|
||||
|
||||
declare_nvptx()
|
||||
|
||||
@@ -344,3 +344,4 @@ packed_load_and_store(4)
|
||||
;; prefetch
|
||||
|
||||
define_prefetches()
|
||||
declare_nvptx()
|
||||
|
||||
2235
builtins/target-nvptx.ll
Normal file
2235
builtins/target-nvptx.ll
Normal file
File diff suppressed because it is too large
Load Diff
@@ -274,3 +274,4 @@ define i64 @__popcnt_int64(i64) nounwind readnone alwaysinline {
|
||||
|
||||
define_avgs()
|
||||
|
||||
declare_nvptx()
|
||||
|
||||
@@ -278,3 +278,5 @@ define i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
|
||||
%call = call i64 @llvm.ctpop.i64(i64 %0)
|
||||
ret i64 %call
|
||||
}
|
||||
|
||||
declare_nvptx()
|
||||
|
||||
3417
builtins/util-nvptx.m4
Normal file
3417
builtins/util-nvptx.m4
Normal file
File diff suppressed because it is too large
Load Diff
@@ -4541,3 +4541,60 @@ define(`rcpd_decl', `
|
||||
declare double @__rcp_uniform_double(double)
|
||||
declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>)
|
||||
')
|
||||
|
||||
;; Emits external declarations for the program, warp and task position and
;; count queries, the address-space conversion helpers and the movmsk helper.
define(`declare_nvptx',
`
declare i32 @__program_index() nounwind readnone alwaysinline
declare i32 @__program_count() nounwind readnone alwaysinline
declare i32 @__warp_index() nounwind readnone alwaysinline
declare i32 @__task_index0() nounwind readnone alwaysinline
declare i32 @__task_index1() nounwind readnone alwaysinline
declare i32 @__task_index2() nounwind readnone alwaysinline
declare i32 @__task_index() nounwind readnone alwaysinline
declare i32 @__task_count0() nounwind readnone alwaysinline
declare i32 @__task_count1() nounwind readnone alwaysinline
declare i32 @__task_count2() nounwind readnone alwaysinline
declare i32 @__task_count() nounwind readnone alwaysinline
declare i64* @__cvt_loc2gen(i64 addrspace(3)*) nounwind readnone alwaysinline
declare i64* @__cvt_const2gen(i64 addrspace(4)*) nounwind readnone alwaysinline
declare i64* @__cvt_loc2gen_var(i64 addrspace(3)*) nounwind readnone alwaysinline
declare i64 @__movmsk_ptx(<WIDTH x i1>) nounwind readnone alwaysinline
')
|
||||
|
||||
;; Emits the declaration of one vector-valued atomic builtin on global memory.
;; Arguments: 1 = lane count, 2 = operation name, 3 = LLVM value type,
;; 4 = type suffix placed in the symbol name.
define(`global_atomic_varying',`
declare <$1 x $3> @__atomic_$2_varying_$4_global(<$1 x i64> %ptr, <$1 x $3> %val, <$1 x MASK> %maskv) nounwind alwaysinline
')
|
||||
|
||||
;; Same as the two-operand form above the call sites, with an extra comparison
;; operand ahead of the new value.
;; Arguments: 1 = lane count, 2 = operation name, 3 = LLVM value type,
;; 4 = type suffix placed in the symbol name.
define(`global_atomic_cas_varying',`
declare <$1 x $3> @__atomic_$2_varying_$4_global(<$1 x i64> %ptr, <$1 x $3> %cmp, <$1 x $3> %val, <$1 x MASK> %maskv) nounwind alwaysinline
')
|
||||
|
||||
;; compare-and-exchange declarations for each supported value type
global_atomic_cas_varying(WIDTH, compare_exchange, i32, int32)
global_atomic_cas_varying(WIDTH, compare_exchange, i64, int64)
global_atomic_cas_varying(WIDTH, compare_exchange, float, float)
global_atomic_cas_varying(WIDTH, compare_exchange, double, double)

;; exchange declarations for each supported value type
global_atomic_varying(WIDTH, swap, i32, int32)
global_atomic_varying(WIDTH, swap, i64, int64)
global_atomic_varying(WIDTH, swap, float, float)
global_atomic_varying(WIDTH, swap, double, double)

;; 32-bit integer operations
global_atomic_varying(WIDTH, add, i32, int32)
global_atomic_varying(WIDTH, sub, i32, int32)
global_atomic_varying(WIDTH, and, i32, int32)
global_atomic_varying(WIDTH, or, i32, int32)
global_atomic_varying(WIDTH, xor, i32, int32)
global_atomic_varying(WIDTH, min, i32, int32)
global_atomic_varying(WIDTH, max, i32, int32)
global_atomic_varying(WIDTH, umin, i32, uint32)
global_atomic_varying(WIDTH, umax, i32, uint32)

;; 64-bit integer operations
global_atomic_varying(WIDTH, add, i64, int64)
global_atomic_varying(WIDTH, sub, i64, int64)
global_atomic_varying(WIDTH, and, i64, int64)
global_atomic_varying(WIDTH, or, i64, int64)
global_atomic_varying(WIDTH, xor, i64, int64)
global_atomic_varying(WIDTH, min, i64, int64)
global_atomic_varying(WIDTH, max, i64, int64)
global_atomic_varying(WIDTH, umin, i64, uint64)
global_atomic_varying(WIDTH, umax, i64, uint64)
|
||||
|
||||
Reference in New Issue
Block a user