merged with nvptx

This commit is contained in:
Evghenii
2014-02-20 11:01:58 +01:00
68 changed files with 8181 additions and 470 deletions

View File

@@ -0,0 +1,130 @@
#include <cstdio>
#define PRINT_BUF_SIZE 4096
#define uint64_t unsigned long long
/* Device-side strlen: number of chars in str before the terminating NUL. */
static __device__ size_t d_strlen(const char *str)
{
    size_t length = 0;
    while (str[length] != '\0')
        ++length;
    return length;
}
/*
 * Device-side strncat: appends at most n chars of src to the NUL-terminated
 * string in dest, then writes a terminating NUL (so up to n+1 bytes are
 * stored past the old end of dest). Returns dest.
 */
static __device__ char* d_strncat(char *dest, const char *src, size_t n)
{
    char *tail = dest + d_strlen(dest);
    size_t copied = 0;
    while (copied < n && src[copied] != '\0') {
        tail[copied] = src[copied];
        ++copied;
    }
    tail[copied] = '\0';
    return dest;
}
/*
 * Appends the NUL-terminated string `str` at the write cursor `bufp` of the
 * enclosing function's `printString` buffer, truncating it to the space left
 * in the first PRINT_BUF_SIZE bytes.  Jumps to the enclosing `done` label as
 * soon as the buffer is full.
 *
 * The cursor is advanced by the number of characters that were actually
 * stored (d_strlen(bufp)), not by d_strlen(str): when the copy is truncated
 * the latter would push bufp beyond the end of printString and the
 * terminator writes at `done` would land out of bounds.
 */
#define APPEND(str) \
    do { \
        int offset = bufp - &printString[0]; \
        *bufp = '\0'; \
        d_strncat(bufp, str, PRINT_BUF_SIZE-offset); \
        /* bufp now points at exactly what was appended */ \
        bufp += d_strlen(bufp); \
        if (bufp >= &printString[PRINT_BUF_SIZE]) \
            goto done; \
    } while (0) /* eat semicolon */
/*
 * Body of a switch case: formats the single value of `type` that `ptr`
 * points at into tmpBuf using `fmt`, appends it to the output buffer and
 * leaves the switch.  The temporary lives in its own scope so that later
 * case labels do not jump across its initialization.
 */
#define PRINT_SCALAR(fmt, type) \
    { \
        type scalarValue = *static_cast<type *>(ptr); \
        sprintf(tmpBuf, fmt, scalarValue); \
    } \
    APPEND(tmpBuf); \
    break
/*
 * Body of a switch case: emits '[', then `width` comma-separated entries and
 * a closing ']'.  An entry whose bit is set in `mask` is formatted with
 * `fmt`; any other entry is shown as the "(( * )) " placeholder.
 * NOTE(review): every entry is read from *ptr (not ptr[lane]), so all active
 * lanes print the same value - confirm this is what the NVPTX caller expects.
 */
#define PRINT_VECTOR(fmt, type) \
    *bufp = '['; \
    ++bufp; \
    if (bufp == &printString[PRINT_BUF_SIZE]) \
        break; \
    for (int lane = 0; lane < width; ++lane) { \
        type laneValue = *((type *)ptr); \
        const bool laneActive = (mask & (1ull << lane)) != 0; \
        /* inactive lanes get a placeholder instead of their value */ \
        if (laneActive) \
            sprintf(tmpBuf, fmt, laneValue); \
        else \
            sprintf(tmpBuf, "(( * )) "); \
        APPEND(tmpBuf); \
        const bool lastLane = (lane == width - 1); \
        *bufp++ = lastLane ? ']' : ','; \
    } \
    break
/*
 * Builds the text of a print() call into a local buffer on the device.
 *
 * format : text to emit; every '%' marks where the next argument is inserted.
 * types  : one character per '%', selecting how that argument is read:
 *            b/B bool, i/I int, u/U unsigned int, f/F float,
 *            l/L long long, v/V unsigned long long, d/D double, p/P pointer.
 *          A lower-case code prints one value; an upper-case code prints a
 *          bracketed, comma-separated list of `width` entries in which the
 *          entries whose bit is clear in `mask` are replaced by a placeholder.
 *          Any other code appends "UNKNOWN TYPE " followed by the code itself.
 * width  : number of entries printed for the upper-case (vector) codes.
 * mask   : bit i set means entry i is printed.
 * args   : args[k] points at the storage of the k-th argument.
 *
 * At most PRINT_BUF_SIZE characters of payload are kept; a newline and a NUL
 * are always appended after it.
 *
 * NOTE(review): nothing visible in this function hands printString to an
 * output routine; the assembled text is dropped when the function returns.
 */
extern "C"
__device__ void __do_print_nvptx(const char *format, const char *types, int width, uint64_t mask,
                                 void **args) {
    // PRINT_BUF_SIZE bytes of payload, +2 for the newline and the NUL stored
    // at "done".  (With only +1 the NUL was written one byte past the array
    // whenever the payload filled the buffer completely.)
    char printString[PRINT_BUF_SIZE+2];
    char *bufp = &printString[0];
    char tmpBuf[256];
    const char trueBuf[] = "true";
    const char falseBuf[] = "false";
    int argCount = 0;

    while (*format && bufp < &printString[PRINT_BUF_SIZE]) {
        // Format strings are just single percent signs.
        if (*format != '%') {
            *bufp++ = *format;
        }
        else {
            if (*types) {
                void *ptr = args[argCount++];
                // Based on the encoding in the types string, cast the
                // value appropriately and print it with a reasonable
                // printf() formatting string.
                switch (*types) {
                case 'b': {
                    const char *tmpBuf1 = *((bool *)ptr) ? trueBuf : falseBuf;
                    APPEND(tmpBuf1);
                    break;
                }
                case 'B': {
                    *bufp++ = '[';
                    if (bufp == &printString[PRINT_BUF_SIZE])
                        break;
                    for (int i = 0; i < width; ++i) {
                        // The same bool (*ptr) is read for every entry.
                        const bool val = *((bool *)ptr);
                        if (mask & (1ull << i)) {
                            const char *tmpBuf1 = val ? trueBuf : falseBuf;
                            APPEND(tmpBuf1);
                        }
                        else
                            APPEND("_________");
                        *bufp++ = (i != width-1) ? ',' : ']';
                    }
                    break;
                }
                case 'i': PRINT_SCALAR("%d", int);
                case 'I': PRINT_VECTOR("%d", int);
                case 'u': PRINT_SCALAR("%u", unsigned int);
                case 'U': PRINT_VECTOR("%u", unsigned int);
                case 'f': PRINT_SCALAR("%f", float);
                case 'F': PRINT_VECTOR("%f", float);
                case 'l': PRINT_SCALAR("%lld", long long);
                case 'L': PRINT_VECTOR("%lld", long long);
                case 'v': PRINT_SCALAR("%llu", unsigned long long);
                case 'V': PRINT_VECTOR("%llu", unsigned long long);
                case 'd': PRINT_SCALAR("%f", double);
                case 'D': PRINT_VECTOR("%f", double);
                case 'p': PRINT_SCALAR("%p", void *);
                case 'P': PRINT_VECTOR("%p", void *);
                default:
                    APPEND("UNKNOWN TYPE ");
                    *bufp++ = *types;
                }
                ++types;
            }
        }
        ++format;
    }
done:
    // The cursor may sit at (or, if an append was truncated, beyond) the end
    // of the payload area; pull it back so both trailing bytes stay inside
    // printString.
    if (bufp > &printString[PRINT_BUF_SIZE])
        bufp = &printString[PRINT_BUF_SIZE];
    *bufp++ = '\n';
    *bufp = '\0';
}

View File

@@ -185,6 +185,81 @@ void __do_print(const char *format, const char *types, int width, uint64_t mask,
fflush(stdout);
}
/*
 * Print entry point used for the PTX (NVPTX) target only.
 *
 * The formatting implementation below is compiled out with "#if 0", so as
 * written every call ignores its arguments and passes a fixed notice to
 * __puts_nvptx().
 */
/* Declared here, defined elsewhere; receives the finished string. */
int __puts_nvptx(const char *);
void __do_print_nvptx(const char *format, const char *types, int width, uint64_t mask,
void **args) {
#if 0
/* Disabled implementation: expands `format` into printString, replacing each
   '%' with the next argument decoded according to `types`, then sends the
   result to __puts_nvptx().  It relies on the PRINT_BUF_SIZE, APPEND,
   PRINT_SCALAR and PRINT_VECTOR macros and the Bool type, none of which are
   visible in this hunk. */
char printString[PRINT_BUF_SIZE+1]; // +1 for trailing NUL
char *bufp = &printString[0];
char tmpBuf[256];
int argCount = 0;
while (*format && bufp < &printString[PRINT_BUF_SIZE]) {
// Format strings are just single percent signs.
if (*format != '%') {
*bufp++ = *format;
}
else {
if (*types) {
void *ptr = args[argCount++];
// Based on the encoding in the types string, cast the
// value appropriately and print it with a reasonable
// printf() formatting string.
switch (*types) {
case 'b': {
sprintf(tmpBuf, "%s", *((Bool *)ptr) ? "true" : "false");
APPEND(tmpBuf);
break;
}
case 'B': {
// Vector of bools: one entry per lane, placeholder for lanes masked off.
*bufp++ = '[';
if (bufp == &printString[PRINT_BUF_SIZE])
break;
for (int i = 0; i < width; ++i) {
if (mask & (1ull << i)) {
sprintf(tmpBuf, "%s", ((Bool *)ptr)[i] ? "true" : "false");
APPEND(tmpBuf);
}
else
APPEND("_________");
*bufp++ = (i != width-1) ? ',' : ']';
}
break;
}
// Lower-case codes use the scalar macro, upper-case codes the vector macro.
case 'i': PRINT_SCALAR("%d", int);
case 'I': PRINT_VECTOR("%d", int);
case 'u': PRINT_SCALAR("%u", unsigned int);
case 'U': PRINT_VECTOR("%u", unsigned int);
case 'f': PRINT_SCALAR("%f", float);
case 'F': PRINT_VECTOR("%f", float);
case 'l': PRINT_SCALAR("%lld", long long);
case 'L': PRINT_VECTOR("%lld", long long);
case 'v': PRINT_SCALAR("%llu", unsigned long long);
case 'V': PRINT_VECTOR("%llu", unsigned long long);
case 'd': PRINT_SCALAR("%f", double);
case 'D': PRINT_VECTOR("%f", double);
case 'p': PRINT_SCALAR("%p", void *);
case 'P': PRINT_VECTOR("%p", void *);
default:
APPEND("UNKNOWN TYPE ");
*bufp++ = *types;
}
++types;
}
}
++format;
}
done:
// Terminate the line and the string, then hand it to the PTX puts routine.
*bufp = '\n'; bufp++;
*bufp = '\0';
__puts_nvptx(printString);
#else
// Active path: formatting is not available, emit a fixed notice instead.
__puts_nvptx("---nvptx printing is not support---\n");
#endif
}
int __num_cores() {
#if defined(_MSC_VER) || defined(__MINGW32__)

View File

@@ -288,4 +288,5 @@ define i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
;; int8/int16 builtins
define_avgs()
declare_nvptx()

View File

@@ -10,6 +10,7 @@ packed_load_and_store()
scans()
int64minmax()
aossoa()
declare_nvptx()
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; masked store

View File

@@ -392,4 +392,4 @@ declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind
;; int8/int16 builtins
define_avgs()
declare_nvptx()

View File

@@ -344,3 +344,4 @@ packed_load_and_store(4)
;; prefetch
define_prefetches()
declare_nvptx()

2235
builtins/target-nvptx.ll Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -274,3 +274,4 @@ define i64 @__popcnt_int64(i64) nounwind readnone alwaysinline {
define_avgs()
declare_nvptx()

View File

@@ -278,3 +278,5 @@ define i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
%call = call i64 @llvm.ctpop.i64(i64 %0)
ret i64 %call
}
declare_nvptx()

3417
builtins/util-nvptx.m4 Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -4541,3 +4541,60 @@ define(`rcpd_decl', `
declare double @__rcp_uniform_double(double)
declare <WIDTH x double> @__rcp_varying_double(<WIDTH x double>)
')
;; Emits LLVM declarations (no bodies) for the NVPTX helper functions:
;; program, warp and task queries returning i32, pointer conversions that
;; take an i64 pointer in address space 3 or 4 and return a plain i64 pointer,
;; and a helper that turns a vector of i1 into an i64.
define(`declare_nvptx',
`
declare i32 @__program_index() nounwind readnone alwaysinline
declare i32 @__program_count() nounwind readnone alwaysinline
declare i32 @__warp_index() nounwind readnone alwaysinline
declare i32 @__task_index0() nounwind readnone alwaysinline
declare i32 @__task_index1() nounwind readnone alwaysinline
declare i32 @__task_index2() nounwind readnone alwaysinline
declare i32 @__task_index() nounwind readnone alwaysinline
declare i32 @__task_count0() nounwind readnone alwaysinline
declare i32 @__task_count1() nounwind readnone alwaysinline
declare i32 @__task_count2() nounwind readnone alwaysinline
declare i32 @__task_count() nounwind readnone alwaysinline
declare i64* @__cvt_loc2gen(i64 addrspace(3)*) nounwind readnone alwaysinline
declare i64* @__cvt_const2gen(i64 addrspace(4)*) nounwind readnone alwaysinline
declare i64* @__cvt_loc2gen_var(i64 addrspace(3)*) nounwind readnone alwaysinline
declare i64 @__movmsk_ptx(<WIDTH x i1>) nounwind readnone alwaysinline;
')
;; Declares one varying global atomic that takes a vector of 64-bit addresses,
;; a vector of values and a lane mask vector.
;; Arguments: 1 = vector width, 2 = operation name, 3 = LLVM element type,
;; 4 = type suffix used in the symbol name.
define(`global_atomic_varying',`
declare <$1 x $3> @__atomic_$2_varying_$4_global(<$1 x i64> %ptr, <$1 x $3> %val, <$1 x MASK> %maskv) nounwind alwaysinline
')
;; Same as the plain varying atomic declaration, but for operations that take
;; an extra comparison vector ahead of the value vector.
;; Arguments: 1 = vector width, 2 = operation name, 3 = LLVM element type,
;; 4 = type suffix used in the symbol name.
define(`global_atomic_cas_varying',`
declare <$1 x $3> @__atomic_$2_varying_$4_global(<$1 x i64> %ptr, <$1 x $3> %cmp, <$1 x $3> %val, <$1 x MASK> %maskv) nounwind alwaysinline
')
;; compare and exchange: 32 and 64 bit integers, float, double
global_atomic_cas_varying(WIDTH, compare_exchange, i32, int32)
global_atomic_cas_varying(WIDTH, compare_exchange, i64, int64)
global_atomic_cas_varying(WIDTH, compare_exchange, float, float)
global_atomic_cas_varying(WIDTH, compare_exchange, double, double)
;; swap: 32 and 64 bit integers, float, double
global_atomic_varying(WIDTH, swap, i32, int32)
global_atomic_varying(WIDTH, swap, i64, int64)
global_atomic_varying(WIDTH, swap, float, float)
global_atomic_varying(WIDTH, swap, double, double)
;; 32 bit integer arithmetic, bitwise and min/max operations
global_atomic_varying(WIDTH, add, i32, int32)
global_atomic_varying(WIDTH, sub, i32, int32)
global_atomic_varying(WIDTH, and, i32, int32)
global_atomic_varying(WIDTH, or, i32, int32)
global_atomic_varying(WIDTH, xor, i32, int32)
global_atomic_varying(WIDTH, min, i32, int32)
global_atomic_varying(WIDTH, max, i32, int32)
global_atomic_varying(WIDTH, umin, i32, uint32)
global_atomic_varying(WIDTH, umax, i32, uint32)
;; 64 bit integer arithmetic, bitwise and min/max operations
global_atomic_varying(WIDTH, add, i64, int64)
global_atomic_varying(WIDTH, sub, i64, int64)
global_atomic_varying(WIDTH, and, i64, int64)
global_atomic_varying(WIDTH, or, i64, int64)
global_atomic_varying(WIDTH, xor, i64, int64)
global_atomic_varying(WIDTH, min, i64, int64)
global_atomic_varying(WIDTH, max, i64, int64)
global_atomic_varying(WIDTH, umin, i64, uint64)
global_atomic_varying(WIDTH, umax, i64, uint64)