/* sprintf (used by PRINT_SCALAR) and size_t come from the C stdio header. */
#include <stdio.h>

/* Number of characters the APPEND macro allows in printString before it
 * jumps to the `done` label. */
#define PRINT_BUF_SIZE 4096

/* uint64_t is provided as a macro here, so any header included after this
 * point that declares uint64_t itself would be rewritten by it; keep this
 * line below every #include. */
#define uint64_t unsigned long long

/*
 * Device-side strlen.
 *
 * Returns the number of characters in `str` that precede the first NUL.
 * `str` must be NUL-terminated; the scan has no other stopping condition.
 */
static __device__ size_t d_strlen(const char *str) {
    const char *s;
    for (s = str; *s; ++s)
        ;
    return (s - str);
}

/*
 * Device-side strncat.
 *
 * Appends at most `n` characters of `src` to the end of the NUL-terminated
 * string in `dest`, stopping early at the NUL in `src`, and then always
 * writes a terminating NUL.  The caller must therefore provide room for
 * d_strlen(dest) + min(n, d_strlen(src)) + 1 bytes in `dest`.
 *
 * Returns `dest`.
 */
static __device__ char *d_strncat(char *dest, const char *src, size_t n) {
    size_t dest_len = d_strlen(dest);
    size_t i;

    for (i = 0; i < n && src[i] != '\0'; i++)
        dest[dest_len + i] = src[i];
    /* The terminator is written even when n characters were copied. */
    dest[dest_len + i] = '\0';

    return dest;
}

/*
 * APPEND(str): append the NUL-terminated string `str` at the write cursor.
 *
 * Expects the expansion site to provide `printString` (the output buffer),
 * `bufp` (a char* cursor into it) and a label named `done`.
 *
 * The cursor is advanced by the full length of `str`, even if d_strncat
 * copied fewer characters, so a truncated append pushes `bufp` to or past
 * &printString[PRINT_BUF_SIZE] and control jumps to `done`.
 *
 * NOTE(review): when the copy is cut off at PRINT_BUF_SIZE - offset
 * characters, d_strncat stores its NUL at printString[PRINT_BUF_SIZE].
 * This assumes printString is declared with at least PRINT_BUF_SIZE + 1
 * bytes -- confirm at the declaration.
 */
#define APPEND(str) \
    do { \
        int offset = bufp - &printString[0]; \
        *bufp = '\0'; \
        d_strncat(bufp, str, PRINT_BUF_SIZE-offset); \
        bufp += d_strlen(str); \
        if (bufp >= &printString[PRINT_BUF_SIZE]) \
            goto done; \
    } while (0) /* eat semicolon */

/*
 * PRINT_SCALAR(fmt, type): format the value that `ptr` points to, read as
 * `type`, into `tmpBuf` using `fmt`, then APPEND it.
 *
 * Expects `tmpBuf` and `ptr` at the expansion site in addition to what
 * APPEND needs.  The expansion ends in `break` and is deliberately not
 * wrapped in do/while, so the `break` applies to the construct enclosing the
 * expansion (presumably a switch case -- confirm against the caller).
 *
 * NOTE(review): sprintf is unbounded; the size of tmpBuf is not visible
 * here, so the formatted value is assumed to fit -- confirm.
 */
#define PRINT_SCALAR(fmt, type) \
    sprintf(tmpBuf, fmt, *((type *)ptr)); \
    APPEND(tmpBuf); \
    break

/*
 * PRINT_VECTOR(fmt, type): starts by writing '[' at the cursor and then
 * iterates over `width` lanes, consulting `mask` for each lane.
 *
 * NOTE(review): the remainder of this macro is cut off in the available
 * source; the visible fragment is reproduced as-is and must be completed
 * from the original file before this can be used.
 */
#define PRINT_VECTOR(fmt, type) \
    *bufp++ = '['; \
    if (bufp == &printString[PRINT_BUF_SIZE]) break; \
    for (int i = 0; i < width; ++i) { \
        /* only print the value if the current lane is executing */ \
        type val0 = *((type*)ptr); \
        type val = val0; \
        if (mask & (1ull<