diff --git a/examples_cuda/aobench/ao.cu b/examples_cuda/aobench/ao.cu
index ca0db34b..8a80281f 100644
--- a/examples_cuda/aobench/ao.cu
+++ b/examples_cuda/aobench/ao.cu
@@ -93,6 +93,49 @@ struct Float3
   }
 };
 
+///////////////////////////////////////////////////////////////////////////
+// RNG stuff: generator state plus __device__ helpers that seed it and draw unsigned int / float values.
+// RNGState holds the four unsigned int words that random() reads and rewrites on every call.
+struct RNGState {
+    unsigned int z1, z2, z3, z4;
+};
+// random(): advances all four words of *state in place and returns their XOR as the next value.
+__device__
+static inline unsigned int random(RNGState * state)
+{
+    unsigned int b;
+    // Each pair of lines updates one word: b is a shifted XOR of the old word; the word is then masked, shifted and XORed with b.
+    b  = ((state->z1 << 6) ^ state->z1) >> 13;
+    state->z1 = ((state->z1 & 4294967294U) << 18) ^ b;  // mask 0xFFFFFFFE clears bit 0
+    b  = ((state->z2 << 2) ^ state->z2) >> 27;
+    state->z2 = ((state->z2 & 4294967288U) << 2) ^ b;   // mask 0xFFFFFFF8 clears the low 3 bits
+    b  = ((state->z3 << 13) ^ state->z3) >> 21;
+    state->z3 = ((state->z3 & 4294967280U) << 7) ^ b;   // mask 0xFFFFFFF0 clears the low 4 bits
+    b  = ((state->z4 << 3) ^ state->z4) >> 12;
+    state->z4 = ((state->z4 & 4294967168U) << 13) ^ b;  // mask 0xFFFFFF80 clears the low 7 bits
+    return (state->z1 ^ state->z2 ^ state->z3 ^ state->z4);
+}
+// NOTE(review): the shifts and masks in random() look like L'Ecuyer's LFSR113 recurrence - confirm against the reference.
+// frandom(): draws one value from random() (advancing *state once) and maps it to a float in [0, 1).
+__device__
+static inline float frandom(RNGState * state)
+{
+    unsigned int irand = random(state);
+    irand &= (1ul<<23)-1;  // keep only the low 23 bits (the width of a float mantissa)
+    return __int_as_float(0x3F800000 | irand)-1.0f;  // 0x3F800000 is the bit pattern of 1.0f, so the result is [1, 2) minus 1
+}
+// seed_rng(): fills all four words of *state from a single seed value.
+__device__
+static inline void seed_rng(RNGState * state,
+                            unsigned int seed) {
+    state->z1 = seed;
+    state->z2 = seed ^ 0xbeeff00d;
+    state->z3 = ((seed & 0xfffful) << 16) | (seed >> 16);  // seed with its two 16-bit halves swapped
+    state->z4 = (((seed & 0xfful) << 24) | ((seed & 0xff00ul) << 8) |
+                 ((seed & 0xff0000ul) >> 8) | (seed & 0xff000000ul) >> 24);  // seed with its four bytes reversed
+}
+// NOTE(review): with seed == 0, seed_rng() leaves z1, z3 and z4 at 0 and random() keeps them at 0, so only z2 varies - confirm callers never pass 0.
+
 #define programCount 32
 #define programIndex (threadIdx.x & 31)
 #define taskIndex0 (blockIdx.x*4 + (threadIdx.x >> 5))
diff --git a/examples_cuda/aobench/ao_cu.cpp b/examples_cuda/aobench/ao_cu.cpp
index 1432a380..992ed232 100755
--- a/examples_cuda/aobench/ao_cu.cpp
+++ b/examples_cuda/aobench/ao_cu.cpp
@@ -232,7 +232,6 @@ std::vector readBinary(const char * filename)
 
 extern "C"
 {
-
   void *CUDAAlloc(void **handlePtr, int64_t size, int32_t alignment)
   {
     return NULL;