diff --git a/builtins.cpp b/builtins.cpp index 00f72fc8..64f06e1f 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -476,6 +476,9 @@ lSetInternalFunctions(llvm::Module *module) { "__prefetch_read_uniform_nt", "__rcp_uniform_float", "__rcp_varying_float", + "__rdrand_i16", + "__rdrand_i32", + "__rdrand_i64", "__reduce_add_double", "__reduce_add_float", "__reduce_add_int32", diff --git a/builtins/target-avx1-x2.ll b/builtins/target-avx1-x2.ll index efde5d10..e06134d9 100644 --- a/builtins/target-avx1-x2.ll +++ b/builtins/target-avx1-x2.ll @@ -31,6 +31,8 @@ include(`target-avx-x2.ll') +rdrand_decls() + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max diff --git a/builtins/target-avx1.ll b/builtins/target-avx1.ll index 64f8ad33..1b47955a 100644 --- a/builtins/target-avx1.ll +++ b/builtins/target-avx1.ll @@ -31,6 +31,8 @@ include(`target-avx.ll') +rdrand_decls() + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max diff --git a/builtins/target-avx11-x2.ll b/builtins/target-avx11-x2.ll index 884255df..cdb83726 100644 --- a/builtins/target-avx11-x2.ll +++ b/builtins/target-avx11-x2.ll @@ -29,9 +29,46 @@ ;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-define(`NO_HALF_DECLARES', `1') +include(`target-avx-x2.ll') -include(`target-avx1-x2.ll') +rdrand_definition() + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; int min/max + +define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { + binary4to16(ret, i32, @llvm.x86.sse41.pminsd, %0, %1) + ret <16 x i32> %ret +} + +define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { + binary4to16(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1) + ret <16 x i32> %ret +} + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; unsigned int min/max + +define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { + binary4to16(ret, i32, @llvm.x86.sse41.pminud, %0, %1) + ret <16 x i32> %ret +} + +define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { + binary4to16(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1) + ret <16 x i32> %ret +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; gather + +gen_gather(i8) +gen_gather(i16) +gen_gather(i32) +gen_gather(float) +gen_gather(i64) +gen_gather(double) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; float/half conversions diff --git a/builtins/target-avx11.ll b/builtins/target-avx11.ll index 35aebe91..d3ab9f13 100644 --- a/builtins/target-avx11.ll +++ b/builtins/target-avx11.ll @@ -29,9 +29,46 @@ ;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-define(`NO_HALF_DECLARES', `1') +include(`target-avx.ll') -include(`target-avx1.ll') +rdrand_definition() + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; int min/max + +define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { + binary4to8(ret, i32, @llvm.x86.sse41.pminsd, %0, %1) + ret <8 x i32> %ret +} + +define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { + binary4to8(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1) + ret <8 x i32> %ret +} + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; unsigned int min/max + +define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { + binary4to8(ret, i32, @llvm.x86.sse41.pminud, %0, %1) + ret <8 x i32> %ret +} + +define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { + binary4to8(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1) + ret <8 x i32> %ret +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; gather + +gen_gather(i8) +gen_gather(i16) +gen_gather(i32) +gen_gather(float) +gen_gather(i64) +gen_gather(double) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; float/half conversions diff --git a/builtins/target-avx2-x2.ll b/builtins/target-avx2-x2.ll index 1ca3443c..1d2a2093 100644 --- a/builtins/target-avx2-x2.ll +++ b/builtins/target-avx2-x2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2012, Intel Corporation ;; All rights reserved. 
;; ;; Redistribution and use in source and binary forms, with or without @@ -31,6 +31,8 @@ include(`target-avx-x2.ll') +rdrand_definition() + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max diff --git a/builtins/target-avx2.ll b/builtins/target-avx2.ll index 7152657e..45496779 100644 --- a/builtins/target-avx2.ll +++ b/builtins/target-avx2.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2010-2012, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without @@ -31,6 +31,8 @@ include(`target-avx.ll') +rdrand_definition() + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; int min/max diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index 77c7aabe..7b4cfd9c 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -40,6 +40,7 @@ include(`util.m4') stdlib_core() scans() reduce_equal(WIDTH) +rdrand_decls() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; broadcast/rotate/shuffle diff --git a/builtins/target-sse2-common.ll b/builtins/target-sse2-common.ll index e0b7f40c..c6a3afe2 100644 --- a/builtins/target-sse2-common.ll +++ b/builtins/target-sse2-common.ll @@ -33,6 +33,7 @@ ctlztz() define_prefetches() define_shuffles() aossoa() +rdrand_decls() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; rcp diff --git a/builtins/target-sse4-common.ll b/builtins/target-sse4-common.ll index 69461fcd..4b8751b5 100644 --- a/builtins/target-sse4-common.ll +++ b/builtins/target-sse4-common.ll @@ -33,6 +33,7 @@ ctlztz() define_prefetches() define_shuffles() aossoa() +rdrand_decls() ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; rounding floats diff --git a/builtins/util.m4 b/builtins/util.m4 index 974c799c..614ac998 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ 
-3712,3 +3712,48 @@ define void @__scatter64_$1( %ptrs, %values, ' ) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; rdrand + +define(`rdrand_decls', ` +declare i1 @__rdrand_i16(i16 * nocapture) +declare i1 @__rdrand_i32(i32 * nocapture) +declare i1 @__rdrand_i64(i64 * nocapture) +') + +define(`rdrand_definition', ` +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; rdrand + +declare {i16, i32} @llvm.x86.rdrand.16() +declare {i32, i32} @llvm.x86.rdrand.32() +declare {i64, i32} @llvm.x86.rdrand.64() + +define i1 @__rdrand_i16(i16 * %ptr) { + %v = call {i16, i32} @llvm.x86.rdrand.16() + %v0 = extractvalue {i16, i32} %v, 0 + %v1 = extractvalue {i16, i32} %v, 1 + store i16 %v0, i16 * %ptr + %good = icmp ne i32 %v1, 0 + ret i1 %good +} + +define i1 @__rdrand_i32(i32 * %ptr) { + %v = call {i32, i32} @llvm.x86.rdrand.32() + %v0 = extractvalue {i32, i32} %v, 0 + %v1 = extractvalue {i32, i32} %v, 1 + store i32 %v0, i32 * %ptr + %good = icmp ne i32 %v1, 0 + ret i1 %good +} + +define i1 @__rdrand_i64(i64 * %ptr) { + %v = call {i64, i32} @llvm.x86.rdrand.64() + %v0 = extractvalue {i64, i32} %v, 0 + %v1 = extractvalue {i64, i32} %v, 1 + store i64 %v0, i64 * %ptr + %good = icmp ne i32 %v1, 0 + ret i1 %good +} +') diff --git a/docs/ispc.rst b/docs/ispc.rst index f1f959c9..98250e39 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -140,6 +140,7 @@ Contents: * `Basic Math Functions`_ * `Transcendental Functions`_ * `Pseudo-Random Numbers`_ + * `Random Numbers`_ + `Output Functions`_ + `Assertions`_ @@ -3455,6 +3456,40 @@ be used to get a pseudo-random ``float`` value. uniform unsigned int32 random(RNGState * uniform state) uniform float frandom(uniform RNGState * uniform state) + +Random Numbers +-------------- + +Some recent CPUs (including those based on the Intel(r) Ivy Bridge +micro-architecture) provide support for generating true random numbers.
A +few standard library functions make this functionality available: + +:: + + uniform bool rdrand(uniform int32 * uniform ptr) + bool rdrand(varying int32 * uniform ptr) + bool rdrand(uniform int32 * varying ptr) + +If the processor doesn't have sufficient entropy to generate a random +number, then this function fails and returns ``false``. Otherwise, if the +processor is successful, the random value is stored at the location given by the pointer +and ``true`` is returned. Therefore, this function should generally be +used as follows, called repeatedly until it is successful: + +:: + + int r; + while (rdrand(&r) == false) + ; // empty loop body + + +In addition to the ``int32`` variants of ``rdrand()`` listed above, there +are versions that generate ``int16``, ``float``, and ``int64`` values as +well; the ``float`` versions generate values in the range [0, 1). + +Note that when compiling to targets other than ``avx1.1`` and ``avx2``, the +``rdrand()`` functions always return ``false``. + Output Functions ---------------- diff --git a/stdlib.ispc b/stdlib.ispc index a7499930..3774c4a4 100644 --- a/stdlib.ispc +++ b/stdlib.ispc @@ -4068,3 +4068,188 @@ static inline void seed_rng(uniform RNGState * uniform state, static inline void fastmath() { __fastmath(); } + +/////////////////////////////////////////////////////////////////////////// +// rdrand + +static inline uniform bool rdrand(float * uniform ptr) { + if (__have_native_rand == false) + return false; + else { + uniform int32 irand; + uniform bool success = __rdrand_i32(&irand); + if (success) { + irand &= (1<<23)-1; + *ptr = floatbits(0x3F800000 | irand)-1.0f; + } + return success; + } +} + +static inline bool rdrand(varying float * uniform ptr) { + if (__have_native_rand == false) + return false; + else { + bool success = false; + foreach_active (index) { + uniform int32 irand; + if (__rdrand_i32(&irand)) { + // FIXME: it probably would be preferable, here and in the + // following rdrand() function, to do the int->float stuff + // in vector form.
However, we need to be careful to not + // clobber any existing already-set values in *ptr with + // inactive lanes here... + irand &= (1<<23)-1; + *ptr = floatbits(0x3F800000 | irand)-1.0f; + success = true; + } + } + return success; + } +} + +static inline bool rdrand(float * ptr) { + if (__have_native_rand == false) + return false; + else { + float * uniform ptrs[programCount]; + ptrs[programIndex] = ptr; + + bool success = false; + foreach_active (index) { + uniform int32 irand; + if (__rdrand_i32(&irand)) { + irand &= (1<<23)-1; + *ptrs[index] = floatbits(0x3F800000 | irand)-1.0f; + success = true; + } + } + return success; + } +} + +static inline uniform bool rdrand(int16 * uniform ptr) { + if (__have_native_rand == false) + return false; + else + return __rdrand_i16(ptr); +} + +static inline bool rdrand(varying int16 * uniform ptr) { + if (__have_native_rand == false) + return false; + else { + bool success = false; + foreach_active (index) { + uniform int16 irand; + if (__rdrand_i16(&irand)) { + *ptr = irand; + success = true; + } + } + return success; + } +} + +static inline bool rdrand(int16 * ptr) { + if (__have_native_rand == false) + return false; + else { + int16 * uniform ptrs[programCount]; + ptrs[programIndex] = ptr; + bool success = false; + + foreach_active (index) { + uniform int16 irand; + if (__rdrand_i16(&irand)) { + *ptrs[index] = irand; + success = true; + } + } + return success; + } +} + +static inline uniform bool rdrand(int32 * uniform ptr) { + if (__have_native_rand == false) + return false; + else + return __rdrand_i32(ptr); +} + +static inline bool rdrand(varying int32 * uniform ptr) { + if (__have_native_rand == false) + return false; + else { + bool success = false; + foreach_active (index) { + uniform int32 irand; + if (__rdrand_i32(&irand)) { + *ptr = irand; + success = true; + } + } + return success; + } +} + +static inline bool rdrand(int32 * ptr) { + if (__have_native_rand == false) + return false; + else { + int32 * uniform 
ptrs[programCount]; + ptrs[programIndex] = ptr; + bool success = false; + + foreach_active (index) { + uniform int32 irand; + if (__rdrand_i32(&irand)) { + *ptrs[index] = irand; + success = true; + } + } + return success; + } +} + +static inline uniform bool rdrand(int64 * uniform ptr) { + if (__have_native_rand == false) + return false; + else + return __rdrand_i64(ptr); +} + +static inline bool rdrand(varying int64 * uniform ptr) { + if (__have_native_rand == false) + return false; + else { + bool success = false; + foreach_active (index) { + uniform int64 irand; + if (__rdrand_i64(&irand)) { + *ptr = irand; + success = true; + } + } + return success; + } +} + +static inline bool rdrand(int64 * ptr) { + if (__have_native_rand == false) + return false; + else { + int64 * uniform ptrs[programCount]; + ptrs[programIndex] = ptr; + bool success = false; + + foreach_active (index) { + uniform int64 irand; + if (__rdrand_i64(&irand)) { + *ptrs[index] = irand; + success = true; + } + } + return success; + } +} diff --git a/tests/rdrand-1.ispc b/tests/rdrand-1.ispc new file mode 100644 index 00000000..53ca6121 --- /dev/null +++ b/tests/rdrand-1.ispc @@ -0,0 +1,21 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { +#if !defined(ISPC_TARGET_AVX11) && !defined(ISPC_TARGET_AVX2) + RET[programIndex] = 1; +#else + + uniform float r = -1; + uniform int count = 0; + while (!rdrand(&r)) { + ++count; + } + RET[programIndex] = (r >= 0 && r < 1); + +#endif +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1; +} diff --git a/tests/rdrand-2.ispc b/tests/rdrand-2.ispc new file mode 100644 index 00000000..7021a271 --- /dev/null +++ b/tests/rdrand-2.ispc @@ -0,0 +1,19 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { +#if !defined(ISPC_TARGET_AVX11) && !defined(ISPC_TARGET_AVX2) + RET[programIndex] = 1; +#else + + float r = -1; 
+ while (!rdrand(&r)) + ; + RET[programIndex] = (r >= 0 && r < 1); + +#endif +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1; +} diff --git a/tests/rdrand-3.ispc b/tests/rdrand-3.ispc new file mode 100644 index 00000000..a9fc93a3 --- /dev/null +++ b/tests/rdrand-3.ispc @@ -0,0 +1,25 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { +#if !defined(ISPC_TARGET_AVX11) && !defined(ISPC_TARGET_AVX2) + RET[programIndex] = 1; +#else + + int lessHalf = 0, moreHalf = 0; + for (uniform int i = 0; i < 1024*1024; ++i) { + float r = -1; + while (!rdrand(&r)) + ; + if (r < 0.5) ++lessHalf; + else ++moreHalf; + } + + float r = (double)lessHalf / (double)(lessHalf + moreHalf); + RET[programIndex] = (r >= .49 && r < .51); +#endif +} + +export void result(uniform float RET[]) { + RET[programIndex] = 1; +} diff --git a/tests/rdrand-4.ispc b/tests/rdrand-4.ispc new file mode 100644 index 00000000..3b38b7b1 --- /dev/null +++ b/tests/rdrand-4.ispc @@ -0,0 +1,33 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { +#if !defined(ISPC_TARGET_AVX11) && !defined(ISPC_TARGET_AVX2) + RET[programIndex] = 0; +#else + + uniform int set[64] = { 0 }; + uniform int count = 1024*1024; + for (uniform int i = 0; i < count; ++i) { + uniform int64 r; + while (!rdrand(&r)) + ; + for (uniform int b = 0; b < 64; ++b) + if (((unsigned int64)r >> b) & 1) + ++set[b]; + } + + RET[programIndex] = 0; + for (uniform int b = 0; b < 64; ++b) { + float r = (double)set[b] / (double)(count); + if (!(r >= .49 && r < .51)) { + print("% % - %\n", b, r, set[b]); + ++RET[programIndex]; + } + } +#endif +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +} diff --git a/tests/rdrand-5.ispc b/tests/rdrand-5.ispc new file mode 100644 index 00000000..cbf59a97 --- /dev/null +++ b/tests/rdrand-5.ispc @@ -0,0 +1,33 @@ + +export uniform int width() { 
return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { +#if !defined(ISPC_TARGET_AVX11) && !defined(ISPC_TARGET_AVX2) + RET[programIndex] = 0; +#else + + int set[32] = { 0 }; + uniform int count = 1024*1024; + for (uniform int i = 0; i < count; ++i) { + int32 r; + while (!rdrand(&r)) + ; + for (uniform int b = 0; b < 32; ++b) + if (((unsigned int32)r >> b) & 1) + ++set[b]; + } + + RET[programIndex] = 0; + for (uniform int b = 0; b < 32; ++b) { + float r = (double)set[b] / (double)(count); + if (!(r >= .49 && r < .51)) { + print("% % - %\n", b, r, set[b]); + ++RET[programIndex]; + } + } +#endif +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +} diff --git a/tests/rdrand-6.ispc b/tests/rdrand-6.ispc new file mode 100644 index 00000000..93137625 --- /dev/null +++ b/tests/rdrand-6.ispc @@ -0,0 +1,35 @@ + +export uniform int width() { return programCount; } + +export void f_f(uniform float RET[], uniform float aFOO[]) { +#if !defined(ISPC_TARGET_AVX11) && !defined(ISPC_TARGET_AVX2) + RET[programIndex] = 0; +#else + + int set[32] = { 0 }; + uniform int count = 1024*1024; + for (uniform int i = 0; i < count; ++i) { + uniform int32 rr[programCount]; + int * ptr = rr + programIndex; + while (!rdrand(ptr)) + ; + int32 r = rr[programIndex]; + for (uniform int b = 0; b < 32; ++b) + if (((unsigned int32)r >> b) & 1) + ++set[b]; + } + + RET[programIndex] = 0; + for (uniform int b = 0; b < 32; ++b) { + float r = (double)set[b] / (double)(count); + if (!(r >= .49 && r < .51)) { + print("% % - %\n", b, r, set[b]); + ++RET[programIndex]; + } + } +#endif +} + +export void result(uniform float RET[]) { + RET[programIndex] = 0; +}