Add support for RDRAND in IvyBridge.

The standard library now provides a variety of rdrand() functions that call out to RDRAND, when available. Issue #263.
2012-07-12 06:07:07 -07:00
parent 2bacebb1fb
commit 2c640f7e52
19 changed files with 525 additions and 6 deletions
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -476,6 +476,9 @@ lSetInternalFunctions(llvm::Module *module) {
        "__prefetch_read_uniform_nt",
        "__rcp_uniform_float",
        "__rcp_varying_float",
+        "__rdrand_i16",
+        "__rdrand_i32",
+        "__rdrand_i64",
        "__reduce_add_double",
        "__reduce_add_float",
        "__reduce_add_int32",
--- a/builtins/target-avx1-x2.ll
+++ b/builtins/target-avx1-x2.ll
@@ -31,6 +31,8 @@

 include(`target-avx-x2.ll')

+rdrand_decls()
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; int min/max

--- a/builtins/target-avx1.ll
+++ b/builtins/target-avx1.ll
@@ -31,6 +31,8 @@

 include(`target-avx.ll')

+rdrand_decls()
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; int min/max

--- a/builtins/target-avx11-x2.ll
+++ b/builtins/target-avx11-x2.ll
@@ -29,9 +29,46 @@
 ;;   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  

-define(`NO_HALF_DECLARES', `1')
+include(`target-avx-x2.ll')

-include(`target-avx1-x2.ll')
+rdrand_definition()
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; int min/max
+
+define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {  ;; signed 32-bit minimum of two 16-wide vectors
+  binary4to16(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)  ;; helper is defined outside this hunk; presumably applies the 4-wide pminsd to each quarter -- confirm
+  ret <16 x i32> %ret
+}
+
+define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {  ;; signed 32-bit maximum of two 16-wide vectors
+  binary4to16(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)  ;; helper is defined outside this hunk; presumably applies the 4-wide pmaxsd to each quarter -- confirm
+  ret <16 x i32> %ret
+}
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; unsigned int min/max
+
+define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {  ;; unsigned 32-bit minimum of two 16-wide vectors
+  binary4to16(ret, i32, @llvm.x86.sse41.pminud, %0, %1)  ;; helper is defined outside this hunk; presumably applies the 4-wide pminud to each quarter -- confirm
+  ret <16 x i32> %ret
+}
+
+define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {  ;; unsigned 32-bit maximum of two 16-wide vectors
+  binary4to16(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)  ;; helper is defined outside this hunk; presumably applies the 4-wide pmaxud to each quarter -- confirm
+  ret <16 x i32> %ret
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; gather
+
+gen_gather(i8)
+gen_gather(i16)
+gen_gather(i32)
+gen_gather(float)
+gen_gather(i64)
+gen_gather(double)

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float/half conversions
--- a/builtins/target-avx11.ll
+++ b/builtins/target-avx11.ll
@@ -29,9 +29,46 @@
 ;;   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  

-define(`NO_HALF_DECLARES', `1')
+include(`target-avx.ll')

-include(`target-avx1.ll')
+rdrand_definition()
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; int min/max
+
+define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {  ;; signed 32-bit minimum of two 8-wide vectors
+  binary4to8(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)  ;; helper is defined outside this hunk; presumably applies the 4-wide pminsd to each half -- confirm
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {  ;; signed 32-bit maximum of two 8-wide vectors
+  binary4to8(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)  ;; helper is defined outside this hunk; presumably applies the 4-wide pmaxsd to each half -- confirm
+  ret <8 x i32> %ret
+}
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; unsigned int min/max
+
+define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {  ;; unsigned 32-bit minimum of two 8-wide vectors
+  binary4to8(ret, i32, @llvm.x86.sse41.pminud, %0, %1)  ;; helper is defined outside this hunk; presumably applies the 4-wide pminud to each half -- confirm
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {  ;; unsigned 32-bit maximum of two 8-wide vectors
+  binary4to8(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)  ;; helper is defined outside this hunk; presumably applies the 4-wide pmaxud to each half -- confirm
+  ret <8 x i32> %ret
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; gather
+
+gen_gather(i8)
+gen_gather(i16)
+gen_gather(i32)
+gen_gather(float)
+gen_gather(i64)
+gen_gather(double)

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float/half conversions
--- a/builtins/target-avx2-x2.ll
+++ b/builtins/target-avx2-x2.ll
@@ -1,4 +1,4 @@
-;;  Copyright (c) 2010-2011, Intel Corporation
+;;  Copyright (c) 2010-2012, Intel Corporation
 ;;  All rights reserved.
 ;;
 ;;  Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,8 @@

 include(`target-avx-x2.ll')

+rdrand_definition()
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; int min/max

--- a/builtins/target-avx2.ll
+++ b/builtins/target-avx2.ll
@@ -1,4 +1,4 @@
-;;  Copyright (c) 2010-2011, Intel Corporation
+;;  Copyright (c) 2010-2012, Intel Corporation
 ;;  All rights reserved.
 ;;
 ;;  Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,8 @@

 include(`target-avx.ll')

+rdrand_definition()
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; int min/max

--- a/builtins/target-generic-common.ll
+++ b/builtins/target-generic-common.ll
@@ -40,6 +40,7 @@ include(`util.m4')
 stdlib_core()
 scans()
 reduce_equal(WIDTH)
+rdrand_decls()

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; broadcast/rotate/shuffle
--- a/builtins/target-sse2-common.ll
+++ b/builtins/target-sse2-common.ll
@@ -33,6 +33,7 @@ ctlztz()
 define_prefetches()
 define_shuffles()
 aossoa()
+rdrand_decls()

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; rcp
--- a/builtins/target-sse4-common.ll
+++ b/builtins/target-sse4-common.ll
@@ -33,6 +33,7 @@ ctlztz()
 define_prefetches()
 define_shuffles()
 aossoa()
+rdrand_decls()

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; rounding floats
--- a/builtins/util.m4
+++ b/builtins/util.m4
@@ -3712,3 +3712,48 @@ define void @__scatter64_$1(<WIDTH x i64> %ptrs, <WIDTH x $1> %values,

 '
 )
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; rdrand 
+
+define(`rdrand_decls', `
+declare i1 @__rdrand_i16(i16 * nocapture)  ;; declarations only: this expansion provides no function bodies
+declare i1 @__rdrand_i32(i32 * nocapture)
+declare i1 @__rdrand_i64(i64 * nocapture)
+')
+
+define(`rdrand_definition', `
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; rdrand: each wrapper stores the generated value through %ptr and returns the status as i1
+
+declare {i16, i32} @llvm.x86.rdrand.16()
+declare {i32, i32} @llvm.x86.rdrand.32()
+declare {i64, i32} @llvm.x86.rdrand.64()
+
+define i1 @__rdrand_i16(i16 * %ptr) {
+  %v = call {i16, i32} @llvm.x86.rdrand.16()  ;; result pair: element 0 is the value and element 1 is the status word tested below
+  %v0 = extractvalue {i16, i32} %v, 0
+  %v1 = extractvalue {i16, i32} %v, 1
+  store i16 %v0, i16 * %ptr  ;; stored unconditionally, even when the status is zero
+  %good = icmp ne i32 %v1, 0  ;; a nonzero status is reported as success
+  ret i1 %good
+}
+
+define i1 @__rdrand_i32(i32 * %ptr) {
+  %v = call {i32, i32} @llvm.x86.rdrand.32()  ;; result pair: 32-bit value plus status word
+  %v0 = extractvalue {i32, i32} %v, 0
+  %v1 = extractvalue {i32, i32} %v, 1
+  store i32 %v0, i32 * %ptr  ;; stored unconditionally, even when the status is zero
+  %good = icmp ne i32 %v1, 0  ;; a nonzero status is reported as success
+  ret i1 %good
+}
+
+define i1 @__rdrand_i64(i64 * %ptr) {
+  %v = call {i64, i32} @llvm.x86.rdrand.64()  ;; result pair: 64-bit value plus status word
+  %v0 = extractvalue {i64, i32} %v, 0
+  %v1 = extractvalue {i64, i32} %v, 1
+  store i64 %v0, i64 * %ptr  ;; stored unconditionally, even when the status is zero
+  %good = icmp ne i32 %v1, 0  ;; a nonzero status is reported as success
+  ret i1 %good
+}
+')
--- a/docs/ispc.rst
+++ b/docs/ispc.rst
@@ -140,6 +140,7 @@ Contents:
    * `Basic Math Functions`_
    * `Transcendental Functions`_
    * `Pseudo-Random Numbers`_
+    * `Random Numbers`_

  + `Output Functions`_
  + `Assertions`_
@@ -3455,6 +3456,40 @@ be used to get a pseudo-random ``float`` value.
    uniform unsigned int32 random(RNGState * uniform state)
    uniform float frandom(uniform RNGState * uniform state)

+
+Random Numbers
+--------------
+
+Some recent CPUs (including those based on the Intel(r) Ivy Bridge
+micro-architecture) provide support for generating true random numbers.  A
+few standard library functions make this functionality available:
+
+::
+
+    uniform bool rdrand(uniform int32 * uniform ptr)
+    bool rdrand(varying int32 * uniform ptr)
+    bool rdrand(uniform int32 * varying ptr)
+
+If the processor doesn't have sufficient entropy to generate a random
+number, then this function fails and returns ``false``.  Otherwise, the
+random value is stored at the location given by ``ptr`` and ``true`` is
+returned.  Therefore, this function should generally be called
+repeatedly until it succeeds, as follows:
+
+::
+
+    int r;
+    while (rdrand(&r) == false)
+        ; // empty loop body
+   
+
+In addition to the ``int32`` variants of ``rdrand()`` listed above, there
+are versions that generate ``int16``, ``float``, and ``int64`` values as
+well; the ``float`` variants store a value in the range [0, 1).
+
+Note that when compiling to targets other than ``avx1.1`` and ``avx2``, the
+``rdrand()`` functions always return ``false``, so the retry loop shown above never terminates on those targets.
+
 Output Functions
 ----------------

--- a/stdlib.ispc
+++ b/stdlib.ispc
@@ -4068,3 +4068,188 @@ static inline void seed_rng(uniform RNGState * uniform state,
 static inline void fastmath() {
    __fastmath();
 }
+
+///////////////////////////////////////////////////////////////////////////
+// rdrand
+
+static inline uniform bool rdrand(float * uniform ptr) {  // On success stores one random float in [0,1) to *ptr and returns true.
+    if (__have_native_rand == false)  // flag is defined outside this hunk; presumably false on targets without RDRAND -- confirm
+        return false;
+    else {
+        uniform int32 irand;
+        uniform bool success = __rdrand_i32(&irand);  // builtin writes the raw 32-bit value and reports whether it is valid
+        if (success) {
+            irand &= (1<<23)-1;  // keep only the low 23 bits (the width of a float mantissa)
+            *ptr = floatbits(0x3F800000 | irand)-1.0f;  // 0x3F800000 is the bit pattern of 1.0f: the OR gives [1,2), minus 1 gives [0,1)
+        }
+        return success;  // *ptr is left untouched when the draw failed
+    }
+}
+
+static inline bool rdrand(varying float * uniform ptr) {  // Per-lane variant: each active lane draws its own random float in [0,1).
+    if (__have_native_rand == false)  // flag is defined outside this hunk; presumably false on targets without RDRAND -- confirm
+        return false;
+    else {
+        bool success = false;  // varying result; set only in the pass of a lane whose draw succeeded
+        foreach_active (index) {  // one separate draw per active program instance
+            uniform int32 irand;
+            if (__rdrand_i32(&irand)) {
+                // FIXME: it probably would be preferable, here and in the
+                // following rdrand() function, to do the int->float stuff
+                // in vector form.  However, we need to be careful to not
+                // clobber any existing already-set values in *ptr with
+                // inactive lanes here...
+                irand &= (1<<23)-1;  // keep only the low 23 bits (the width of a float mantissa)
+                *ptr = floatbits(0x3F800000 | irand)-1.0f;  // bits of 1.0f OR mantissa gives [1,2); minus 1 gives [0,1)
+                success = true;  // relies on foreach_active running the body with only the current instance active
+            }
+        }
+        return success;
+    }
+}
+
+static inline bool rdrand(float * ptr) {  // Varying-pointer variant: each active lane stores a random float in [0,1) through its own pointer.
+    if (__have_native_rand == false)  // flag is defined outside this hunk; presumably false on targets without RDRAND -- confirm
+        return false;
+    else {
+        float * uniform ptrs[programCount];  // copy of the varying pointer, one slot per program instance
+        ptrs[programIndex] = ptr;
+
+        bool success = false;  // varying result; set only in the pass of a lane whose draw succeeded
+        foreach_active (index) {  // one separate draw per active program instance
+            uniform int32 irand;
+            if (__rdrand_i32(&irand)) {
+                irand &= (1<<23)-1;  // keep only the low 23 bits (the width of a float mantissa)
+                *ptrs[index] = floatbits(0x3F800000 | irand)-1.0f;  // write through the pointer belonging to lane `index`
+                success = true;
+            }
+        }
+        return success;
+    }
+}
+
+static inline uniform bool rdrand(int16 * uniform ptr) {  // Draws a single uniform 16-bit value; returns the builtin's status.
+    if (__have_native_rand == false)  // flag is defined outside this hunk; presumably false on targets without RDRAND -- confirm
+        return false;
+    else
+        return __rdrand_i16(ptr);  // writes *ptr directly; the builtin definition in util.m4 stores even when it reports failure
+}
+
+static inline bool rdrand(varying int16 * uniform ptr) {  // Per-lane variant: each active lane draws its own 16-bit value.
+    if (__have_native_rand == false)  // flag is defined outside this hunk; presumably false on targets without RDRAND -- confirm
+        return false;
+    else {
+        bool success = false;  // varying result; set only in the pass of a lane whose draw succeeded
+        foreach_active (index) {  // one separate draw per active program instance
+            uniform int16 irand;  // drawn into a temporary so *ptr changes only on success
+            if (__rdrand_i16(&irand)) {
+                *ptr = irand;  // relies on foreach_active running the body with only the current instance active
+                success = true;
+            }
+        }
+        return success;
+    }
+}
+
+static inline bool rdrand(int16 * ptr) {  // Varying-pointer variant: each active lane stores a 16-bit value through its own pointer.
+    if (__have_native_rand == false)  // flag is defined outside this hunk; presumably false on targets without RDRAND -- confirm
+        return false;
+    else {
+        int16 * uniform ptrs[programCount];  // copy of the varying pointer, one slot per program instance
+        ptrs[programIndex] = ptr;
+        bool success = false;  // varying result; set only in the pass of a lane whose draw succeeded
+
+        foreach_active (index) {  // one separate draw per active program instance
+            uniform int16 irand;  // drawn into a temporary so the target changes only on success
+            if (__rdrand_i16(&irand)) {
+                *ptrs[index] = irand;  // write through the pointer belonging to lane `index`
+                success = true;
+            }
+        }
+        return success;
+    }
+}
+
+static inline uniform bool rdrand(int32 * uniform ptr) {  // Draws a single uniform 32-bit value; returns the builtin's status.
+    if (__have_native_rand == false)  // flag is defined outside this hunk; presumably false on targets without RDRAND -- confirm
+        return false;
+    else
+        return __rdrand_i32(ptr);  // writes *ptr directly; the builtin definition in util.m4 stores even when it reports failure
+}
+
+static inline bool rdrand(varying int32 * uniform ptr) {  // Per-lane variant: each active lane draws its own 32-bit value.
+    if (__have_native_rand == false)  // flag is defined outside this hunk; presumably false on targets without RDRAND -- confirm
+        return false;
+    else {
+        bool success = false;  // varying result; set only in the pass of a lane whose draw succeeded
+        foreach_active (index) {  // one separate draw per active program instance
+            uniform int32 irand;  // drawn into a temporary so *ptr changes only on success
+            if (__rdrand_i32(&irand)) {
+                *ptr = irand;  // relies on foreach_active running the body with only the current instance active
+                success = true;
+            }
+        }
+        return success;
+    }
+}
+
+static inline bool rdrand(int32 * ptr) {  // Varying-pointer variant: each active lane stores a 32-bit value through its own pointer.
+    if (__have_native_rand == false)  // flag is defined outside this hunk; presumably false on targets without RDRAND -- confirm
+        return false;
+    else {
+        int32 * uniform ptrs[programCount];  // copy of the varying pointer, one slot per program instance
+        ptrs[programIndex] = ptr;
+        bool success = false;  // varying result; set only in the pass of a lane whose draw succeeded
+
+        foreach_active (index) {  // one separate draw per active program instance
+            uniform int32 irand;  // drawn into a temporary so the target changes only on success
+            if (__rdrand_i32(&irand)) {
+                *ptrs[index] = irand;  // write through the pointer belonging to lane `index`
+                success = true;
+            }
+        }
+        return success;
+    }
+}
+
+static inline uniform bool rdrand(int64 * uniform ptr) {  // Draws a single uniform 64-bit value; returns the builtin's status.
+    if (__have_native_rand == false)  // flag is defined outside this hunk; presumably false on targets without RDRAND -- confirm
+        return false;
+    else
+        return __rdrand_i64(ptr);  // writes *ptr directly; the builtin definition in util.m4 stores even when it reports failure
+}
+
+static inline bool rdrand(varying int64 * uniform ptr) {  // Per-lane variant: each active lane draws its own 64-bit value.
+    if (__have_native_rand == false)  // flag is defined outside this hunk; presumably false on targets without RDRAND -- confirm
+        return false;
+    else {
+        bool success = false;  // varying result; set only in the pass of a lane whose draw succeeded
+        foreach_active (index) {  // one separate draw per active program instance
+            uniform int64 irand;  // drawn into a temporary so *ptr changes only on success
+            if (__rdrand_i64(&irand)) {
+                *ptr = irand;  // relies on foreach_active running the body with only the current instance active
+                success = true;
+            }
+        }
+        return success;
+    }
+}
+
+static inline bool rdrand(int64 * ptr) {  // Varying-pointer variant: each active lane stores a 64-bit value through its own pointer.
+    if (__have_native_rand == false)  // flag is defined outside this hunk; presumably false on targets without RDRAND -- confirm
+        return false;
+    else {
+        int64 * uniform ptrs[programCount];  // copy of the varying pointer, one slot per program instance
+        ptrs[programIndex] = ptr;
+        bool success = false;  // varying result; set only in the pass of a lane whose draw succeeded
+
+        foreach_active (index) {  // one separate draw per active program instance
+            uniform int64 irand;  // drawn into a temporary so the target changes only on success
+            if (__rdrand_i64(&irand)) {
+                *ptrs[index] = irand;  // write through the pointer belonging to lane `index`
+                success = true;
+            }
+        }
+        return success;
+    }
+}
--- a/tests/rdrand-1.ispc
+++ b/tests/rdrand-1.ispc
@@ -0,0 +1,21 @@
+
+export uniform int width() { return programCount; }
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {  // Checks that the uniform float rdrand() yields a value in [0,1).
+#if !defined(ISPC_TARGET_AVX11) && !defined(ISPC_TARGET_AVX2)
+    RET[programIndex] = 1;  // other targets: write the passing value directly without calling rdrand()
+#else
+
+    uniform float r = -1;  // sentinel outside [0,1), so a missing store would fail the range check
+    uniform int count = 0;  // retry counter; incremented but never read
+    while (!rdrand(&r)) {  // retry until the draw succeeds
+        ++count;
+    }
+    RET[programIndex] = (r >= 0 && r < 1);  // 1 when r lies in [0,1), otherwise 0
+
+#endif
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 1;  // presumably the expected values that the test harness compares f_f against -- confirm
+}
--- a/tests/rdrand-2.ispc
+++ b/tests/rdrand-2.ispc
@@ -0,0 +1,19 @@
+
+export uniform int width() { return programCount; }
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {  // Checks that the per-lane float rdrand() yields values in [0,1).
+#if !defined(ISPC_TARGET_AVX11) && !defined(ISPC_TARGET_AVX2)
+    RET[programIndex] = 1;  // other targets: write the passing value directly without calling rdrand()
+#else
+
+    float r = -1;  // varying sentinel outside [0,1), so a missing store would fail the range check
+    while (!rdrand(&r))  // lanes whose draw failed stay in the loop and retry
+        ;
+    RET[programIndex] = (r >= 0 && r < 1);  // 1 when this lane's r lies in [0,1), otherwise 0
+
+#endif
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 1;  // presumably the expected values that the test harness compares f_f against -- confirm
+}
--- a/tests/rdrand-3.ispc
+++ b/tests/rdrand-3.ispc
@@ -0,0 +1,25 @@
+
+export uniform int width() { return programCount; }
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {  // Statistical check: about half of the float draws should fall below 0.5.
+#if !defined(ISPC_TARGET_AVX11) && !defined(ISPC_TARGET_AVX2)
+    RET[programIndex] = 1;  // other targets: write the passing value directly without calling rdrand()
+#else
+
+    int lessHalf = 0, moreHalf = 0;  // per-lane tallies of draws below / not below 0.5
+    for (uniform int i = 0; i < 1024*1024; ++i) {  // 1024*1024 draws per lane
+        float r = -1;
+        while (!rdrand(&r))  // lanes whose draw failed stay in the loop and retry
+            ;
+        if (r < 0.5) ++lessHalf;
+        else ++moreHalf;
+    }
+
+    float r = (double)lessHalf / (double)(lessHalf + moreHalf);  // fraction of this lane's draws that were below 0.5
+    RET[programIndex] = (r >= .49 && r < .51);  // pass when the fraction is within [0.49, 0.51)
+#endif
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 1;  // presumably the expected values that the test harness compares f_f against -- confirm
+}
--- a/tests/rdrand-4.ispc
+++ b/tests/rdrand-4.ispc
@@ -0,0 +1,33 @@
+
+export uniform int width() { return programCount; }
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {  // Statistical check: every bit of the uniform int64 draws should be set about half the time.
+#if !defined(ISPC_TARGET_AVX11) && !defined(ISPC_TARGET_AVX2)
+    RET[programIndex] = 0;  // other targets: write the passing value directly without calling rdrand()
+#else
+
+    uniform int set[64] = { 0 };  // set[b] counts how many draws had bit b set
+    uniform int count = 1024*1024;  // number of 64-bit draws
+    for (uniform int i = 0; i < count; ++i) {
+        uniform int64 r;
+        while (!rdrand(&r))  // retry until the draw succeeds
+            ;
+        for (uniform int b = 0; b < 64; ++b) 
+            if (((unsigned int64)r >> b) & 1)  // unsigned cast so the shift brings in zero bits
+                ++set[b];
+    }
+
+    RET[programIndex] = 0;  // RET accumulates the number of bit positions that fail the check
+    for (uniform int b = 0; b < 64; ++b)  {
+        float r = (double)set[b] / (double)(count);  // fraction of draws with bit b set
+        if (!(r >= .49 && r < .51)) {  // outside [0.49, 0.51): report the bit and count it as a failure
+            print("% % - %\n", b, r, set[b]);
+            ++RET[programIndex];
+        }
+    }
+#endif
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 0;  // presumably the expected values that the test harness compares f_f against -- confirm
+}
--- a/tests/rdrand-5.ispc
+++ b/tests/rdrand-5.ispc
@@ -0,0 +1,33 @@
+
+export uniform int width() { return programCount; }
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {  // Statistical check: every bit of the per-lane int32 draws should be set about half the time.
+#if !defined(ISPC_TARGET_AVX11) && !defined(ISPC_TARGET_AVX2)
+    RET[programIndex] = 0;  // other targets: write the passing value directly without calling rdrand()
+#else
+
+    int set[32] = { 0 };  // varying: each lane counts how many of its draws had bit b set
+    uniform int count = 1024*1024;  // number of draws per lane
+    for (uniform int i = 0; i < count; ++i) {
+        int32 r;
+        while (!rdrand(&r))  // lanes whose draw failed stay in the loop and retry
+            ;
+        for (uniform int b = 0; b < 32; ++b) 
+            if (((unsigned int32)r >> b) & 1)  // unsigned cast so the shift brings in zero bits
+                ++set[b];
+    }
+
+    RET[programIndex] = 0;  // RET accumulates the number of bit positions that fail the check
+    for (uniform int b = 0; b < 32; ++b)  {
+        float r = (double)set[b] / (double)(count);  // fraction of this lane's draws with bit b set
+        if (!(r >= .49 && r < .51)) {  // outside [0.49, 0.51): report the bit and count it as a failure
+            print("% % - %\n", b, r, set[b]);
+            ++RET[programIndex];
+        }
+    }
+#endif
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 0;  // presumably the expected values that the test harness compares f_f against -- confirm
+}
--- a/tests/rdrand-6.ispc
+++ b/tests/rdrand-6.ispc
@@ -0,0 +1,35 @@
+
+export uniform int width() { return programCount; }
+
+export void f_f(uniform float RET[], uniform float aFOO[]) {  // Same per-bit statistical check as a varying int32 draw, but through the varying-pointer rdrand() overload.
+#if !defined(ISPC_TARGET_AVX11) && !defined(ISPC_TARGET_AVX2)
+    RET[programIndex] = 0;  // other targets: write the passing value directly without calling rdrand()
+#else
+
+    int set[32] = { 0 };  // varying: each lane counts how many of its draws had bit b set
+    uniform int count = 1024*1024;  // number of draws per lane
+    for (uniform int i = 0; i < count; ++i) {
+        uniform int32 rr[programCount];  // one destination slot per program instance
+        int * ptr = rr + programIndex;  // varying pointer: each lane points at its own slot of rr
+        while (!rdrand(ptr))  // lanes whose draw failed stay in the loop and retry
+            ;
+        int32 r = rr[programIndex];  // read back the value stored through this lane's pointer
+        for (uniform int b = 0; b < 32; ++b) 
+            if (((unsigned int32)r >> b) & 1)  // unsigned cast so the shift brings in zero bits
+                ++set[b];
+    }
+
+    RET[programIndex] = 0;  // RET accumulates the number of bit positions that fail the check
+    for (uniform int b = 0; b < 32; ++b)  {
+        float r = (double)set[b] / (double)(count);  // fraction of this lane's draws with bit b set
+        if (!(r >= .49 && r < .51)) {  // outside [0.49, 0.51): report the bit and count it as a failure
+            print("% % - %\n", b, r, set[b]);
+            ++RET[programIndex];
+        }
+    }
+#endif
+}
+
+export void result(uniform float RET[]) {
+    RET[programIndex] = 0;  // presumably the expected values that the test harness compares f_f against -- confirm
+}