From 6c7df4cb6b9c9e69fe91a090862bcf0f48d12786 Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 8 Jun 2012 15:55:00 -0700 Subject: [PATCH] Add initial support for "avx1.1" targets for Ivy Bridge. So far, only the use of the float/half conversion instructions distinguishes this from the "avx1" target. Partial work on issue #263. --- Makefile | 4 +- builtins.cpp | 22 ++++++++++ builtins/dispatch.ll | 67 ++++++++++++++++------------ builtins/target-avx1-x2.ll | 2 + builtins/target-avx1.ll | 2 + builtins/target-avx11-x2.ll | 87 +++++++++++++++++++++++++++++++++++++ builtins/target-avx11.ll | 71 ++++++++++++++++++++++++++++++ ispc.cpp | 48 ++++++++++++++++---- ispc.h | 5 ++- ispc.vcxproj | 28 ++++++++++++ 10 files changed, 296 insertions(+), 40 deletions(-) create mode 100644 builtins/target-avx11-x2.ll create mode 100644 builtins/target-avx11.ll diff --git a/Makefile b/Makefile index 0d61c611..d4a8e3d3 100644 --- a/Makefile +++ b/Makefile @@ -84,8 +84,8 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \ type.cpp util.cpp HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \ opt.h stmt.h sym.h type.h util.h -TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \ - generic-16 generic-32 generic-64 generic-1 +TARGETS=avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 \ + generic-4 generic-8 generic-16 generic-32 generic-64 generic-1 BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \ builtins/dispatch.ll BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \ diff --git a/builtins.cpp b/builtins.cpp index db55758a..714390d7 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -804,6 +804,26 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod FATAL("logic error in DefineStdlib"); } break; + case Target::AVX11: + switch (g->target.vectorWidth) { + case 8: + extern unsigned char builtins_bitcode_avx11[]; + 
extern int builtins_bitcode_avx11_length; + AddBitcodeToModule(builtins_bitcode_avx11, + builtins_bitcode_avx11_length, + module, symbolTable); + break; + case 16: + extern unsigned char builtins_bitcode_avx11_x2[]; + extern int builtins_bitcode_avx11_x2_length; + AddBitcodeToModule(builtins_bitcode_avx11_x2, + builtins_bitcode_avx11_x2_length, + module, symbolTable); + break; + default: + FATAL("logic error in DefineStdlib"); + } + break; case Target::AVX2: switch (g->target.vectorWidth) { case 8: @@ -898,6 +918,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod lDefineConstantInt("__have_native_half", g->target.hasHalf, module, symbolTable); + lDefineConstantInt("__have_native_rand", g->target.hasRand, module, + symbolTable); lDefineConstantInt("__have_native_transcendentals", g->target.hasTranscendentals, module, symbolTable); diff --git a/builtins/dispatch.ll b/builtins/dispatch.ll index e61292aa..b9db3543 100644 --- a/builtins/dispatch.ll +++ b/builtins/dispatch.ll @@ -48,8 +48,8 @@ declare void @abort() noreturn ;; corresponding to one of the Target::ISA enumerant values that gives the ;; most capable ISA that the curremt system can run. ;; -;; Note: clang from LLVM 2.9 should be used if this is updated, for maximum -;; backwards compatibility for anyone building ispc with LLVM 2.9. 
+;; Note: clang from LLVM 3.0 should be used if this is updated, for maximum +;; backwards compatibility for anyone building ispc with LLVM 3.0 ;; ;; #include ;; #include @@ -80,9 +80,14 @@ declare void @abort() noreturn ;; // Call cpuid with eax=7, ecx=0 ;; __cpuid_count(info, 7, 0); ;; if ((info[1] & (1 << 5)) != 0) -;; return 3; // AVX2 -;; else -;; return 2; // AVX1 +;; return 4; // AVX2 +;; else { +;; if ((info[2] & (1 << 29)) != 0 && // F16C +;; (info[2] & (1 << 30)) != 0) // RDRAND +;; return 3; // AVX1 on IVB +;; else +;; return 2; // AVX1 +;; } ;; } ;; else if ((info[2] & (1 << 19)) != 0) ;; return 1; // SSE4 @@ -92,41 +97,47 @@ declare void @abort() noreturn ;; abort(); ;; } -%0 = type { i32, i32, i32, i32 } - -define i32 @__get_system_isa() nounwind ssp { +define i32 @__get_system_isa() nounwind uwtable ssp { entry: - %0 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind - %asmresult9.i = extractvalue %0 %0, 2 - %asmresult10.i = extractvalue %0 %0, 3 - %and = and i32 %asmresult9.i, 268435456 + %0 = tail call { i32, i32, i32, i32 } asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind + %asmresult5.i = extractvalue { i32, i32, i32, i32 } %0, 2 + %asmresult6.i = extractvalue { i32, i32, i32, i32 } %0, 3 + %and = and i32 %asmresult5.i, 268435456 %cmp = icmp eq i32 %and, 0 - br i1 %cmp, label %if.else7, label %if.then + br i1 %cmp, label %if.else14, label %if.then if.then: ; preds = %entry - %1 = tail call %0 asm sideeffect "xchg$(l$)\09$(%$)ebx, $1\0A\09cpuid\0A\09xchg$(l$)\09$(%$)ebx, $1\0A\09", "={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 7, i32 0) nounwind - %asmresult9.i24 = extractvalue %0 %1, 1 - %and4 = lshr i32 %asmresult9.i24, 5 - %2 = and i32 %and4, 1 - %3 = or i32 %2, 2 + %1 = tail call { i32, i32, i32, i32 } asm sideeffect "xchg$(l$)\09$(%$)ebx, $1\0A\09cpuid\0A\09xchg$(l$)\09$(%$)ebx, $1\0A\09", 
"={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 7, i32 0) nounwind + %asmresult4.i29 = extractvalue { i32, i32, i32, i32 } %1, 1 + %and3 = and i32 %asmresult4.i29, 32 + %cmp4 = icmp eq i32 %and3, 0 + br i1 %cmp4, label %if.else, label %return + +if.else: ; preds = %if.then + %asmresult5.i30 = extractvalue { i32, i32, i32, i32 } %1, 2 + %2 = and i32 %asmresult5.i30, 1610612736 + %3 = icmp eq i32 %2, 1610612736 + br i1 %3, label %return, label %if.else13 + +if.else13: ; preds = %if.else br label %return -if.else7: ; preds = %entry - %and10 = and i32 %asmresult9.i, 524288 - %cmp11 = icmp eq i32 %and10, 0 - br i1 %cmp11, label %if.else13, label %return - -if.else13: ; preds = %if.else7 - %and16 = and i32 %asmresult10.i, 67108864 +if.else14: ; preds = %entry + %and16 = and i32 %asmresult5.i, 524288 %cmp17 = icmp eq i32 %and16, 0 br i1 %cmp17, label %if.else19, label %return -if.else19: ; preds = %if.else13 +if.else19: ; preds = %if.else14 + %and21 = and i32 %asmresult6.i, 67108864 + %cmp22 = icmp eq i32 %and21, 0 + br i1 %cmp22, label %if.else24, label %return + +if.else24: ; preds = %if.else19 tail call void @abort() noreturn nounwind unreachable -return: ; preds = %if.else13, %if.else7, %if.then - %retval.0 = phi i32 [ %3, %if.then ], [ 1, %if.else7 ], [ 0, %if.else13 ] +return: ; preds = %if.else19, %if.else14, %if.else13, %if.else, %if.then + %retval.0 = phi i32 [ 2, %if.else13 ], [ 4, %if.then ], [ 3, %if.else ], [ 1, %if.else14 ], [ 0, %if.else19 ] ret i32 %retval.0 } diff --git a/builtins/target-avx1-x2.ll b/builtins/target-avx1-x2.ll index e79b9f2b..efde5d10 100644 --- a/builtins/target-avx1-x2.ll +++ b/builtins/target-avx1-x2.ll @@ -61,10 +61,12 @@ define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonl ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; half conversion routines +ifelse(NO_HALF_DECLARES, `1', `', ` declare float @__half_to_float_uniform(i16 %v) nounwind readnone declare 
@__half_to_float_varying( %v) nounwind readnone declare i16 @__float_to_half_uniform(float %v) nounwind readnone declare @__float_to_half_varying( %v) nounwind readnone +') ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; gather diff --git a/builtins/target-avx1.ll b/builtins/target-avx1.ll index fc6bdcf1..64f8ad33 100644 --- a/builtins/target-avx1.ll +++ b/builtins/target-avx1.ll @@ -61,10 +61,12 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly a ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; half conversion routines +ifelse(NO_HALF_DECLARES, `1', `', ` declare float @__half_to_float_uniform(i16 %v) nounwind readnone declare @__half_to_float_varying( %v) nounwind readnone declare i16 @__float_to_half_uniform(float %v) nounwind readnone declare @__float_to_half_varying( %v) nounwind readnone +') ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; gather diff --git a/builtins/target-avx11-x2.ll b/builtins/target-avx11-x2.ll new file mode 100644 index 00000000..e2bc599d --- /dev/null +++ b/builtins/target-avx11-x2.ll @@ -0,0 +1,87 @@ +;; Copyright (c) 2012, Intel Corporation +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. 
+;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include(`target-avx1-x2.ll') + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; float/half conversions + +declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone +; 0 is round nearest even +declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone + +define <16 x float> @__half_to_float_varying(<16 x i16> %v) nounwind readnone { + %r_0 = shufflevector <16 x i16> %v, <16 x i16> undef, + <8 x i32> + %vr_0 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_0) + %r_1 = shufflevector <16 x i16> %v, <16 x i16> undef, + <8 x i32> + %vr_1 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_1) + %r = shufflevector <8 x float> %vr_0, <8 x float> %vr_1, + <16 x i32> + ret <16 x float> %r +} + +define <16 x i16> @__float_to_half_varying(<16 x float> %v) nounwind readnone { + %r_0 = shufflevector <16 x float> %v, <16 x float> undef, + <8 x i32> + %vr_0 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_0, i32 0) + %r_1 = shufflevector <16 x float> %v, <16 x float> undef, + <8 x i32> + %vr_1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_1, i32 0) + %r = shufflevector <8 x i16> %vr_0, <8 x i16> 
%vr_1, + <16 x i32> + ret <16 x i16> %r +} + +define float @__half_to_float_uniform(i16 %v) nounwind readnone { + %v1 = bitcast i16 %v to <1 x i16> + %vv = shufflevector <1 x i16> %v1, <1 x i16> undef, + <8 x i32> + %rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv) + %r = extractelement <8 x float> %rv, i32 0 + ret float %r +} + +define i16 @__float_to_half_uniform(float %v) nounwind readnone { + %v1 = bitcast float %v to <1 x float> + %vv = shufflevector <1 x float> %v1, <1 x float> undef, + <8 x i32> + ; round to nearest even + %rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0) + %r = extractelement <8 x i16> %rv, i32 0 + ret i16 %r +} + diff --git a/builtins/target-avx11.ll b/builtins/target-avx11.ll new file mode 100644 index 00000000..54aa35af --- /dev/null +++ b/builtins/target-avx11.ll @@ -0,0 +1,71 @@ +;; Copyright (c) 2012, Intel Corporation +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. +;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include(`target-avx1.ll') + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; float/half conversions + +declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone +; 0 is round nearest even +declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone + +define <8 x float> @__half_to_float_varying(<8 x i16> %v) nounwind readnone { + %r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v) + ret <8 x float> %r +} + +define <8 x i16> @__float_to_half_varying(<8 x float> %v) nounwind readnone { + %r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0) + ret <8 x i16> %r +} + +define float @__half_to_float_uniform(i16 %v) nounwind readnone { + %v1 = bitcast i16 %v to <1 x i16> + %vv = shufflevector <1 x i16> %v1, <1 x i16> undef, + <8 x i32> + %rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv) + %r = extractelement <8 x float> %rv, i32 0 + ret float %r +} + +define i16 @__float_to_half_uniform(float %v) nounwind readnone { + %v1 = bitcast float %v to <1 x float> + %vv = shufflevector <1 x float> %v1, <1 x float> undef, + <8 x i32> + ; round to nearest even + %rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0) + %r = extractelement <8 x i16> %rv, i32 0 + ret i16 %r +} + diff --git a/ispc.cpp b/ispc.cpp index 38b9ec70..f520aa75 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -93,8 +93,14 @@ lGetSystemISA() { __cpuidex(info, 7, 0); if 
((info[1] & (1 << 5)) != 0) return "avx2"; - else - return "avx"; + else { + // ivybridge? + if ((info[2] & (1 << 29)) != 0 && // F16C + (info[2] & (1 << 30)) != 0) // RDRAND + return "avx1.1"; + else + return "avx"; + } } else if ((info[2] & (1 << 19)) != 0) return "sse4"; @@ -198,7 +204,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, } // This is the case for most of them - t->hasHalf = t->hasTranscendentals = false; + t->hasHalf = t->hasRand = t->hasTranscendentals = false; if (!strcasecmp(isa, "sse2")) { t->isa = Target::SSE2; @@ -284,7 +290,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->maskingIsFree = false; t->maskBitCount = 32; } - else if (!strcasecmp(isa, "avx")) { + else if (!strcasecmp(isa, "avx") || !strcasecmp(isa, "avx1")) { t->isa = Target::AVX; t->nativeVectorWidth = 8; t->vectorWidth = 8; @@ -292,32 +298,54 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa, t->maskingIsFree = false; t->maskBitCount = 32; } - else if (!strcasecmp(isa, "avx-x2") || !strcasecmp(isa, "avx1-x2")) { - t->isa = Target::AVX; + else if (!strcasecmp(isa, "avx-x2") || !strcasecmp(isa, "avx1-x2")) { + t->isa = Target::AVX; t->nativeVectorWidth = 8; t->vectorWidth = 16; t->attributes = "+avx,+popcnt,+cmov"; t->maskingIsFree = false; t->maskBitCount = 32; } + else if (!strcasecmp(isa, "avx1.1")) { + t->isa = Target::AVX11; + t->nativeVectorWidth = 8; + t->vectorWidth = 8; + t->attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand"; + t->maskingIsFree = false; + t->maskBitCount = 32; + t->hasHalf = true; + t->hasRand = true; + } + else if (!strcasecmp(isa, "avx1.1-x2")) { + t->isa = Target::AVX11; + t->nativeVectorWidth = 8; + t->vectorWidth = 16; + t->attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand"; + t->maskingIsFree = false; + t->maskBitCount = 32; + t->hasHalf = true; + t->hasRand = true; + } #ifndef LLVM_3_0 else if (!strcasecmp(isa, "avx2")) { t->isa = Target::AVX2; t->nativeVectorWidth = 8; t->vectorWidth = 8; - t->attributes = 
"+avx2,+popcnt,+cmov,+f16c"; + t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"; t->maskingIsFree = false; t->maskBitCount = 32; t->hasHalf = true; + t->hasRand = true; } else if (!strcasecmp(isa, "avx2-x2")) { t->isa = Target::AVX2; t->nativeVectorWidth = 16; t->vectorWidth = 16; - t->attributes = "+avx2,+popcnt,+cmov,+f16c"; + t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand"; t->maskingIsFree = false; t->maskBitCount = 32; t->hasHalf = true; + t->hasRand = true; } #endif // !LLVM_3_0 else { @@ -360,7 +388,7 @@ const char * Target::SupportedTargetISAs() { return "sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2" #ifndef LLVM_3_0 - ", avx2, avx2-x2" + ", avx1.1, avx1.1-x2, avx2, avx2-x2" #endif // !LLVM_3_0 ", generic-1, generic-4, generic-8, generic-16, generic-32"; } @@ -426,6 +454,8 @@ Target::GetISAString() const { return "sse4"; case Target::AVX: return "avx"; + case Target::AVX11: + return "avx1.1"; case Target::AVX2: return "avx2"; case Target::GENERIC: diff --git a/ispc.h b/ispc.h index 26e592ff..807faf78 100644 --- a/ispc.h +++ b/ispc.h @@ -207,7 +207,7 @@ struct Target { flexible/performant of them will apear last in the enumerant. Note also that __best_available_isa() needs to be updated if ISAs are added or the enumerant values are reordered. */ - enum ISA { SSE2, SSE4, AVX, AVX2, GENERIC, NUM_ISAS }; + enum ISA { SSE2, SSE4, AVX, AVX11, AVX2, GENERIC, NUM_ISAS }; /** Instruction set being compiled to. */ ISA isa; @@ -252,6 +252,9 @@ struct Target { conversions. */ bool hasHalf; + /** Indicates whether there is an ISA random number instruction. */ + bool hasRand; + /** Indicates whether the target has support for transcendentals (beyond sqrt, which we assume that all of them handle). 
*/ bool hasTranscendentals; diff --git a/ispc.vcxproj b/ispc.vcxproj index 6478df4e..cef62d21 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -20,6 +20,8 @@ + + @@ -188,6 +190,32 @@ Building gen-bitcode-avx1-x2.cpp + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll > gen-bitcode-avx11.cpp + gen-bitcode-avx11.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll > gen-bitcode-avx11.cpp + gen-bitcode-avx11.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll + Building gen-bitcode-avx11.cpp + Building gen-bitcode-avx11.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll > gen-bitcode-avx11-x2.cpp + gen-bitcode-avx11-x2.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll > gen-bitcode-avx11-x2.cpp + gen-bitcode-avx11-x2.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll + Building gen-bitcode-avx11-x2.cpp + Building gen-bitcode-avx11-x2.cpp + + Document