Add initial support for "avx1.1" targets for Ivy Bridge.

So far, only the use of the float/half conversion instructions distinguishes this from the "avx1" target. Partial work on issue #263.
2012-06-08 15:55:00 -07:00
parent 79e0a9f32a
commit 6c7df4cb6b
10 changed files with 296 additions and 40 deletions
--- a/4
+++ b/4
@@ -84,8 +84,8 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
 	type.cpp util.cpp
 HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
 	opt.h stmt.h sym.h type.h util.h
-TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \
+TARGETS=avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 \
-	generic-16 generic-32 generic-64 generic-1
+	generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
 BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
 	builtins/dispatch.ll
 BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -804,6 +804,26 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
            FATAL("logic error in DefineStdlib");
        }
        break;
    case Target::AVX11:
        switch (g->target.vectorWidth) {
        case 8:
            extern unsigned char builtins_bitcode_avx11[];
            extern int builtins_bitcode_avx11_length;
            AddBitcodeToModule(builtins_bitcode_avx11, 
                               builtins_bitcode_avx11_length, 
                               module, symbolTable);
            break;
        case 16:
            extern unsigned char builtins_bitcode_avx11_x2[];
            extern int builtins_bitcode_avx11_x2_length;
            AddBitcodeToModule(builtins_bitcode_avx11_x2, 
                               builtins_bitcode_avx11_x2_length,
                               module,  symbolTable);
            break;
        default:
            FATAL("logic error in DefineStdlib");
        }
        break;
    case Target::AVX2:
        switch (g->target.vectorWidth) {
        case 8:
@@ -898,6 +918,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
    lDefineConstantInt("__have_native_half", g->target.hasHalf, module, 
                       symbolTable);
    lDefineConstantInt("__have_native_rand", g->target.hasRand, module, 
                       symbolTable);
    lDefineConstantInt("__have_native_transcendentals", g->target.hasTranscendentals,
                       module, symbolTable);
--- a/builtins/dispatch.ll
+++ b/builtins/dispatch.ll
@@ -48,8 +48,8 @@ declare void @abort() noreturn
 ;; corresponding to one of the Target::ISA enumerant values that gives the
 ;; most capable ISA that the current system can run.
 ;;
-;; Note: clang from LLVM 2.9 should be used if this is updated, for maximum
+;; Note: clang from LLVM 3.0 should be used if this is updated, for maximum
-;; backwards compatibility for anyone building ispc with LLVM 2.9.
+;; backwards compatibility for anyone building ispc with LLVM 3.0
 ;;
 ;; #include <stdint.h>
 ;; #include <stdlib.h>
@@ -80,9 +80,14 @@ declare void @abort() noreturn
 ;;         // Call cpuid with eax=7, ecx=0
 ;;         __cpuid_count(info, 7, 0);
 ;;         if ((info[1] & (1 << 5)) != 0)
-;;             return 3; // AVX2
+;;             return 4; // AVX2
-;;         else
+;;         else {
-;;             return 2; // AVX1
+;;             if ((info[2] & (1 << 29)) != 0 &&  // F16C
 ;;                 (info[2] & (1 << 30)) != 0)    // RDRAND
 ;;                 return 3; // AVX1 on IVB
 ;;             else
 ;;                 return 2; // AVX1
 ;;         }
 ;;     }
 ;;     else if ((info[2] & (1 << 19)) != 0)
 ;;         return 1; // SSE4
@@ -92,41 +97,47 @@ declare void @abort() noreturn
 ;;         abort();
 ;; }
-%0 = type { i32, i32, i32, i32 }
+define i32 @__get_system_isa() nounwind uwtable ssp {
 define i32 @__get_system_isa() nounwind ssp {
 entry:
-  %0 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+  %0 = tail call { i32, i32, i32, i32 } asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
-  %asmresult9.i = extractvalue %0 %0, 2
+  %asmresult5.i = extractvalue { i32, i32, i32, i32 } %0, 2
-  %asmresult10.i = extractvalue %0 %0, 3
+  %asmresult6.i = extractvalue { i32, i32, i32, i32 } %0, 3
-  %and = and i32 %asmresult9.i, 268435456
+  %and = and i32 %asmresult5.i, 268435456
  %cmp = icmp eq i32 %and, 0
-  br i1 %cmp, label %if.else7, label %if.then
+  br i1 %cmp, label %if.else14, label %if.then
 if.then:                                          ; preds = %entry
-  %1 = tail call %0 asm sideeffect "xchg$(l$)\09$(%$)ebx, $1\0A\09cpuid\0A\09xchg$(l$)\09$(%$)ebx, $1\0A\09", "={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 7, i32 0) nounwind
+  %1 = tail call { i32, i32, i32, i32 } asm sideeffect "xchg$(l$)\09$(%$)ebx, $1\0A\09cpuid\0A\09xchg$(l$)\09$(%$)ebx, $1\0A\09", "={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 7, i32 0) nounwind
-  %asmresult9.i24 = extractvalue %0 %1, 1
+  %asmresult4.i29 = extractvalue { i32, i32, i32, i32 } %1, 1
-  %and4 = lshr i32 %asmresult9.i24, 5
+  %and3 = and i32 %asmresult4.i29, 32
-  %2 = and i32 %and4, 1
+  %cmp4 = icmp eq i32 %and3, 0
-  %3 = or i32 %2, 2
+  br i1 %cmp4, label %if.else, label %return
 if.else:                                          ; preds = %if.then
  ; F16C (bit 29) and RDRAND (bit 30) are reported in ECX of CPUID leaf 1,
  ; so test the leaf-1 ECX value extracted in the entry block
  ; (%asmresult5.i) rather than ECX of the leaf-7 query (%1).
  ; NOTE: if this file is regenerated from the reference C source in the
  ; header comment, that source must keep the leaf-1 results around (use a
  ; separate array for the leaf-7 query) to preserve this behavior.
  %2 = and i32 %asmresult5.i, 1610612736
  %3 = icmp eq i32 %2, 1610612736
  br i1 %3, label %return, label %if.else13
 if.else13:                                        ; preds = %if.else
  br label %return
-if.else7:                                         ; preds = %entry
+if.else14:                                        ; preds = %entry
-  %and10 = and i32 %asmresult9.i, 524288
+  %and16 = and i32 %asmresult5.i, 524288
  %cmp11 = icmp eq i32 %and10, 0
  br i1 %cmp11, label %if.else13, label %return
 if.else13:                                        ; preds = %if.else7
  %and16 = and i32 %asmresult10.i, 67108864
  %cmp17 = icmp eq i32 %and16, 0
  br i1 %cmp17, label %if.else19, label %return
-if.else19:                                        ; preds = %if.else13
+if.else19:                                        ; preds = %if.else14
  %and21 = and i32 %asmresult6.i, 67108864
  %cmp22 = icmp eq i32 %and21, 0
  br i1 %cmp22, label %if.else24, label %return
 if.else24:                                        ; preds = %if.else19
  tail call void @abort() noreturn nounwind
  unreachable
-return:                                           ; preds = %if.else13, %if.else7, %if.then
+return:                                           ; preds = %if.else19, %if.else14, %if.else13, %if.else, %if.then
-  %retval.0 = phi i32 [ %3, %if.then ], [ 1, %if.else7 ], [ 0, %if.else13 ]
+  %retval.0 = phi i32 [ 2, %if.else13 ], [ 4, %if.then ], [ 3, %if.else ], [ 1, %if.else14 ], [ 0, %if.else19 ]
  ret i32 %retval.0
 }
--- a/builtins/target-avx1-x2.ll
+++ b/builtins/target-avx1-x2.ll
@@ -61,10 +61,12 @@ define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonl
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; half conversion routines
 ifelse(NO_HALF_DECLARES, `1', `', `
 declare float @__half_to_float_uniform(i16 %v) nounwind readnone
 declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
 declare i16 @__float_to_half_uniform(float %v) nounwind readnone
 declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
 ')
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; gather
--- a/builtins/target-avx1.ll
+++ b/builtins/target-avx1.ll
@@ -61,10 +61,12 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly a
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; half conversion routines
 ifelse(NO_HALF_DECLARES, `1', `', `
 declare float @__half_to_float_uniform(i16 %v) nounwind readnone
 declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
 declare i16 @__float_to_half_uniform(float %v) nounwind readnone
 declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
 ')
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; gather
--- a/builtins/target-avx11-x2.ll
+++ b/builtins/target-avx11-x2.ll
@@ -0,0 +1,87 @@
 ;;  Copyright (c) 2012, Intel Corporation
 ;;  All rights reserved.
 ;;
 ;;  Redistribution and use in source and binary forms, with or without
 ;;  modification, are permitted provided that the following conditions are
 ;;  met:
 ;;
 ;;    * Redistributions of source code must retain the above copyright
 ;;      notice, this list of conditions and the following disclaimer.
 ;;
 ;;    * Redistributions in binary form must reproduce the above copyright
 ;;      notice, this list of conditions and the following disclaimer in the
 ;;      documentation and/or other materials provided with the distribution.
 ;;
 ;;    * Neither the name of Intel Corporation nor the names of its
 ;;      contributors may be used to endorse or promote products derived from
 ;;      this software without specific prior written permission.
 ;;
 ;;
 ;;   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 ;;   IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 ;;   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 ;;   PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 ;;   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 ;;   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 ;;   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 ;;   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 ;;   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 ;;   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
 ;; This target supplies its own half conversion routines further down, so
 ;; set the flag that makes the base AVX1 x2 target file skip declaring them.
 define(`NO_HALF_DECLARES', `1')
 include(`target-avx1-x2.ll')
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float/half conversions
 declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
 ; 0 is round nearest even
 declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
 ;; Converts 16 half-precision values (held as i16) to float.  The
 ;; vcvtph2ps.256 intrinsic takes 8 lanes per call, so the input is split
 ;; into two 8-wide pieces that are converted separately and concatenated.
 define <16 x float> @__half_to_float_varying(<16 x i16> %v) nounwind readnone {
  ; lanes 0-7 of the input
  %r_0 = shufflevector <16 x i16> %v, <16 x i16> undef,
             <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %vr_0 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_0)
  ; lanes 8-15 of the input
  %r_1 = shufflevector <16 x i16> %v, <16 x i16> undef,
             <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vr_1 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %r_1)
  ; put the two 8-wide results back together as one 16-wide value
  %r = shufflevector <8 x float> %vr_0, <8 x float> %vr_1, 
           <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                       i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x float> %r
 }
 ;; Converts 16 float values to half precision (returned as i16).  The
 ;; vcvtps2ph.256 intrinsic takes 8 lanes per call, so the input is split
 ;; into two 8-wide pieces that are converted separately and concatenated.
 ;; The i32 0 operand selects round to nearest even.
 define <16 x i16> @__float_to_half_varying(<16 x float> %v) nounwind readnone {
  ; lanes 0-7 of the input
  %r_0 = shufflevector <16 x float> %v, <16 x float> undef,
             <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %vr_0 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_0, i32 0)
  ; lanes 8-15 of the input
  %r_1 = shufflevector <16 x float> %v, <16 x float> undef,
             <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vr_1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %r_1, i32 0)
  ; put the two 8-wide results back together as one 16-wide value
  %r = shufflevector <8 x i16> %vr_0, <8 x i16> %vr_1, 
           <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                       i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %r
 }
 ;; Converts a single half-precision value (held as i16) to float.  The
 ;; scalar is placed in lane 0 of an 8-wide value (the other lanes are
 ;; undef), run through the 8-wide conversion, and lane 0 is returned.
 define float @__half_to_float_uniform(i16 %v) nounwind readnone {
  ; view the scalar as a 1-wide value so shufflevector can widen it
  %v1 = bitcast i16 %v to <1 x i16>
  %vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
           <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
                      i32 undef, i32 undef, i32 undef, i32 undef>
  %rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
  ; only lane 0 carries a meaningful result
  %r = extractelement <8 x float> %rv, i32 0
  ret float %r
 }
 ;; Converts a single float to half precision (returned as i16).  The
 ;; scalar is placed in lane 0 of an 8-wide value (the other lanes are
 ;; undef), run through the 8-wide conversion, and lane 0 is returned.
 define i16 @__float_to_half_uniform(float %v) nounwind readnone {
  ; view the scalar as a 1-wide value so shufflevector can widen it
  %v1 = bitcast float %v to <1 x float>
  %vv = shufflevector <1 x float> %v1, <1 x float> undef,
           <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
                      i32 undef, i32 undef, i32 undef, i32 undef>
  ; round to nearest even
  %rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
  ; only lane 0 carries a meaningful result
  %r = extractelement <8 x i16> %rv, i32 0
  ret i16 %r
 }
--- a/builtins/target-avx11.ll
+++ b/builtins/target-avx11.ll
@@ -0,0 +1,71 @@
 ;;  Copyright (c) 2012, Intel Corporation
 ;;  All rights reserved.
 ;;
 ;;  Redistribution and use in source and binary forms, with or without
 ;;  modification, are permitted provided that the following conditions are
 ;;  met:
 ;;
 ;;    * Redistributions of source code must retain the above copyright
 ;;      notice, this list of conditions and the following disclaimer.
 ;;
 ;;    * Redistributions in binary form must reproduce the above copyright
 ;;      notice, this list of conditions and the following disclaimer in the
 ;;      documentation and/or other materials provided with the distribution.
 ;;
 ;;    * Neither the name of Intel Corporation nor the names of its
 ;;      contributors may be used to endorse or promote products derived from
 ;;      this software without specific prior written permission.
 ;;
 ;;
 ;;   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 ;;   IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 ;;   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 ;;   PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 ;;   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 ;;   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 ;;   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 ;;   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 ;;   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 ;;   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
 ;; This target supplies its own half conversion routines further down, so
 ;; set the flag that makes the base AVX1 target file skip declaring them.
 define(`NO_HALF_DECLARES', `1')
 include(`target-avx1.ll')
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float/half conversions
 declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readnone
 ; 0 is round nearest even
 declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readnone
 ;; Converts 8 half-precision values (held as i16) to float with a single
 ;; call to the 8-wide vcvtph2ps.256 intrinsic.
 define <8 x float> @__half_to_float_varying(<8 x i16> %v) nounwind readnone {
  %r = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %v)
  ret <8 x float> %r
 }
 ;; Converts 8 float values to half precision (returned as i16) with a
 ;; single call to the 8-wide vcvtps2ph.256 intrinsic.  The i32 0 operand
 ;; selects round to nearest even.
 define <8 x i16> @__float_to_half_varying(<8 x float> %v) nounwind readnone {
  %r = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %v, i32 0)
  ret <8 x i16> %r
 }
 ;; Converts a single half-precision value (held as i16) to float.  The
 ;; scalar is placed in lane 0 of an 8-wide value (the other lanes are
 ;; undef), run through the 8-wide conversion, and lane 0 is returned.
 define float @__half_to_float_uniform(i16 %v) nounwind readnone {
  ; view the scalar as a 1-wide value so shufflevector can widen it
  %v1 = bitcast i16 %v to <1 x i16>
  %vv = shufflevector <1 x i16> %v1, <1 x i16> undef,
           <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
                      i32 undef, i32 undef, i32 undef, i32 undef>
  %rv = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %vv)
  ; only lane 0 carries a meaningful result
  %r = extractelement <8 x float> %rv, i32 0
  ret float %r
 }
 ;; Converts a single float to half precision (returned as i16).  The
 ;; scalar is placed in lane 0 of an 8-wide value (the other lanes are
 ;; undef), run through the 8-wide conversion, and lane 0 is returned.
 define i16 @__float_to_half_uniform(float %v) nounwind readnone {
  ; view the scalar as a 1-wide value so shufflevector can widen it
  %v1 = bitcast float %v to <1 x float>
  %vv = shufflevector <1 x float> %v1, <1 x float> undef,
           <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef,
                      i32 undef, i32 undef, i32 undef, i32 undef>
  ; round to nearest even
  %rv = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %vv, i32 0)
  ; only lane 0 carries a meaningful result
  %r = extractelement <8 x i16> %rv, i32 0
  ret i16 %r
 }
--- a/ispc.cpp
+++ b/ispc.cpp
@@ -93,8 +93,14 @@ lGetSystemISA() {
        // F16C (bit 29) and RDRAND (bit 30) are reported in ECX of CPUID
        // leaf 1.  info[] is expected to still hold the leaf-1 results from
        // the earlier query (the SSE4 test further down reads the same ECX
        // word), so the leaf-7 results go into their own buffer instead of
        // overwriting info[] before those bits are tested.
        int info7[4];
        __cpuidex(info7, 7, 0);
        if ((info7[1] & (1 << 5)) != 0)
            return "avx2";
-        else
+        else {
-            return "avx";
+            // ivybridge?
            if ((info[2] & (1 << 29)) != 0 &&  // F16C
                (info[2] & (1 << 30)) != 0)    // RDRAND
                return "avx1.1";
            else
                return "avx";
        }
    }
    else if ((info[2] & (1 << 19)) != 0)
        return "sse4";
@@ -198,7 +204,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
    }
    // This is the case for most of them
-    t->hasHalf = t->hasTranscendentals = false;
+    t->hasHalf = t->hasRand = t->hasTranscendentals = false;
    if (!strcasecmp(isa, "sse2")) {
        t->isa = Target::SSE2;
@@ -284,7 +290,7 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
        t->maskingIsFree = false;
        t->maskBitCount = 32;
    }
-    else if (!strcasecmp(isa, "avx")) {
+    else if (!strcasecmp(isa, "avx") || !strcasecmp(isa, "avx1")) {
        t->isa = Target::AVX;
        t->nativeVectorWidth = 8;
        t->vectorWidth = 8;
@@ -292,32 +298,54 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
        t->maskingIsFree = false;
        t->maskBitCount = 32;
    }
-    else if (!strcasecmp(isa, "avx-x2")) {
+    else if (!strcasecmp(isa, "avx-x2") || !strcasecmp(isa, "avx1-x2")) {
+        // 16-wide code on plain AVX: the attribute string below enables
+        // neither F16C nor RDRAND and hasHalf/hasRand stay false, so this
+        // must remain Target::AVX.  Target::AVX11 would select the avx11-x2
+        // builtins, which call the F16C conversion intrinsics.
        t->isa = Target::AVX;
        t->nativeVectorWidth = 8;
        t->vectorWidth = 16;
        t->attributes = "+avx,+popcnt,+cmov";
        t->maskingIsFree = false;
        t->maskBitCount = 32;
    }
    else if (!strcasecmp(isa, "avx1.1")) {
        t->isa = Target::AVX11;
        t->nativeVectorWidth = 8;
        t->vectorWidth = 8;
        t->attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand";
        t->maskingIsFree = false;
        t->maskBitCount = 32;
        t->hasHalf = true;
        t->hasRand = true;
    }
    else if (!strcasecmp(isa, "avx1.1-x2")) {
        t->isa = Target::AVX11;
        t->nativeVectorWidth = 8;
        t->vectorWidth = 16;
        t->attributes = "+avx,+popcnt,+cmov,+f16c,+rdrand";
        t->maskingIsFree = false;
        t->maskBitCount = 32;
        t->hasHalf = true;
        t->hasRand = true;
    }
 #ifndef LLVM_3_0
    else if (!strcasecmp(isa, "avx2")) {
        t->isa = Target::AVX2;
        t->nativeVectorWidth = 8;
        t->vectorWidth = 8;
-        t->attributes = "+avx2,+popcnt,+cmov,+f16c";
+        t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand";
        t->maskingIsFree = false;
        t->maskBitCount = 32;
        t->hasHalf = true;
        t->hasRand = true;
    }
    else if (!strcasecmp(isa, "avx2-x2")) {
        t->isa = Target::AVX2;
        t->nativeVectorWidth = 16;
        t->vectorWidth = 16;
-        t->attributes = "+avx2,+popcnt,+cmov,+f16c";
+        t->attributes = "+avx2,+popcnt,+cmov,+f16c,+rdrand";
        t->maskingIsFree = false;
        t->maskBitCount = 32;
        t->hasHalf = true;
        t->hasRand = true;
    }
 #endif // !LLVM_3_0
    else {
@@ -360,7 +388,7 @@ const char *
 Target::SupportedTargetISAs() {
    return "sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2"
 #ifndef LLVM_3_0
-        ", avx2, avx2-x2"
+        ", avx1.1, avx1.1-x2, avx2, avx2-x2"
 #endif // !LLVM_3_0
        ", generic-1, generic-4, generic-8, generic-16, generic-32";
 }
@@ -426,6 +454,8 @@ Target::GetISAString() const {
        return "sse4";
    case Target::AVX:
        return "avx";
    case Target::AVX11:
        return "avx11";
    case Target::AVX2:
        return "avx2";
    case Target::GENERIC:
--- a/ispc.h
+++ b/ispc.h
@@ -207,7 +207,7 @@ struct Target {
        flexible/performant of them will appear last in the enumerant.  Note
        also that __best_available_isa() needs to be updated if ISAs are
        added or the enumerant values are reordered.  */
-    enum ISA { SSE2, SSE4, AVX, AVX2, GENERIC, NUM_ISAS };
+    enum ISA { SSE2, SSE4, AVX, AVX11, AVX2, GENERIC, NUM_ISAS };
    /** Instruction set being compiled to. */
    ISA isa;
@@ -252,6 +252,9 @@ struct Target {
        conversions. */
    bool hasHalf;
    /** Indicates whether there is an ISA random number instruction. */
    bool hasRand;
    /** Indicates whether the target has support for transcendentals (beyond
        sqrt, which we assume that all of them handle). */
    bool hasTranscendentals;
--- a/ispc.vcxproj
+++ b/ispc.vcxproj
@@ -20,6 +20,8 @@
    <ClCompile Include="func.cpp" />
    <ClCompile Include="gen-bitcode-avx1.cpp" />
    <ClCompile Include="gen-bitcode-avx1-x2.cpp" />
    <ClCompile Include="gen-bitcode-avx11.cpp" />
    <ClCompile Include="gen-bitcode-avx11-x2.cpp" />
    <ClCompile Include="gen-bitcode-avx2.cpp" />
    <ClCompile Include="gen-bitcode-avx2-x2.cpp" />
    <ClCompile Include="gen-bitcode-c-32.cpp" />
@@ -188,6 +190,32 @@
      <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx1-x2.cpp</Message>
    </CustomBuild>
  </ItemGroup>
  <ItemGroup>
    <!-- target-avx11.ll includes target-avx1.ll through m4, so that file is listed as an input to trigger a rebuild when it changes. -->
    <CustomBuild Include="builtins\target-avx11.ll">
      <FileType>Document</FileType>
      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll &gt; gen-bitcode-avx11.cpp</Command>
      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx11.cpp</Outputs>
      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1.ll</AdditionalInputs>
      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11.ll | python bitcode2cpp.py builtins\target-avx11.ll &gt; gen-bitcode-avx11.cpp</Command>
      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx11.cpp</Outputs>
      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll;builtins\target-avx1.ll</AdditionalInputs>
      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx11.cpp</Message>
      <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx11.cpp</Message>
    </CustomBuild>
  </ItemGroup>
  <ItemGroup>
    <!-- target-avx11-x2.ll includes target-avx1-x2.ll through m4, so that file is listed as an input to trigger a rebuild when it changes. -->
    <CustomBuild Include="builtins\target-avx11-x2.ll">
      <FileType>Document</FileType>
      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll &gt; gen-bitcode-avx11-x2.cpp</Command>
      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx11-x2.cpp</Outputs>
      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll;builtins\target-avx1-x2.ll</AdditionalInputs>
      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx11-x2.ll | python bitcode2cpp.py builtins\target-avx11-x2.ll &gt; gen-bitcode-avx11-x2.cpp</Command>
      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx11-x2.cpp</Outputs>
      <AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll;builtins\target-avx1-x2.ll</AdditionalInputs>
      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx11-x2.cpp</Message>
      <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx11-x2.cpp</Message>
    </CustomBuild>
  </ItemGroup>
  <ItemGroup>
    <CustomBuild Include="builtins\target-avx2.ll">
      <FileType>Document</FileType>