couple months. The changes are tested with LLVM 3.9, 4.0 and trunk on MacOS (sse4, avx2, skx).
261 lines
10 KiB
LLVM
261 lines
10 KiB
LLVM
;; Copyright (c) 2011-2016, Intel Corporation
|
|
;; All rights reserved.
|
|
;;
|
|
;; Redistribution and use in source and binary forms, with or without
|
|
;; modification, are permitted provided that the following conditions are
|
|
;; met:
|
|
;;
|
|
;; * Redistributions of source code must retain the above copyright
|
|
;; notice, this list of conditions and the following disclaimer.
|
|
;;
|
|
;; * Redistributions in binary form must reproduce the above copyright
|
|
;; notice, this list of conditions and the following disclaimer in the
|
|
;; documentation and/or other materials provided with the distribution.
|
|
;;
|
|
;; * Neither the name of Intel Corporation nor the names of its
|
|
;; contributors may be used to endorse or promote products derived from
|
|
;; this software without specific prior written permission.
|
|
;;
|
|
;;
|
|
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
|
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
|
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
|
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
;; This file defines various functions that are used when generating the
|
|
;; the "dispatch" object/assembly file that has entrypoints for each
|
|
;; exported function in a module that dispatch to the best available
|
|
;; variant of that function that will run on the system's CPU.
|
|
|
|
;; Stores the best target ISA that the system on which we're actually
|
|
;; running supports. -1 represents "uninitialized", otherwise this value
|
|
;; should correspond to one of the enumerant values of Target::ISA from
|
|
;; ispc.h.
|
|
|
|
@__system_best_isa = internal global i32 -1
|
|
|
|
;; The below is the result of running "clang -O2 -emit-llvm -c -o -" on the
|
|
;; following code... Specifically, __get_system_isa should return a value
|
|
;; corresponding to one of the Target::ISA enumerant values that gives the
|
|
;; most capable ISA that the curremt system can run.
|
|
;;
|
|
;;
|
|
;; #include <stdint.h>
|
|
;; #include <stdlib.h>
|
|
;;
|
|
;; static void __cpuid(int info[4], int infoType) {
|
|
;; __asm__ __volatile__ ("cpuid"
|
|
;; : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
|
|
;; : "0" (infoType));
|
|
;; }
|
|
;;
|
|
;; // Save %ebx in case it's the PIC register.
|
|
;; static void __cpuid_count(int info[4], int level, int count) {
|
|
;; __asm__ __volatile__ ("xchg{l}\t{%%}ebx, %1\n\t"
|
|
;; "cpuid\n\t"
|
|
;; "xchg{l}\t{%%}ebx, %1\n\t"
|
|
;; : "=a" (info[0]), "=r" (info[1]), "=c" (info[2]), "=d" (info[3])
|
|
;; : "0" (level), "2" (count));
|
|
;; }
|
|
;;
|
|
;; static int __os_has_avx_support() {
|
|
;; // Check xgetbv; this uses a .byte sequence instead of the instruction
|
|
;; // directly because older assemblers do not include support for xgetbv and
|
|
;; // there is no easy way to conditionally compile based on the assembler used.
|
|
;; int rEAX, rEDX;
|
|
;; __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
|
|
;; return (rEAX & 6) == 6;
|
|
;; }
|
|
;;
|
|
;; static int __os_has_avx512_support() {
|
|
;; // Check if the OS saves the XMM, YMM and ZMM registers, i.e. it supports AVX2 and AVX512.
|
|
;; // See section 2.1 of software.intel.com/sites/default/files/managed/0d/53/319433-022.pdf
|
|
;; // Check xgetbv; this uses a .byte sequence instead of the instruction
|
|
;; // directly because older assemblers do not include support for xgetbv and
|
|
;; // there is no easy way to conditionally compile based on the assembler used.
|
|
;; int rEAX, rEDX;
|
|
;; __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
|
|
;; return (rEAX & 0xE6) == 0xE6;
|
|
;; }
|
|
;;
|
|
;; int32_t __get_system_isa() {
|
|
;; int info[4];
|
|
;; __cpuid(info, 1);
|
|
;;
|
|
;; // Call cpuid with eax=7, ecx=0
|
|
;; int info2[4];
|
|
;; __cpuid_count(info2, 7, 0);
|
|
;;
|
|
;; // NOTE: the values returned below must be the same as the
|
|
;; // corresponding enumerant values in Target::ISA.
|
|
;; if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
|
|
;; (info2[1] & (1 << 5)) != 0 && // AVX2
|
|
;; (info2[1] & (1 << 16)) != 0 && // AVX512 F
|
|
;; __os_has_avx512_support()) {
|
|
;; // We need to verify that AVX2 is also available,
|
|
;; // as well as AVX512, because our targets are supposed
|
|
;; // to use both.
|
|
;;
|
|
;; if ((info2[1] & (1 << 17)) != 0 && // AVX512 DQ
|
|
;; (info2[1] & (1 << 28)) != 0 && // AVX512 CDI
|
|
;; (info2[1] & (1 << 30)) != 0 && // AVX512 BW
|
|
;; (info2[1] & (1 << 31)) != 0) { // AVX512 VL
|
|
;; return 6; // SKX
|
|
;; }
|
|
;; else if ((info2[1] & (1 << 26)) != 0 && // AVX512 PF
|
|
;; (info2[1] & (1 << 27)) != 0 && // AVX512 ER
|
|
;; (info2[1] & (1 << 28)) != 0) { // AVX512 CDI
|
|
;; return 5; // KNL_AVX512
|
|
;; }
|
|
;; // If it's unknown AVX512 target, fall through and use AVX2
|
|
;; // or whatever is available in the machine.
|
|
;; }
|
|
;;
|
|
;; if ((info[2] & (1 << 27)) != 0 && // OSXSAVE
|
|
;; (info[2] & (1 << 28)) != 0 &&
|
|
;; __os_has_avx_support()) {
|
|
;; if ((info[2] & (1 << 29)) != 0 && // F16C
|
|
;; (info[2] & (1 << 30)) != 0) { // RDRAND
|
|
;; // So far, so good. AVX2?
|
|
;; if ((info2[1] & (1 << 5)) != 0)
|
|
;; return 4;
|
|
;; else
|
|
;; return 3;
|
|
;; }
|
|
;; // Regular AVX
|
|
;; return 2;
|
|
;; }
|
|
;; else if ((info[2] & (1 << 19)) != 0)
|
|
;; return 1; // SSE4
|
|
;; else if ((info[3] & (1 << 26)) != 0)
|
|
;; return 0; // SSE2
|
|
;; else
|
|
;; abort();
|
|
;; }
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;; LLVM has different IR for different versions since 3.7
|
|
|
|
define(`PTR_OP_ARGS',
|
|
ifelse(LLVM_VERSION, LLVM_3_7,
|
|
``$1 , $1 *'',
|
|
LLVM_VERSION, LLVM_3_8,
|
|
``$1 , $1 *'',
|
|
LLVM_VERSION, LLVM_3_9,
|
|
``$1 , $1 *'',
|
|
LLVM_VERSION, LLVM_4_0,
|
|
``$1 , $1 *'',
|
|
LLVM_VERSION, LLVM_5_0,
|
|
``$1 , $1 *'',
|
|
``$1 *''
|
|
)
|
|
)
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
define i32 @__get_system_isa() nounwind uwtable {
|
|
entry:
|
|
%0 = tail call { i32, i32, i32, i32 } asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
|
|
%asmresult5.i = extractvalue { i32, i32, i32, i32 } %0, 2
|
|
%asmresult6.i = extractvalue { i32, i32, i32, i32 } %0, 3
|
|
%1 = tail call { i32, i32, i32, i32 } asm sideeffect "xchg$(l$)\09$(%$)ebx, $1\0A\09cpuid\0A\09xchg$(l$)\09$(%$)ebx, $1\0A\09", "={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 7, i32 0) nounwind
|
|
%asmresult4.i87 = extractvalue { i32, i32, i32, i32 } %1, 1
|
|
%and = and i32 %asmresult5.i, 134217728
|
|
%cmp = icmp eq i32 %and, 0
|
|
br i1 %cmp, label %if.else65, label %land.lhs.true
|
|
|
|
land.lhs.true: ; preds = %entry
|
|
%2 = and i32 %asmresult4.i87, 65568
|
|
%3 = icmp eq i32 %2, 65568
|
|
br i1 %3, label %land.lhs.true9, label %if.end39
|
|
|
|
land.lhs.true9: ; preds = %land.lhs.true
|
|
%4 = tail call { i32, i32 } asm sideeffect ".byte 0x0f, 0x01, 0xd0", "={ax},={dx},{cx},~{dirflag},~{fpsr},~{flags}"(i32 0) nounwind
|
|
%asmresult.i90 = extractvalue { i32, i32 } %4, 0
|
|
%and.i = and i32 %asmresult.i90, 230
|
|
%cmp.i = icmp eq i32 %and.i, 230
|
|
br i1 %cmp.i, label %if.then, label %if.end39
|
|
|
|
if.then: ; preds = %land.lhs.true9
|
|
%5 = and i32 %asmresult4.i87, -805175296
|
|
%6 = icmp eq i32 %5, -805175296
|
|
br i1 %6, label %return, label %if.else
|
|
|
|
if.else: ; preds = %if.then
|
|
%7 = and i32 %asmresult4.i87, 469762048
|
|
%8 = icmp eq i32 %7, 469762048
|
|
br i1 %8, label %return, label %if.end39
|
|
|
|
if.end39: ; preds = %if.else, %land.lhs.true9, %land.lhs.true
|
|
%9 = and i32 %asmresult5.i, 402653184
|
|
%10 = icmp eq i32 %9, 402653184
|
|
br i1 %10, label %land.lhs.true47, label %if.else65
|
|
|
|
land.lhs.true47: ; preds = %if.end39
|
|
%11 = tail call { i32, i32 } asm sideeffect ".byte 0x0f, 0x01, 0xd0", "={ax},={dx},{cx},~{dirflag},~{fpsr},~{flags}"(i32 0) nounwind
|
|
%asmresult.i91 = extractvalue { i32, i32 } %11, 0
|
|
%and.i92 = and i32 %asmresult.i91, 6
|
|
%cmp.i93 = icmp eq i32 %and.i92, 6
|
|
br i1 %cmp.i93, label %if.then50, label %if.else65
|
|
|
|
if.then50: ; preds = %land.lhs.true47
|
|
%12 = and i32 %asmresult5.i, 1610612736
|
|
%13 = icmp eq i32 %12, 1610612736
|
|
br i1 %13, label %if.then58, label %return
|
|
|
|
if.then58: ; preds = %if.then50
|
|
%and60 = lshr i32 %asmresult4.i87, 5
|
|
%14 = and i32 %and60, 1
|
|
%15 = add i32 %14, 3
|
|
br label %return
|
|
|
|
if.else65: ; preds = %land.lhs.true47, %if.end39, %entry
|
|
%and67 = and i32 %asmresult5.i, 524288
|
|
%cmp68 = icmp eq i32 %and67, 0
|
|
br i1 %cmp68, label %if.else70, label %return
|
|
|
|
if.else70: ; preds = %if.else65
|
|
%and72 = and i32 %asmresult6.i, 67108864
|
|
%cmp73 = icmp eq i32 %and72, 0
|
|
br i1 %cmp73, label %if.else75, label %return
|
|
|
|
if.else75: ; preds = %if.else70
|
|
tail call void @abort() noreturn nounwind
|
|
unreachable
|
|
|
|
return: ; preds = %if.else70, %if.else65, %if.then58, %if.then50, %if.else, %if.then
|
|
%retval.0 = phi i32 [ 6, %if.then ], [ 5, %if.else ], [ %15, %if.then58 ], [ 2, %if.then50 ], [ 1, %if.else65 ], [ 0, %if.else70 ]
|
|
ret i32 %retval.0
|
|
}
|
|
|
|
declare void @abort() noreturn nounwind
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; This function is called by each of the dispatch functions we generate;
|
|
;; it sets @__system_best_isa if it is unset.
|
|
|
|
define void @__set_system_isa() {
|
|
entry:
|
|
%bi = load PTR_OP_ARGS(`i32 ') @__system_best_isa
|
|
%unset = icmp eq i32 %bi, -1
|
|
br i1 %unset, label %set_system_isa, label %done
|
|
|
|
set_system_isa:
|
|
%bival = call i32 @__get_system_isa()
|
|
store i32 %bival, i32* @__system_best_isa
|
|
ret void
|
|
|
|
done:
|
|
ret void
|
|
}
|
|
|