Replaced the SKX patches with a single cumulative one, made a few related changes, and switched to the CMake configuration for LLVM 3.8 and newer versions.
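In alloy.py this is implemented as a whitelist of configure-capable versions rather than a special case for "trunk". A minimal Python sketch of the resulting gate (uses_cmake is a hypothetical helper for illustration; the list literal is taken verbatim from the diff below):

# Releases still built with the autoconf-style ./configure flow.
LLVM_configure_capable = ["3_2", "3_3", "3_4", "3_5", "3_6", "3_7"]

def uses_cmake(version_LLVM):
    # 3.8, newer releases, and "trunk" are absent from the list,
    # so they all take the CMake path.
    return version_LLVM not in LLVM_configure_capable

assert uses_cmake("3_8") and uses_cmake("trunk") and not uses_cmake("3_7")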
alloy.py
@@ -241,13 +241,14 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
     os.makedirs(LLVM_BUILD)
     os.makedirs(LLVM_BIN)
     selfbuild_compiler = ""
+    LLVM_configure_capable = ["3_2", "3_3", "3_4", "3_5", "3_6", "3_7"]
     if selfbuild:
         print_debug("Making selfbuild and use folders " + LLVM_BUILD_selfbuild + " and " +
                     LLVM_BIN_selfbuild + "\n", from_validation, alloy_build)
         os.makedirs(LLVM_BUILD_selfbuild)
         os.makedirs(LLVM_BIN_selfbuild)
         os.chdir(LLVM_BUILD_selfbuild)
-        if version_LLVM == "trunk":
+        if version_LLVM not in LLVM_configure_capable:
             # TODO: mac_root
             try_do_LLVM("configure release version for selfbuild ",
                         "cmake -G Unix\ Makefiles" + " -DCMAKE_EXPORT_COMPILE_COMMANDS=ON" +
@@ -283,7 +284,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
     os.chdir(LLVM_BUILD)
     if debug == False:
         if current_OS != "Windows":
-            if version_LLVM == "trunk":
+            if version_LLVM not in LLVM_configure_capable:
                 # TODO: mac_root
                 try_do_LLVM("configure release version ",
                             "cmake -G Unix\ Makefiles" + " -DCMAKE_EXPORT_COMPILE_COMMANDS=ON" +
@@ -311,7 +312,7 @@ def build_LLVM(version_LLVM, revision, folder, tarball, debug, selfbuild, extra,
                         '" -DLLVM_LIT_TOOLS_DIR="C:\\gnuwin32\\bin" ..\\' + LLVM_SRC,
                         from_validation)
     else:
-        if version_LLVM == "trunk":
+        if version_LLVM not in LLVM_configure_capable:
             # TODO: mac_root
             try_do_LLVM("configure debug version ",
                         "cmake -G Unix\ Makefiles" + " -DCMAKE_EXPORT_COMPILE_COMMANDS=ON" +
(deleted file)
@@ -1,263 +0,0 @@
# This patch merges slightly modified revisions for SKX capability with LLVM 3.8
# revisions: r260878, r258867
Index: test/CodeGen/X86/avx512-intel-ocl.ll
===================================================================
--- test/CodeGen/X86/avx512-intel-ocl.ll	(revision 260877)
+++ test/CodeGen/X86/avx512-intel-ocl.ll	(revision 260878)
@@ -68,10 +68,10 @@
 ; WIN64: vmovups {{.*(%rbp).*}}, %zmm21 # 64-byte Reload

 ; X64-LABEL: test_prolog_epilog
-; X64: kmovw %k7, {{.*}}(%rsp) ## 8-byte Folded Spill
-; X64: kmovw %k6, {{.*}}(%rsp) ## 8-byte Folded Spill
-; X64: kmovw %k5, {{.*}}(%rsp) ## 8-byte Folded Spill
-; X64: kmovw %k4, {{.*}}(%rsp) ## 8-byte Folded Spill
+; X64: kmovq %k7, {{.*}}(%rsp) ## 8-byte Folded Spill
+; X64: kmovq %k6, {{.*}}(%rsp) ## 8-byte Folded Spill
+; X64: kmovq %k5, {{.*}}(%rsp) ## 8-byte Folded Spill
+; X64: kmovq %k4, {{.*}}(%rsp) ## 8-byte Folded Spill
 ; X64: vmovups %zmm31, {{.*}}(%rsp) ## 64-byte Spill
 ; X64: vmovups %zmm16, {{.*}}(%rsp) ## 64-byte Spill
 ; X64: call
Index: test/CodeGen/X86/avx512-mask-spills.ll
===================================================================
--- test/CodeGen/X86/avx512-mask-spills.ll	(revision 0)
+++ test/CodeGen/X86/avx512-mask-spills.ll	(revision 260878)
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+
+declare void @f()
+define <4 x i1> @test_4i1(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_4i1:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:  Ltmp0:
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vpcmpnleud %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
+; CHECK-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; CHECK-NEXT:    callq _f
+; CHECK-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; CHECK-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Folded Reload
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    vpmovm2d %k0, %xmm0
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    retq
+
+  %cmp_res = icmp ugt <4 x i32> %a, %b
+  %cmp_res2 = icmp sgt <4 x i32> %a, %b
+  call void @f()
+  %res = or <4 x i1> %cmp_res, %cmp_res2
+  ret <4 x i1> %res
+}
+
+define <8 x i1> @test_8i1(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: test_8i1:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:  Ltmp1:
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vpcmpnleud %ymm1, %ymm0, %k0
+; CHECK-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; CHECK-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
+; CHECK-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; CHECK-NEXT:    callq _f
+; CHECK-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; CHECK-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Folded Reload
+; CHECK-NEXT:    korb %k1, %k0, %k0
+; CHECK-NEXT:    vpmovm2w %k0, %xmm0
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    retq
+
+  %cmp_res = icmp ugt <8 x i32> %a, %b
+  %cmp_res2 = icmp sgt <8 x i32> %a, %b
+  call void @f()
+  %res = or <8 x i1> %cmp_res, %cmp_res2
+  ret <8 x i1> %res
+}
+
+define <16 x i1> @test_16i1(<16 x i32> %a, <16 x i32> %b) {
+; CHECK-LABEL: test_16i1:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:  Ltmp2:
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
+; CHECK-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; CHECK-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
+; CHECK-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; CHECK-NEXT:    callq _f
+; CHECK-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; CHECK-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Folded Reload
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    vpmovm2b %k0, %xmm0
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    retq
+  %cmp_res = icmp ugt <16 x i32> %a, %b
+  %cmp_res2 = icmp sgt <16 x i32> %a, %b
+  call void @f()
+  %res = or <16 x i1> %cmp_res, %cmp_res2
+  ret <16 x i1> %res
+}
+
+define <32 x i1> @test_32i1(<32 x i16> %a, <32 x i16> %b) {
+; CHECK-LABEL: test_32i1:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:  Ltmp3:
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0
+; CHECK-NEXT:    kmovd %k0, {{[0-9]+}}(%rsp) ## 4-byte Folded Spill
+; CHECK-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0
+; CHECK-NEXT:    kmovd %k0, (%rsp) ## 4-byte Folded Spill
+; CHECK-NEXT:    callq _f
+; CHECK-NEXT:    kmovd {{[0-9]+}}(%rsp), %k0 ## 4-byte Folded Reload
+; CHECK-NEXT:    kmovd (%rsp), %k1 ## 4-byte Folded Reload
+; CHECK-NEXT:    kord %k1, %k0, %k0
+; CHECK-NEXT:    vpmovm2b %k0, %ymm0
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    retq
+  %cmp_res = icmp ugt <32 x i16> %a, %b
+  %cmp_res2 = icmp sgt <32 x i16> %a, %b
+  call void @f()
+  %res = or <32 x i1> %cmp_res, %cmp_res2
+  ret <32 x i1> %res
+}
+
+define <64 x i1> @test_64i1(<64 x i8> %a, <64 x i8> %b) {
+; CHECK-LABEL: test_64i1:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:  Ltmp4:
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
+; CHECK-NEXT:    kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Folded Spill
+; CHECK-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0
+; CHECK-NEXT:    kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Folded Spill
+; CHECK-NEXT:    callq _f
+; CHECK-NEXT:    kmovq {{[0-9]+}}(%rsp), %k0 ## 8-byte Folded Reload
+; CHECK-NEXT:    kmovq {{[0-9]+}}(%rsp), %k1 ## 8-byte Folded Reload
+; CHECK-NEXT:    korq %k1, %k0, %k0
+; CHECK-NEXT:    vpmovm2b %k0, %zmm0
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    retq
+
+  %cmp_res = icmp ugt <64 x i8> %a, %b
+  %cmp_res2 = icmp sgt <64 x i8> %a, %b
+  call void @f()
+  %res = or <64 x i1> %cmp_res, %cmp_res2
+  ret <64 x i1> %res
+}
Index: lib/Target/X86/X86RegisterInfo.td
===================================================================
--- lib/Target/X86/X86RegisterInfo.td	(revision 260877)
+++ lib/Target/X86/X86RegisterInfo.td	(revision 260878)
@@ -477,18 +477,18 @@
                          256, (sequence "YMM%u", 0, 31)>;

 // Mask registers
-def VK1 : RegisterClass<"X86", [i1], 8, (sequence "K%u", 0, 7)> {let Size = 8;}
-def VK2 : RegisterClass<"X86", [v2i1], 8, (add VK1)> {let Size = 8;}
-def VK4 : RegisterClass<"X86", [v4i1], 8, (add VK2)> {let Size = 8;}
-def VK8 : RegisterClass<"X86", [v8i1], 8, (add VK4)> {let Size = 8;}
+def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
+def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;}
+def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;}
+def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;}
 def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
 def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
 def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}

-def VK1WM : RegisterClass<"X86", [i1], 8, (sub VK1, K0)> {let Size = 8;}
-def VK2WM : RegisterClass<"X86", [v2i1], 8, (sub VK2, K0)> {let Size = 8;}
-def VK4WM : RegisterClass<"X86", [v4i1], 8, (sub VK4, K0)> {let Size = 8;}
-def VK8WM : RegisterClass<"X86", [v8i1], 8, (sub VK8, K0)> {let Size = 8;}
+def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)> {let Size = 16;}
+def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
+def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
+def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;}
 def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
 def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
 def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp	(revision 260877)
+++ lib/Target/X86/X86InstrInfo.cpp	(revision 260878)
@@ -4302,12 +4302,14 @@
   return 0;
 }

+static bool isMaskRegClass(const TargetRegisterClass *RC) {
+  // All KMASK RegClasses hold the same k registers, can be tested against anyone.
+  return X86::VK16RegClass.hasSubClassEq(RC);
+}
+
 static bool MaskRegClassContains(unsigned Reg) {
-  return X86::VK8RegClass.contains(Reg) ||
-         X86::VK16RegClass.contains(Reg) ||
-         X86::VK32RegClass.contains(Reg) ||
-         X86::VK64RegClass.contains(Reg) ||
-         X86::VK1RegClass.contains(Reg);
+  // All KMASK RegClasses hold the same k registers, can be tested against anyone.
+  return X86::VK16RegClass.contains(Reg);
 }

 static bool GRRegClassContains(unsigned Reg) {
@@ -4509,15 +4511,28 @@
   llvm_unreachable("Cannot emit physreg copy instruction");
 }

+static unsigned getLoadStoreMaskRegOpcode(const TargetRegisterClass *RC,
+                                          bool load) {
+  switch (RC->getSize()) {
+  default:
+    llvm_unreachable("Unknown spill size");
+  case 2:
+    return load ? X86::KMOVWkm : X86::KMOVWmk;
+  case 4:
+    return load ? X86::KMOVDkm : X86::KMOVDmk;
+  case 8:
+    return load ? X86::KMOVQkm : X86::KMOVQmk;
+  }
+}
+
 static unsigned getLoadStoreRegOpcode(unsigned Reg,
                                       const TargetRegisterClass *RC,
                                       bool isStackAligned,
                                       const X86Subtarget &STI,
                                       bool load) {
   if (STI.hasAVX512()) {
-    if (X86::VK8RegClass.hasSubClassEq(RC) ||
-        X86::VK16RegClass.hasSubClassEq(RC))
-      return load ? X86::KMOVWkm : X86::KMOVWmk;
+    if (isMaskRegClass(RC))
+      return getLoadStoreMaskRegOpcode(RC, load);
     if (RC->getSize() == 4 && X86::FR32XRegClass.hasSubClassEq(RC))
       return load ? X86::VMOVSSZrm : X86::VMOVSSZmr;
     if (RC->getSize() == 8 && X86::FR64XRegClass.hasSubClassEq(RC))
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp	(revision 260877)
+++ lib/Target/X86/X86ISelLowering.cpp	(revision 260878)
@@ -12123,10 +12123,15 @@
   }

   unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
-  const TargetRegisterClass* rc = getRegClassFor(VecVT);
-  if (!Subtarget->hasDQI() && (VecVT.getVectorNumElements() <= 8))
-    rc = getRegClassFor(MVT::v16i1);
-  unsigned MaxSift = rc->getSize()*8 - 1;
+  if (!Subtarget->hasDQI() && (VecVT.getVectorNumElements() <= 8)) {
+    // Use kshiftlw/rw instruction.
+    VecVT = MVT::v16i1;
+    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT,
+                      DAG.getUNDEF(VecVT),
+                      Vec,
+                      DAG.getIntPtrConstant(0, dl));
+  }
+  unsigned MaxSift = VecVT.getVectorNumElements() - 1;
   Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
                     DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8));
   Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
(deleted file)
@@ -1,54 +0,0 @@
diff -ruN lib/Target/X86/X86InstrInfo.td lib/Target/X86/X86InstrInfo.td
--- lib/Target/X86/X86InstrInfo.td	2016-03-04 12:36:50.461576093 +0300
+++ lib/Target/X86/X86InstrInfo.td	2016-03-04 12:38:58.747585762 +0300
@@ -728,6 +728,8 @@

 //===----------------------------------------------------------------------===//
 // X86 Instruction Predicate Definitions.
+def TruePredicate : Predicate<"true">;
+
 def HasCMov : Predicate<"Subtarget->hasCMov()">;
 def NoCMov : Predicate<"!Subtarget->hasCMov()">;

diff -ruN lib/Target/X86/X86InstrSSE.td lib/Target/X86/X86InstrSSE.td
--- lib/Target/X86/X86InstrSSE.td	2016-03-04 12:36:50.472576094 +0300
+++ lib/Target/X86/X86InstrSSE.td	2016-03-04 12:41:38.419597797 +0300
@@ -4273,17 +4273,17 @@
 //===---------------------------------------------------------------------===//

 defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
-                             SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
+                             SSE_INTALU_ITINS_P, 1, TruePredicate>;
 defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
-                             SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
+                             SSE_INTALU_ITINS_P, 1, TruePredicate>;
 defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
-                             SSE_INTALU_ITINS_P, 1, NoVLX>;
+                             SSE_INTALU_ITINS_P, 1, TruePredicate>;
 defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
-                             SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
+                             SSE_INTALU_ITINS_P, 0, TruePredicate>;
 defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
-                             SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
+                             SSE_INTALU_ITINS_P, 0, TruePredicate>;
 defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
-                             SSE_INTALU_ITINS_P, 0, NoVLX>;
+                             SSE_INTALU_ITINS_P, 0, TruePredicate>;

 //===---------------------------------------------------------------------===//
 // SSE2 - Packed Integer Shuffle Instructions
diff -ruN test/CodeGen/X86/avx-isa-check.ll test/CodeGen/X86/avx-isa-check.ll
--- test/CodeGen/X86/avx-isa-check.ll	2016-03-04 12:36:45.218575698 +0300
+++ test/CodeGen/X86/avx-isa-check.ll	2016-03-04 12:44:06.705608973 +0300
@@ -568,3 +568,11 @@
   %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <8 x i16> %shift
 }
+
+define <32 x i8> @test_cmpgtb(<32 x i8> %A) {
+; generate the follow code
+; vpxor %ymm1, %ymm1, %ymm1
+; vpcmpgtb %ymm0, %ymm1, %ymm0
+  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ret <32 x i8> %B
+}
llvm_patches/3_8_skx_patch_pack.patch (new file)
@@ -0,0 +1,248 @@
--- lib/Target/X86/X86ISelLowering.cpp	2016-04-07 01:11:55.018960678 +0300
+++ lib/Target/X86/X86ISelLowering.cpp	2016-04-07 01:13:57.643965706 +0300
@@ -1413,9 +1413,6 @@
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
   if (Subtarget->hasDQI()) {
-    setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
-    setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
-
     setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
     setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
     setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
@@ -1709,6 +1706,8 @@
   addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
   addRegisterClass(MVT::v2i1, &X86::VK2RegClass);

+  setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
+  setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
   setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
   setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
@@ -11737,10 +11736,15 @@
   }

   unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
-  const TargetRegisterClass* rc = getRegClassFor(VecVT);
-  if (!Subtarget->hasDQI() && (VecVT.getVectorNumElements() <= 8))
-    rc = getRegClassFor(MVT::v16i1);
-  unsigned MaxSift = rc->getSize()*8 - 1;
+  if (!Subtarget->hasDQI() && (VecVT.getVectorNumElements() <= 8)) {
+    // Use kshiftlw/rw instruction.
+    VecVT = MVT::v16i1;
+    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT,
+                      DAG.getUNDEF(VecVT),
+                      Vec,
+                      DAG.getIntPtrConstant(0, dl));
+  }
+  unsigned MaxSift = VecVT.getVectorNumElements() - 1;
   Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
                     DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8));
   Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
--- lib/Target/X86/X86InstrAVX512.td	2016-04-07 01:11:55.020960678 +0300
+++ lib/Target/X86/X86InstrAVX512.td	2016-04-07 01:12:30.680962140 +0300
@@ -2043,9 +2043,6 @@
                VEX, PD, VEX_W;
 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
              VEX, XD;
-}
-
-let Predicates = [HasBWI] in {
 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
              VEX, PS, VEX_W;
 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
@@ -2085,8 +2082,27 @@
             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
   def : Pat<(store VK2:$src, addr:$dst),
             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
+  def : Pat<(store VK1:$src, addr:$dst),
+            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
 }
 let Predicates = [HasAVX512, NoDQI] in {
+  def : Pat<(store VK1:$src, addr:$dst),
+            (MOV8mr addr:$dst,
+             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
+              sub_8bit))>;
+  def : Pat<(store VK2:$src, addr:$dst),
+            (MOV8mr addr:$dst,
+             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK2:$src, VK16)),
+              sub_8bit))>;
+  def : Pat<(store VK4:$src, addr:$dst),
+            (MOV8mr addr:$dst,
+             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK4:$src, VK16)),
+              sub_8bit))>;
+  def : Pat<(store VK8:$src, addr:$dst),
+            (MOV8mr addr:$dst,
+             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
+              sub_8bit))>;
+
   def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
             (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
   def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
@@ -2166,6 +2182,17 @@
 def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
           (COPY_TO_REGCLASS VK1:$src, VK64)>;

+def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
+def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
+def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
+
+def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
+                           (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
+}]>;
+
+def : Pat<(truncstorei1 GR8:$src, addr:$dst),
+          (MOV8mr addr:$dst, GR8:$src)>;

 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
 let Predicates = [HasAVX512, NoDQI] in {
@@ -6540,28 +6567,6 @@
 def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
 def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;

-def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
-def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
-def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
-
-def : Pat<(store VK1:$src, addr:$dst),
-          (MOV8mr addr:$dst,
-           (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
-            sub_8bit))>, Requires<[HasAVX512, NoDQI]>;
-
-def : Pat<(store VK8:$src, addr:$dst),
-          (MOV8mr addr:$dst,
-           (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
-            sub_8bit))>, Requires<[HasAVX512, NoDQI]>;
-
-def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
-                           (truncstore node:$val, node:$ptr), [{
-  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
-}]>;
-
-def : Pat<(truncstorei1 GR8:$src, addr:$dst),
-          (MOV8mr addr:$dst, GR8:$src)>;
-
 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                     !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
--- lib/Target/X86/X86InstrInfo.cpp	2016-04-07 01:11:55.016960678 +0300
+++ lib/Target/X86/X86InstrInfo.cpp	2016-04-07 01:13:00.255963353 +0300
@@ -4286,12 +4286,14 @@
   return 0;
 }

+static bool isMaskRegClass(const TargetRegisterClass *RC) {
+  // All KMASK RegClasses hold the same k registers, can be tested against anyone.
+  return X86::VK16RegClass.hasSubClassEq(RC);
+}
+
 static bool MaskRegClassContains(unsigned Reg) {
-  return X86::VK8RegClass.contains(Reg) ||
-         X86::VK16RegClass.contains(Reg) ||
-         X86::VK32RegClass.contains(Reg) ||
-         X86::VK64RegClass.contains(Reg) ||
-         X86::VK1RegClass.contains(Reg);
+  // All KMASK RegClasses hold the same k registers, can be tested against anyone.
+  return X86::VK16RegClass.contains(Reg);
 }

 static bool GRRegClassContains(unsigned Reg) {
@@ -4493,15 +4495,28 @@
   llvm_unreachable("Cannot emit physreg copy instruction");
 }

+static unsigned getLoadStoreMaskRegOpcode(const TargetRegisterClass *RC,
+                                          bool load) {
+  switch (RC->getSize()) {
+  default:
+    llvm_unreachable("Unknown spill size");
+  case 2:
+    return load ? X86::KMOVWkm : X86::KMOVWmk;
+  case 4:
+    return load ? X86::KMOVDkm : X86::KMOVDmk;
+  case 8:
+    return load ? X86::KMOVQkm : X86::KMOVQmk;
+  }
+}
+
 static unsigned getLoadStoreRegOpcode(unsigned Reg,
                                       const TargetRegisterClass *RC,
                                       bool isStackAligned,
                                       const X86Subtarget &STI,
                                       bool load) {
   if (STI.hasAVX512()) {
-    if (X86::VK8RegClass.hasSubClassEq(RC) ||
-        X86::VK16RegClass.hasSubClassEq(RC))
-      return load ? X86::KMOVWkm : X86::KMOVWmk;
+    if (isMaskRegClass(RC))
+      return getLoadStoreMaskRegOpcode(RC, load);
     if (RC->getSize() == 4 && X86::FR32XRegClass.hasSubClassEq(RC))
       return load ? X86::VMOVSSZrm : X86::VMOVSSZmr;
     if (RC->getSize() == 8 && X86::FR64XRegClass.hasSubClassEq(RC))
--- lib/Target/X86/X86InstrInfo.td	2016-04-07 01:11:55.018960678 +0300
+++ lib/Target/X86/X86InstrInfo.td	2016-04-07 01:14:17.400966516 +0300
@@ -728,6 +728,8 @@

 //===----------------------------------------------------------------------===//
 // X86 Instruction Predicate Definitions.
+def TruePredicate : Predicate<"true">;
+
 def HasCMov : Predicate<"Subtarget->hasCMov()">;
 def NoCMov : Predicate<"!Subtarget->hasCMov()">;

--- lib/Target/X86/X86InstrSSE.td	2016-04-07 01:11:55.014960678 +0300
+++ lib/Target/X86/X86InstrSSE.td	2016-04-07 01:14:18.172966548 +0300
@@ -4273,17 +4273,17 @@
 //===---------------------------------------------------------------------===//

 defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
-                             SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
+                             SSE_INTALU_ITINS_P, 1, TruePredicate>;
 defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
-                             SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
+                             SSE_INTALU_ITINS_P, 1, TruePredicate>;
 defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
-                             SSE_INTALU_ITINS_P, 1, NoVLX>;
+                             SSE_INTALU_ITINS_P, 1, TruePredicate>;
 defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
-                             SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
+                             SSE_INTALU_ITINS_P, 0, TruePredicate>;
 defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
-                             SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
+                             SSE_INTALU_ITINS_P, 0, TruePredicate>;
 defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
-                             SSE_INTALU_ITINS_P, 0, NoVLX>;
+                             SSE_INTALU_ITINS_P, 0, TruePredicate>;

 //===---------------------------------------------------------------------===//
 // SSE2 - Packed Integer Shuffle Instructions
--- lib/Target/X86/X86RegisterInfo.td	2016-04-07 01:11:55.018960678 +0300
+++ lib/Target/X86/X86RegisterInfo.td	2016-04-07 01:13:01.037963385 +0300
@@ -477,18 +477,18 @@
                          256, (sequence "YMM%u", 0, 31)>;

 // Mask registers
-def VK1 : RegisterClass<"X86", [i1], 8, (sequence "K%u", 0, 7)> {let Size = 8;}
-def VK2 : RegisterClass<"X86", [v2i1], 8, (add VK1)> {let Size = 8;}
-def VK4 : RegisterClass<"X86", [v4i1], 8, (add VK2)> {let Size = 8;}
-def VK8 : RegisterClass<"X86", [v8i1], 8, (add VK4)> {let Size = 8;}
+def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
+def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;}
+def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;}
+def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;}
 def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
 def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
 def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}

-def VK1WM : RegisterClass<"X86", [i1], 8, (sub VK1, K0)> {let Size = 8;}
-def VK2WM : RegisterClass<"X86", [v2i1], 8, (sub VK2, K0)> {let Size = 8;}
-def VK4WM : RegisterClass<"X86", [v4i1], 8, (sub VK4, K0)> {let Size = 8;}
-def VK8WM : RegisterClass<"X86", [v8i1], 8, (sub VK8, K0)> {let Size = 8;}
+def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)> {let Size = 16;}
+def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
+def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
+def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;}
 def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
 def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
 def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
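To make the patch pack easier to skim: the spill/reload opcode for AVX-512 mask registers is now chosen from the register class's byte size instead of being hard-coded to kmovw. A small Python illustration of the mapping implemented by getLoadStoreMaskRegOpcode above (the dictionary and helper are illustrative only, not LLVM API):

# Spill size in bytes -> (load opcode, store opcode), per the patch above.
KMOV_BY_SIZE = {
    2: ("KMOVWkm", "KMOVWmk"),  # 16-bit masks: kmovw
    4: ("KMOVDkm", "KMOVDmk"),  # 32-bit masks: kmovd
    8: ("KMOVQkm", "KMOVQmk"),  # 64-bit masks: kmovq
}

def load_store_mask_opcode(size_bytes, load):
    if size_bytes not in KMOV_BY_SIZE:
        raise ValueError("Unknown spill size")  # mirrors llvm_unreachable
    load_op, store_op = KMOV_BY_SIZE[size_bytes]
    return load_op if load else store_op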