summary refs log tree commit diff stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86FastISel.cpp 58
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td 79
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.cpp 22
3 files changed, 106 insertions, 53 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 67a1e12f6cd..7d38abbc2ce 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -367,6 +367,10 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
+ // TODO: Support this properly.
+ if (Subtarget->hasAVX512())
+ return false;
+ LLVM_FALLTHROUGH;
case MVT::i8:
Opc = X86::MOV8rm;
RC = &X86::GR8RegClass;
@@ -540,11 +544,12 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
// In case ValReg is a K register, COPY to a GPR
if (MRI.getRegClass(ValReg) == &X86::VK1RegClass) {
unsigned KValReg = ValReg;
- ValReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass
- : &X86::GR8_ABCD_LRegClass);
+ ValReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ValReg)
.addReg(KValReg);
+ ValReg = fastEmitInst_extractsubreg(MVT::i8, ValReg, /*Kill=*/true,
+ X86::sub_8bit);
}
// Mask out all but lowest bit.
unsigned AndResult = createResultReg(&X86::GR8RegClass);
@@ -1280,11 +1285,12 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// In case SrcReg is a K register, COPY to a GPR
if (MRI.getRegClass(SrcReg) == &X86::VK1RegClass) {
unsigned KSrcReg = SrcReg;
- SrcReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass
- : &X86::GR8_ABCD_LRegClass);
+ SrcReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), SrcReg)
.addReg(KSrcReg);
+ SrcReg = fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
+ X86::sub_8bit);
}
SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
@@ -1580,11 +1586,12 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
// In case ResultReg is a K register, COPY to a GPR
if (MRI.getRegClass(ResultReg) == &X86::VK1RegClass) {
unsigned KResultReg = ResultReg;
- ResultReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass
- : &X86::GR8_ABCD_LRegClass);
+ ResultReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(KResultReg);
+ ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
+ X86::sub_8bit);
}
// Set the high bits to zero.
@@ -1768,11 +1775,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// In case OpReg is a K register, COPY to a GPR
if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
unsigned KOpReg = OpReg;
- OpReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass
- : &X86::GR8_ABCD_LRegClass);
+ OpReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), OpReg)
.addReg(KOpReg);
+ OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
+ X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(OpReg)
@@ -2113,11 +2121,12 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
// In case OpReg is a K register, COPY to a GPR
if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
unsigned KCondReg = CondReg;
- CondReg = createResultReg(Subtarget->is64Bit() ?
- &X86::GR8RegClass : &X86::GR8_ABCD_LRegClass);
+ CondReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), CondReg)
.addReg(KCondReg, getKillRegState(CondIsKill));
+ CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+ X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(CondReg, getKillRegState(CondIsKill))
@@ -2327,11 +2336,12 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
// In case OpReg is a K register, COPY to a GPR
if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
unsigned KCondReg = CondReg;
- CondReg = createResultReg(Subtarget->is64Bit() ?
- &X86::GR8RegClass : &X86::GR8_ABCD_LRegClass);
+ CondReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), CondReg)
.addReg(KCondReg, getKillRegState(CondIsKill));
+ CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+ X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(CondReg, getKillRegState(CondIsKill))
@@ -3307,6 +3317,16 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Handle zero-extension from i1 to i8, which is common.
if (ArgVT == MVT::i1) {
+ // In case ArgReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(ArgReg) == &X86::VK1RegClass) {
+ unsigned KArgReg = ArgReg;
+ ArgReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ArgReg)
+ .addReg(KArgReg);
+ ArgReg = fastEmitInst_extractsubreg(MVT::i8, ArgReg, /*Kill=*/true,
+ X86::sub_8bit);
+ }
// Set the high bits to zero.
ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
ArgVT = MVT::i8;
@@ -3642,6 +3662,13 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
case MVT::i1:
+ if (Subtarget->hasAVX512()) {
+ // Need to copy to a VK1 register.
+ unsigned ResultReg = createResultReg(&X86::VK1RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(SrcReg);
+ return ResultReg;
+ }
case MVT::i8:
return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
X86::sub_8bit);
@@ -3663,7 +3690,12 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
unsigned Opc = 0;
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
- case MVT::i1: VT = MVT::i8; LLVM_FALLTHROUGH;
+ case MVT::i1:
+ // TODO: Support this properly.
+ if (Subtarget->hasAVX512())
+ return 0;
+ VT = MVT::i8;
+ LLVM_FALLTHROUGH;
case MVT::i8: Opc = X86::MOV8ri; break;
case MVT::i16: Opc = X86::MOV16ri; break;
case MVT::i32: Opc = X86::MOV32ri; break;
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index c2d8f8a8fa6..33054e3fde5 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2183,28 +2183,26 @@ let Predicates = [HasBWI] in {
// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
- (COPY_TO_REGCLASS GR16:$src, VK16)>;
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
- (COPY_TO_REGCLASS VK16:$src, GR16)>;
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
- (COPY_TO_REGCLASS GR8:$src, VK8)>;
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
- (COPY_TO_REGCLASS VK8:$src, GR8)>;
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
(KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
- (i32 (INSERT_SUBREG (IMPLICIT_DEF),
- (i16 (COPY_TO_REGCLASS VK16:$src, GR16)), sub_16bit))>;
+ (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
- (MOVZX32rr8 (COPY_TO_REGCLASS VK8:$src, GR8))>, Requires<[NoDQI]>;
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>;
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
(KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
- (i32 (INSERT_SUBREG (IMPLICIT_DEF),
- (i8 (COPY_TO_REGCLASS VK8:$src, GR8)), sub_8bit))>;
+ (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
(COPY_TO_REGCLASS GR32:$src, VK32)>;
@@ -3288,6 +3286,23 @@ def : Pat<(masked_store addr:$dst, Mask,
}
+multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
+ AVX512VLVectorVTInfo _,
+ dag Mask, RegisterClass MaskRC,
+ SubRegIndex subreg> {
+
+def : Pat<(masked_store addr:$dst, Mask,
+ (_.info512.VT (insert_subvector undef,
+ (_.info256.VT (insert_subvector undef,
+ (_.info128.VT _.info128.RC:$src),
+ (iPTR 0))),
+ (iPTR 0)))),
+ (!cast<Instruction>(InstrStr#mrk) addr:$dst,
+ (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+ (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+
+}
+
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC> {
@@ -3314,22 +3329,50 @@ def : Pat<(_.info128.VT (extract_subvector
}
+multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
+ AVX512VLVectorVTInfo _,
+ dag Mask, RegisterClass MaskRC,
+ SubRegIndex subreg> {
+
+def : Pat<(_.info128.VT (extract_subvector
+ (_.info512.VT (masked_load addr:$srcAddr, Mask,
+ (_.info512.VT (bitconvert
+ (v16i32 immAllZerosV))))),
+ (iPTR 0))),
+ (!cast<Instruction>(InstrStr#rmkz)
+ (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+ addr:$srcAddr)>;
+
+def : Pat<(_.info128.VT (extract_subvector
+ (_.info512.VT (masked_load addr:$srcAddr, Mask,
+ (_.info512.VT (insert_subvector undef,
+ (_.info256.VT (insert_subvector undef,
+ (_.info128.VT (X86vzmovl _.info128.RC:$src)),
+ (iPTR 0))),
+ (iPTR 0))))),
+ (iPTR 0))),
+ (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
+ (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+ addr:$srcAddr)>;
+
+}
+
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
-defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
- (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>;
-defm : avx512_store_scalar_lowering<"VMOVSDZ", avx512vl_f64_info,
- (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>;
+defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
+ (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
+defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
+ (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
-defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
- (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>;
-defm : avx512_load_scalar_lowering<"VMOVSDZ", avx512vl_f64_info,
- (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>;
+defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
+ (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
+defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
+ (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
(COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
@@ -3340,7 +3383,7 @@ def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
- (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
+ (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM)),
(COPY_TO_REGCLASS VR128X:$src, FR32X))>;
let hasSideEffects = 0 in
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 95f192b7d0a..bb3889a2070 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6309,8 +6309,6 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
// SrcReg(MaskReg) -> DestReg(GR64)
// SrcReg(MaskReg) -> DestReg(GR32)
- // SrcReg(MaskReg) -> DestReg(GR16)
- // SrcReg(MaskReg) -> DestReg(GR8)
// All KMASK RegClasses hold the same k registers, can be tested against anyone.
if (X86::VK16RegClass.contains(SrcReg)) {
@@ -6320,21 +6318,10 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
}
if (X86::GR32RegClass.contains(DestReg))
return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
- if (X86::GR16RegClass.contains(DestReg)) {
- DestReg = getX86SubSuperRegister(DestReg, 32);
- return X86::KMOVWrk;
- }
- if (X86::GR8RegClass.contains(DestReg)) {
- assert(!isHReg(DestReg) && "Cannot move between mask and h-reg");
- DestReg = getX86SubSuperRegister(DestReg, 32);
- return Subtarget.hasDQI() ? X86::KMOVBrk : X86::KMOVWrk;
- }
}
// SrcReg(GR64) -> DestReg(MaskReg)
// SrcReg(GR32) -> DestReg(MaskReg)
- // SrcReg(GR16) -> DestReg(MaskReg)
- // SrcReg(GR8) -> DestReg(MaskReg)
// All KMASK RegClasses hold the same k registers, can be tested against anyone.
if (X86::VK16RegClass.contains(DestReg)) {
@@ -6344,15 +6331,6 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
}
if (X86::GR32RegClass.contains(SrcReg))
return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
- if (X86::GR16RegClass.contains(SrcReg)) {
- SrcReg = getX86SubSuperRegister(SrcReg, 32);
- return X86::KMOVWkr;
- }
- if (X86::GR8RegClass.contains(SrcReg)) {
- assert(!isHReg(SrcReg) && "Cannot move between mask and h-reg");
- SrcReg = getX86SubSuperRegister(SrcReg, 32);
- return Subtarget.hasDQI() ? X86::KMOVBkr : X86::KMOVWkr;
- }
}
OpenPOWER on IntegriCloud