diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86FastISel.cpp | 58 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 79 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 22 |
3 files changed, 106 insertions, 53 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 67a1e12f6cd..7d38abbc2ce 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -367,6 +367,10 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: + // TODO: Support this properly. + if (Subtarget->hasAVX512()) + return false; + LLVM_FALLTHROUGH; case MVT::i8: Opc = X86::MOV8rm; RC = &X86::GR8RegClass; @@ -540,11 +544,12 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, // In case ValReg is a K register, COPY to a GPR if (MRI.getRegClass(ValReg) == &X86::VK1RegClass) { unsigned KValReg = ValReg; - ValReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass - : &X86::GR8_ABCD_LRegClass); + ValReg = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ValReg) .addReg(KValReg); + ValReg = fastEmitInst_extractsubreg(MVT::i8, ValReg, /*Kill=*/true, + X86::sub_8bit); } // Mask out all but lowest bit. unsigned AndResult = createResultReg(&X86::GR8RegClass); @@ -1280,11 +1285,12 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // In case SrcReg is a K register, COPY to a GPR if (MRI.getRegClass(SrcReg) == &X86::VK1RegClass) { unsigned KSrcReg = SrcReg; - SrcReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass - : &X86::GR8_ABCD_LRegClass); + SrcReg = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), SrcReg) .addReg(KSrcReg); + SrcReg = fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true, + X86::sub_8bit); } SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false); SrcVT = MVT::i8; @@ -1580,11 +1586,12 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) { // In case ResultReg is a K register, COPY to a GPR if (MRI.getRegClass(ResultReg) == &X86::VK1RegClass) { unsigned KResultReg = ResultReg; - ResultReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass - : &X86::GR8_ABCD_LRegClass); + ResultReg = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(KResultReg); + ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true, + X86::sub_8bit); } // Set the high bits to zero. @@ -1768,11 +1775,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { // In case OpReg is a K register, COPY to a GPR if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) { unsigned KOpReg = OpReg; - OpReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass - : &X86::GR8_ABCD_LRegClass); + OpReg = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), OpReg) .addReg(KOpReg); + OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true, + X86::sub_8bit); } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) .addReg(OpReg) @@ -2113,11 +2121,12 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { // In case OpReg is a K register, COPY to a GPR if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) { unsigned KCondReg = CondReg; - CondReg = createResultReg(Subtarget->is64Bit() ? - &X86::GR8RegClass : &X86::GR8_ABCD_LRegClass); + CondReg = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), CondReg) .addReg(KCondReg, getKillRegState(CondIsKill)); + CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true, + X86::sub_8bit); } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) .addReg(CondReg, getKillRegState(CondIsKill)) @@ -2327,11 +2336,12 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) { // In case OpReg is a K register, COPY to a GPR if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) { unsigned KCondReg = CondReg; - CondReg = createResultReg(Subtarget->is64Bit() ? - &X86::GR8RegClass : &X86::GR8_ABCD_LRegClass); + CondReg = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), CondReg) .addReg(KCondReg, getKillRegState(CondIsKill)); + CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true, + X86::sub_8bit); } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) .addReg(CondReg, getKillRegState(CondIsKill)) @@ -3307,6 +3317,16 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { // Handle zero-extension from i1 to i8, which is common. if (ArgVT == MVT::i1) { + // In case SrcReg is a K register, COPY to a GPR + if (MRI.getRegClass(ArgReg) == &X86::VK1RegClass) { + unsigned KArgReg = ArgReg; + ArgReg = createResultReg(&X86::GR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ArgReg) + .addReg(KArgReg); + ArgReg = fastEmitInst_extractsubreg(MVT::i8, ArgReg, /*Kill=*/true, + X86::sub_8bit); + } // Set the high bits to zero. ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false); ArgVT = MVT::i8; @@ -3642,6 +3662,13 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) { switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type"); case MVT::i1: + if (Subtarget->hasAVX512()) { + // Need to copy to a VK1 register. + unsigned ResultReg = createResultReg(&X86::VK1RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(SrcReg); + return ResultReg; + } case MVT::i8: return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true, X86::sub_8bit); @@ -3663,7 +3690,12 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) { unsigned Opc = 0; switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type"); - case MVT::i1: VT = MVT::i8; LLVM_FALLTHROUGH; + case MVT::i1: + // TODO: Support this properly. + if (Subtarget->hasAVX512()) + return 0; + VT = MVT::i8; + LLVM_FALLTHROUGH; case MVT::i8: Opc = X86::MOV8ri; break; case MVT::i16: Opc = X86::MOV16ri; break; case MVT::i32: Opc = X86::MOV32ri; break; diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index c2d8f8a8fa6..33054e3fde5 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2183,28 +2183,26 @@ let Predicates = [HasBWI] in { // GR from/to mask register def : Pat<(v16i1 (bitconvert (i16 GR16:$src))), - (COPY_TO_REGCLASS GR16:$src, VK16)>; + (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>; def : Pat<(i16 (bitconvert (v16i1 VK16:$src))), - (COPY_TO_REGCLASS VK16:$src, GR16)>; + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>; def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), - (COPY_TO_REGCLASS GR8:$src, VK8)>; + (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>; def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), - (COPY_TO_REGCLASS VK8:$src, GR8)>; + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>; def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), (KMOVWrk VK16:$src)>; def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), - (i32 (INSERT_SUBREG (IMPLICIT_DEF), - (i16 (COPY_TO_REGCLASS VK16:$src, GR16)), sub_16bit))>; + (COPY_TO_REGCLASS VK16:$src, GR32)>; def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), - (MOVZX32rr8 (COPY_TO_REGCLASS VK8:$src, GR8))>, Requires<[NoDQI]>; + (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>; def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), (KMOVBrk VK8:$src)>, Requires<[HasDQI]>; def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), - (i32 (INSERT_SUBREG (IMPLICIT_DEF), - (i8 (COPY_TO_REGCLASS VK8:$src, GR8)), sub_8bit))>; + (COPY_TO_REGCLASS VK8:$src, GR32)>; def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (COPY_TO_REGCLASS GR32:$src, VK32)>; @@ -3288,6 +3286,23 @@ def : Pat<(masked_store addr:$dst, Mask, } +multiclass avx512_store_scalar_lowering_subreg<string InstrStr, + AVX512VLVectorVTInfo _, + dag Mask, RegisterClass MaskRC, + SubRegIndex subreg> { + +def : Pat<(masked_store addr:$dst, Mask, + (_.info512.VT (insert_subvector undef, + (_.info256.VT (insert_subvector undef, + (_.info128.VT _.info128.RC:$src), + (iPTR 0))), + (iPTR 0)))), + (!cast<Instruction>(InstrStr#mrk) addr:$dst, + (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)), + (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; + +} + multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, dag Mask, RegisterClass MaskRC> { @@ -3314,22 +3329,50 @@ def : Pat<(_.info128.VT (extract_subvector } +multiclass avx512_load_scalar_lowering_subreg<string InstrStr, + AVX512VLVectorVTInfo _, + dag Mask, RegisterClass MaskRC, + SubRegIndex subreg> { + +def : Pat<(_.info128.VT (extract_subvector + (_.info512.VT (masked_load addr:$srcAddr, Mask, + (_.info512.VT (bitconvert + (v16i32 immAllZerosV))))), + (iPTR 0))), + (!cast<Instruction>(InstrStr#rmkz) + (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)), + addr:$srcAddr)>; + +def : Pat<(_.info128.VT (extract_subvector + (_.info512.VT (masked_load addr:$srcAddr, Mask, + (_.info512.VT (insert_subvector undef, + (_.info256.VT (insert_subvector undef, + (_.info128.VT (X86vzmovl _.info128.RC:$src)), + (iPTR 0))), + (iPTR 0))))), + (iPTR 0))), + (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, + (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)), + addr:$srcAddr)>; + +} + defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>; defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>; defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; -defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, - (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>; -defm : avx512_store_scalar_lowering<"VMOVSDZ", avx512vl_f64_info, - (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>; +defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, + (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>; +defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, + (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; -defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, - (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>; -defm : avx512_load_scalar_lowering<"VMOVSDZ", avx512vl_f64_info, - (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>; +defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, + (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>; +defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, + (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X), @@ -3340,7 +3383,7 @@ def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>; def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), - (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)), + (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; let hasSideEffects = 0 in diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 95f192b7d0a..bb3889a2070 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -6309,8 +6309,6 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg, // SrcReg(MaskReg) -> DestReg(GR64) // SrcReg(MaskReg) -> DestReg(GR32) - // SrcReg(MaskReg) -> DestReg(GR16) - // SrcReg(MaskReg) -> DestReg(GR8) // All KMASK RegClasses hold the same k registers, can be tested against anyone. if (X86::VK16RegClass.contains(SrcReg)) { @@ -6320,21 +6318,10 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg, } if (X86::GR32RegClass.contains(DestReg)) return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk; - if (X86::GR16RegClass.contains(DestReg)) { - DestReg = getX86SubSuperRegister(DestReg, 32); - return X86::KMOVWrk; - } - if (X86::GR8RegClass.contains(DestReg)) { - assert(!isHReg(DestReg) && "Cannot move between mask and h-reg"); - DestReg = getX86SubSuperRegister(DestReg, 32); - return Subtarget.hasDQI() ? X86::KMOVBrk : X86::KMOVWrk; - } } // SrcReg(GR64) -> DestReg(MaskReg) // SrcReg(GR32) -> DestReg(MaskReg) - // SrcReg(GR16) -> DestReg(MaskReg) - // SrcReg(GR8) -> DestReg(MaskReg) // All KMASK RegClasses hold the same k registers, can be tested against anyone. if (X86::VK16RegClass.contains(DestReg)) { @@ -6344,15 +6331,6 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg, } if (X86::GR32RegClass.contains(SrcReg)) return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr; - if (X86::GR16RegClass.contains(SrcReg)) { - SrcReg = getX86SubSuperRegister(SrcReg, 32); - return X86::KMOVWkr; - } - if (X86::GR8RegClass.contains(SrcReg)) { - assert(!isHReg(SrcReg) && "Cannot move between mask and h-reg"); - SrcReg = getX86SubSuperRegister(SrcReg, 32); - return Subtarget.hasDQI() ? X86::KMOVBkr : X86::KMOVWkr; - } } |