author     Craig Topper <craig.topper@gmail.com>    2016-12-05 04:51:31 +0000
committer  Craig Topper <craig.topper@gmail.com>    2016-12-05 04:51:31 +0000
commit     7ef6ea324a0ce651aedd3d3b6bc40a76674b85da (patch)
tree       6966003d130b028ab7b1528294283a6270ac67a8 /llvm/lib/Target/X86/X86FastISel.cpp
parent     227d4279a8e176ee196b6b6aa5e778e4986a1f6e (diff)
[AVX-512] Teach fast isel to use a masked compare and masked movss/sd to handle a scalar cmp+select sequence when AVX-512 is enabled. This matches the behavior of normal isel.
llvm-svn: 288636
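
For context, a minimal example of the source pattern this change targets. The function and the sketched instruction sequence below are illustrative, not taken from the commit:

    // A scalar compare-and-select; under fast isel this is an fcmp feeding a
    // select on f32 (the f64/movsd case is analogous).
    float fsel(float a, float b, float x, float y) {
      return a < b ? x : y;
    }

    // With AVX-512 enabled, the new path emits, roughly:
    //   vcmpltss ..., %k1     ; compare into a VK1 mask register
    //   vmovss ... {%k1}      ; masked scalar move picks x or y per the mask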
Diffstat (limited to 'llvm/lib/Target/X86/X86FastISel.cpp')
-rw-r--r--   llvm/lib/Target/X86/X86FastISel.cpp   73
1 file changed, 69 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 0cc061501e4..44d918f3786 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -170,6 +170,12 @@ private:
   const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
                                             X86AddressMode &AM);
+
+  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
+                             const TargetRegisterClass *RC, unsigned Op0,
+                             bool Op0IsKill, unsigned Op1, bool Op1IsKill,
+                             unsigned Op2, bool Op2IsKill, unsigned Op3,
+                             bool Op3IsKill);
 };
 
 } // end anonymous namespace.
@@ -2180,9 +2186,36 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
   const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
   unsigned ResultReg;
-
-  if (Subtarget->hasAVX()) {
-    const TargetRegisterClass *FR32 = &X86::FR32RegClass;
+
+  if (Subtarget->hasAVX512()) {
+    // If we have AVX512 we can use a mask compare and masked movss/sd.
+    const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
+    const TargetRegisterClass *VK1 = &X86::VK1RegClass;
+
+    unsigned CmpOpcode =
+      (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
+    unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
+                                       CmpRHSReg, CmpRHSIsKill, CC);
+
+    // Need an IMPLICIT_DEF for the input that is used to generate the upper
+    // bits of the result register since it's not based on any of the inputs.
+    unsigned ImplicitDefReg = createResultReg(VR128X);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+
+    // Place RHSReg in the passthru of the masked movss/sd operation and put
+    // LHSReg in the input. The mask input comes from the compare.
+    unsigned MovOpcode =
+      (RetVT.SimpleTy == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
+    unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
+                                        CmpReg, true, ImplicitDefReg, true,
+                                        LHSReg, LHSIsKill);
+
+    ResultReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
+
+  } else if (Subtarget->hasAVX()) {
     const TargetRegisterClass *VR128 = &X86::VR128RegClass;
 
     // If we have AVX, create 1 blendv instead of 3 logic instructions.
@@ -2195,7 +2228,7 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
     unsigned BlendOpcode = (RetVT.SimpleTy == MVT::f32) ?
                            X86::VBLENDVPSrr : X86::VBLENDVPDrr;
-    unsigned CmpReg = fastEmitInst_rri(CmpOpcode, FR32, CmpLHSReg, CmpLHSIsKill,
+    unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
                                        CmpRHSReg, CmpRHSIsKill, CC);
     unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
                                           LHSReg, LHSIsKill, CmpReg, true);
@@ -3849,6 +3882,38 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
   return true;
 }
 
+unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
+                                        const TargetRegisterClass *RC,
+                                        unsigned Op0, bool Op0IsKill,
+                                        unsigned Op1, bool Op1IsKill,
+                                        unsigned Op2, bool Op2IsKill,
+                                        unsigned Op3, bool Op3IsKill) {
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+  unsigned ResultReg = createResultReg(RC);
+  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
+  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
+
+  if (II.getNumDefs() >= 1)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+        .addReg(Op0, getKillRegState(Op0IsKill))
+        .addReg(Op1, getKillRegState(Op1IsKill))
+        .addReg(Op2, getKillRegState(Op2IsKill))
+        .addReg(Op3, getKillRegState(Op3IsKill));
+  else {
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+        .addReg(Op0, getKillRegState(Op0IsKill))
+        .addReg(Op1, getKillRegState(Op1IsKill))
+        .addReg(Op2, getKillRegState(Op2IsKill))
+        .addReg(Op3, getKillRegState(Op3IsKill));
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+  }
+  return ResultReg;
+}
+
 namespace llvm {
   FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
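
A note on the operand placement in the masked move above: the compare mask is true exactly when the select should produce the LHS value, so LHSReg goes in the masked-move source and RHSReg in the passthru. A minimal scalar model of that element-0, merge-masking semantics (names are illustrative, not from the LLVM sources):

    #include <cassert>

    // Models the element-0 behavior of a masked scalar move such as
    // VMOVSSZrrk: when the mask bit is set the source element is taken,
    // otherwise the passthru element is kept unchanged.
    static float maskedScalarMove(bool maskBit, float passthru, float src) {
      return maskBit ? src : passthru;
    }

    int main() {
      // select (fcmp olt a, b), x, y  ==>  mask = (a < b), then move x over y.
      float a = 1.0f, b = 2.0f, x = 10.0f, y = 20.0f;
      bool mask = a < b;                             // vcmpltss -> mask
      assert(maskedScalarMove(mask, y, x) == 10.0f); // masked vmovss
      return 0;
    }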