summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86FastISel.cpp
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2016-12-05 04:51:31 +0000
committerCraig Topper <craig.topper@gmail.com>2016-12-05 04:51:31 +0000
commit7ef6ea324a0ce651aedd3d3b6bc40a76674b85da (patch)
tree6966003d130b028ab7b1528294283a6270ac67a8 /llvm/lib/Target/X86/X86FastISel.cpp
parent227d4279a8e176ee196b6b6aa5e778e4986a1f6e (diff)
downloadbcm5719-llvm-7ef6ea324a0ce651aedd3d3b6bc40a76674b85da.tar.gz
bcm5719-llvm-7ef6ea324a0ce651aedd3d3b6bc40a76674b85da.zip
[AVX-512] Teach fast isel to use masked compare and movss for handling scalar cmp and select sequence when AVX-512 is enabled. This matches the behavior of normal isel.
llvm-svn: 288636
Diffstat (limited to 'llvm/lib/Target/X86/X86FastISel.cpp')
-rw-r--r--llvm/lib/Target/X86/X86FastISel.cpp73
1 files changed, 69 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 0cc061501e4..44d918f3786 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -170,6 +170,12 @@ private:
const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
X86AddressMode &AM);
+
+ unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill, unsigned Op3,
+ bool Op3IsKill);
};
} // end anonymous namespace.
@@ -2180,9 +2186,36 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
unsigned ResultReg;
-
- if (Subtarget->hasAVX()) {
- const TargetRegisterClass *FR32 = &X86::FR32RegClass;
+
+ if (Subtarget->hasAVX512()) {
+ // If we have AVX512 we can use a mask compare and masked movss/sd.
+ const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
+ const TargetRegisterClass *VK1 = &X86::VK1RegClass;
+
+ unsigned CmpOpcode =
+ (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+
+ // Need an IMPLICIT_DEF for the input that is used to generate the upper
+ // bits of the result register since it's not based on any of the inputs.
+ unsigned ImplicitDefReg = createResultReg(VR128X);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+
+ // Place RHSReg in the passthru of the masked movss/sd operation and put
+ // LHS in the input. The mask input comes from the compare.
+ unsigned MovOpcode =
+ (RetVT.SimpleTy == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
+ unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
+ CmpReg, true, ImplicitDefReg, true,
+ LHSReg, LHSIsKill);
+
+ ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
+
+ } else if (Subtarget->hasAVX()) {
const TargetRegisterClass *VR128 = &X86::VR128RegClass;
// If we have AVX, create 1 blendv instead of 3 logic instructions.
@@ -2195,7 +2228,7 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
unsigned BlendOpcode =
(RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
- unsigned CmpReg = fastEmitInst_rri(CmpOpcode, FR32, CmpLHSReg, CmpLHSIsKill,
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
LHSReg, LHSIsKill, CmpReg, true);
@@ -3849,6 +3882,38 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
return true;
}
+/// Emit the machine instruction \p MachineInstOpcode with four register
+/// source operands (Op0..Op3, each paired with its kill flag) and return a
+/// freshly created virtual register of class \p RC that holds the result.
+unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill,
+ unsigned Op3, bool Op3IsKill) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ // Constrain every source register against its own operand slot; the source
+ // operands are indexed right after the instruction's explicit defs.
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+ Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
+ Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
+
+ // With an explicit def, ResultReg is the instruction's destination.
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op2, getKillRegState(Op2IsKill))
+ .addReg(Op3, getKillRegState(Op3IsKill));
+ else {
+ // No explicit def: emit the instruction, then copy its first implicit def
+ // into ResultReg.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op2, getKillRegState(Op2IsKill))
+ .addReg(Op3, getKillRegState(Op3IsKill));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
namespace llvm {
FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
OpenPOWER on IntegriCloud