author     Alexander Ivchenko <alexander.ivchenko@intel.com>   2018-02-08 22:41:47 +0000
committer  Alexander Ivchenko <alexander.ivchenko@intel.com>   2018-02-08 22:41:47 +0000
commit     da9e81c462fdebd93c78ab2aa54782840587e9f4 (patch)
tree       33a02a1dc383cff562e07962a158fec50fb6810e /llvm/lib
parent     a85c4fc0291613b4ca0f60850a73c46dcaea69ae (diff)
[GlobalISel][X86] Fixing failures after https://reviews.llvm.org/D37775
The patch makes sure that X86CallLowering adds the proper
G_COPY/G_TRUNC and G_ANYEXT/G_COPY sequences when lowering
arguments/returns for floating-point values passed in registers.

Tests are updated accordingly.
Reviewed By: qcolombet
Differential Revision: https://reviews.llvm.org/D42287
llvm-svn: 324665
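
For illustration, the outgoing side of the change can be sketched as a small standalone helper (a minimal sketch only; the name widenBeforeCopy and its parameter list are hypothetical, not part of the patch). When the physical register is wider than the value itself and ValVT == LocVT, the handler builds a G_ANYEXT to the full register width first, so the final COPY has matching widths:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/LowLevelType.h"

using namespace llvm;

// Hypothetical helper mirroring the outgoing-argument pattern described
// above: widen ValVReg (e.g. an s32 holding an f32) to the full width of
// PhysReg (e.g. the 128-bit xmm0) with G_ANYEXT, then emit the COPY.
static void widenBeforeCopy(MachineIRBuilder &MIRBuilder, unsigned ValVReg,
                            unsigned PhysReg, unsigned PhysRegSizeInBits) {
  auto Ext = MIRBuilder.buildAnyExt(LLT::scalar(PhysRegSizeInBits), ValVReg);
  MIRBuilder.buildCopy(PhysReg, Ext->getOperand(0).getReg());
}

The incoming side is symmetric - copy the full-width register first, then truncate down to the value type; a matching sketch follows the diff below.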
Diffstat (limited to 'llvm/lib')
-rw-r--r--   llvm/lib/Target/X86/X86CallLowering.cpp          40
-rw-r--r--   llvm/lib/Target/X86/X86InstructionSelector.cpp   58
-rw-r--r--   llvm/lib/Target/X86/X86LegalizerInfo.cpp          2
-rw-r--r--   llvm/lib/Target/X86/X86RegisterBankInfo.cpp      21
4 files changed, 110 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp
index ccb982f9ac1..80dd872d1ba 100644
--- a/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -126,7 +126,25 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
   void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
                         CCValAssign &VA) override {
     MIB.addUse(PhysReg, RegState::Implicit);
-    unsigned ExtReg = extendRegister(ValVReg, VA);
+
+    unsigned ExtReg;
+    // If we are copying the value to a physical register with the
+    // size larger than the size of the value itself - build AnyExt
+    // to the size of the register first and only then do the copy.
+    // The example of that would be copying from s32 to xmm0, for which
+    // case ValVT == LocVT == MVT::f32. If LocSize and ValSize are not equal
+    // we expect normal extendRegister mechanism to work.
+    unsigned PhysRegSize =
+        MRI.getTargetRegisterInfo()->getRegSizeInBits(PhysReg, MRI);
+    unsigned ValSize = VA.getValVT().getSizeInBits();
+    unsigned LocSize = VA.getLocVT().getSizeInBits();
+    if (PhysRegSize > ValSize && LocSize == ValSize) {
+      assert((PhysRegSize == 128 || PhysRegSize == 80) && "We expect that to be 128 bit");
+      auto MIB = MIRBuilder.buildAnyExt(LLT::scalar(PhysRegSize), ValVReg);
+      ExtReg = MIB->getOperand(0).getReg();
+    } else
+      ExtReg = extendRegister(ValVReg, VA);
+
     MIRBuilder.buildCopy(PhysReg, ExtReg);
   }
@@ -229,10 +247,28 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
   void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
                         CCValAssign &VA) override {
     markPhysRegUsed(PhysReg);
+
     switch (VA.getLocInfo()) {
-    default:
+    default: {
+      // If we are copying the value from a physical register with the
+      // size larger than the size of the value itself - build the copy
+      // of the phys reg first and then build the truncation of that copy.
+      // The example of that would be copying from xmm0 to s32, for which
+      // case ValVT == LocVT == MVT::f32. If LocSize and ValSize are not equal
+      // we expect this to be handled in SExt/ZExt/AExt case.
+      unsigned PhysRegSize =
+          MRI.getTargetRegisterInfo()->getRegSizeInBits(PhysReg, MRI);
+      unsigned ValSize = VA.getValVT().getSizeInBits();
+      unsigned LocSize = VA.getLocVT().getSizeInBits();
+      if (PhysRegSize > ValSize && LocSize == ValSize) {
+        auto Copy = MIRBuilder.buildCopy(LLT::scalar(PhysRegSize), PhysReg);
+        MIRBuilder.buildTrunc(ValVReg, Copy);
+        return;
+      }
+
       MIRBuilder.buildCopy(ValVReg, PhysReg);
       break;
+    }
     case CCValAssign::LocInfo::SExt:
     case CCValAssign::LocInfo::ZExt:
     case CCValAssign::LocInfo::AExt: {
diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp
index 44bbc3f1b3f..d538ef1f351 100644
--- a/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -104,6 +104,11 @@ private:
                         MachineFunction &MF) const;
   bool selectCondBranch(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
+  bool selectTurnIntoCOPY(MachineInstr &I, MachineRegisterInfo &MRI,
+                          const unsigned DstReg,
+                          const TargetRegisterClass *DstRC,
+                          const unsigned SrcReg,
+                          const TargetRegisterClass *SrcRC) const;
   bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI,
                      MachineFunction &MF) const;
   bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
@@ -640,6 +645,31 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I,
   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 }
 
+// Helper function for selectTrunc and selectAnyext.
+// Returns true if DstRC lives on a floating register class and
+// SrcRC lives on a 128-bit vector class.
+static bool canTurnIntoCOPY(const TargetRegisterClass *DstRC,
+                            const TargetRegisterClass *SrcRC) {
+  return (DstRC == &X86::FR32RegClass || DstRC == &X86::FR32XRegClass ||
+          DstRC == &X86::FR64RegClass || DstRC == &X86::FR64XRegClass) &&
+         (SrcRC == &X86::VR128RegClass || SrcRC == &X86::VR128XRegClass);
+}
+
+bool X86InstructionSelector::selectTurnIntoCOPY(
+    MachineInstr &I, MachineRegisterInfo &MRI, const unsigned DstReg,
+    const TargetRegisterClass *DstRC, const unsigned SrcReg,
+    const TargetRegisterClass *SrcRC) const {
+
+  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
+      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+    DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
+                 << " operand\n");
+    return false;
+  }
+  I.setDesc(TII.get(X86::COPY));
+  return true;
+}
+
 bool X86InstructionSelector::selectTrunc(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
@@ -659,15 +689,19 @@ bool X86InstructionSelector::selectTrunc(MachineInstr &I,
     return false;
   }
 
-  if (DstRB.getID() != X86::GPRRegBankID)
-    return false;
-
   const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
-  if (!DstRC)
+  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);
+
+  if (!DstRC || !SrcRC)
     return false;
 
-  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);
-  if (!SrcRC)
+  // If that's truncation of the value that lives on the vector class and goes
+  // into the floating class, just replace it with copy, as we are able to
+  // select it as a regular move.
+  if (canTurnIntoCOPY(DstRC, SrcRC))
+    return selectTurnIntoCOPY(I, MRI, DstReg, DstRC, SrcReg, SrcRC);
+
+  if (DstRB.getID() != X86::GPRRegBankID)
     return false;
 
   unsigned SubIdx;
@@ -765,12 +799,18 @@ bool X86InstructionSelector::selectAnyext(MachineInstr &I,
   assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
          "G_ANYEXT incorrect operand size");
 
-  if (DstRB.getID() != X86::GPRRegBankID)
-    return false;
-
   const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
   const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);
 
+  // If that's ANY_EXT of the value that lives on the floating class and goes
+  // into the vector class, just replace it with copy, as we are able to select
+  // it as a regular move.
+  if (canTurnIntoCOPY(SrcRC, DstRC))
+    return selectTurnIntoCOPY(I, MRI, SrcReg, SrcRC, DstReg, DstRC);
+
+  if (DstRB.getID() != X86::GPRRegBankID)
+    return false;
+
   if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
       !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
     DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
index a6a9d08278d..56a096b0d44 100644
--- a/llvm/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
@@ -92,6 +92,7 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
   const LLT s16 = LLT::scalar(16);
   const LLT s32 = LLT::scalar(32);
   const LLT s64 = LLT::scalar(64);
+  const LLT s128 = LLT::scalar(128);
 
   for (auto Ty : {p0, s1, s8, s16, s32})
     setAction({G_IMPLICIT_DEF, Ty}, Legal);
@@ -136,6 +137,7 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
     setAction({G_SEXT, Ty}, Legal);
     setAction({G_ANYEXT, Ty}, Legal);
   }
+  setAction({G_ANYEXT, s128}, Legal);
 
   // Comparison
   setAction({G_ICMP, s1}, Legal);
diff --git a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
index aa0e3743c94..5d4d70e47c7 100644
--- a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -73,6 +73,8 @@ X86GenRegisterBankInfo::getPartialMappingIdx(const LLT &Ty, bool isFP) {
       return PMI_GPR32;
     case 64:
       return PMI_GPR64;
+    case 128:
+      return PMI_VEC128;
       break;
     default:
       llvm_unreachable("Unsupported register size.");
@@ -83,6 +85,8 @@ X86GenRegisterBankInfo::getPartialMappingIdx(const LLT &Ty, bool isFP) {
       return PMI_FP32;
     case 64:
       return PMI_FP64;
+    case 128:
+      return PMI_VEC128;
     default:
       llvm_unreachable("Unsupported register size.");
     }
@@ -190,6 +194,23 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // Instruction having only floating-point operands (all scalars in VECRReg)
     getInstrPartialMappingIdxs(MI, MRI, /* isFP */ true, OpRegBankIdx);
     break;
+  case TargetOpcode::G_TRUNC:
+  case TargetOpcode::G_ANYEXT: {
+    auto &Op0 = MI.getOperand(0);
+    auto &Op1 = MI.getOperand(1);
+    const LLT Ty0 = MRI.getType(Op0.getReg());
+    const LLT Ty1 = MRI.getType(Op1.getReg());
+
+    bool isFPTrunc = (Ty0.getSizeInBits() == 32 || Ty0.getSizeInBits() == 64) &&
+                     Ty1.getSizeInBits() == 128 && Opc == TargetOpcode::G_TRUNC;
+    bool isFPAnyExt =
+        Ty0.getSizeInBits() == 128 &&
+        (Ty1.getSizeInBits() == 32 || Ty1.getSizeInBits() == 64) &&
+        Opc == TargetOpcode::G_ANYEXT;
+
+    getInstrPartialMappingIdxs(MI, MRI, /* isFP */ isFPTrunc || isFPAnyExt,
+                               OpRegBankIdx);
+  } break;
   default:
     // Track the bank of each register, use NotFP mapping (all scalars in GPRs)
     getInstrPartialMappingIdxs(MI, MRI, /* isFP */ false, OpRegBankIdx);
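
As the counterpart to the sketch above (again with a hypothetical name, narrowAfterCopy; this is not the committed code), the incoming-argument pattern builds the full-width copy of the physical register first and only then truncates it down to the value type:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/LowLevelType.h"

using namespace llvm;

// Hypothetical helper mirroring the incoming-argument pattern: copy the
// full-width physical register (e.g. the 128-bit xmm0) into a fresh s128
// virtual register, then G_TRUNC it down to the declared value type.
static void narrowAfterCopy(MachineIRBuilder &MIRBuilder, unsigned ValVReg,
                            unsigned PhysReg, unsigned PhysRegSizeInBits) {
  auto Copy = MIRBuilder.buildCopy(LLT::scalar(PhysRegSizeInBits), PhysReg);
  MIRBuilder.buildTrunc(ValVReg, Copy);
}

Later, canTurnIntoCOPY in the instruction selector recognizes exactly these G_TRUNC/G_ANYEXT instructions between the FR32/FR64 and VR128 register classes; since those classes overlay the same xmm registers, the conversion needs no instruction and is selected as a plain COPY.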