author     Alexander Ivchenko <alexander.ivchenko@intel.com>  2018-02-08 22:41:47 +0000
committer  Alexander Ivchenko <alexander.ivchenko@intel.com>  2018-02-08 22:41:47 +0000
commit     da9e81c462fdebd93c78ab2aa54782840587e9f4 (patch)
tree       33a02a1dc383cff562e07962a158fec50fb6810e /llvm/lib
parent     a85c4fc0291613b4ca0f60850a73c46dcaea69ae (diff)
[GlobalISel][X86] Fixing failures after https://reviews.llvm.org/D37775
The patch essentially makes sure that X86CallLowering adds proper G_COPY/G_TRUNC and G_ANYEXT/G_COPY when lowering arguments/returns for floating-point values passed in registers. Tests are updated accordingly.

Reviewed By: qcolombet

Differential Revision: https://reviews.llvm.org/D42287

llvm-svn: 324665
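For illustration, a hand-written MIR sketch (not taken from the updated tests; the vreg names are hypothetical) of what the lowering is now expected to produce for an f32 travelling through xmm0: a widening any-extend before the outgoing copy, and a copy followed by a truncation on the incoming side.

  ; outgoing: f32 return value placed in xmm0
  %ext:_(s128) = G_ANYEXT %val(s32)
  $xmm0 = COPY %ext(s128)

  ; incoming: f32 argument arriving in xmm0
  %copy:_(s128) = COPY $xmm0
  %val:_(s32) = G_TRUNC %copy(s128)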
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86CallLowering.cpp         40
-rw-r--r--  llvm/lib/Target/X86/X86InstructionSelector.cpp  58
-rw-r--r--  llvm/lib/Target/X86/X86LegalizerInfo.cpp         2
-rw-r--r--  llvm/lib/Target/X86/X86RegisterBankInfo.cpp     21
4 files changed, 110 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp
index ccb982f9ac1..80dd872d1ba 100644
--- a/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -126,7 +126,25 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
CCValAssign &VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
- unsigned ExtReg = extendRegister(ValVReg, VA);
+
+ unsigned ExtReg;
+ // If we are copying the value to a physical register that is wider
+ // than the value itself, build a G_ANYEXT to the size of the register
+ // first and only then emit the copy. An example is copying an s32 into
+ // xmm0, in which case ValVT == LocVT == MVT::f32. If LocSize and
+ // ValSize differ, we expect the normal extendRegister mechanism to
+ // handle the extension.
+ unsigned PhysRegSize =
+ MRI.getTargetRegisterInfo()->getRegSizeInBits(PhysReg, MRI);
+ unsigned ValSize = VA.getValVT().getSizeInBits();
+ unsigned LocSize = VA.getLocVT().getSizeInBits();
+ if (PhysRegSize > ValSize && LocSize == ValSize) {
+ assert((PhysRegSize == 128 || PhysRegSize == 80) && "We expect the size to be 128 or 80 bit");
+ auto MIB = MIRBuilder.buildAnyExt(LLT::scalar(PhysRegSize), ValVReg);
+ ExtReg = MIB->getOperand(0).getReg();
+ } else
+ ExtReg = extendRegister(ValVReg, VA);
+
MIRBuilder.buildCopy(PhysReg, ExtReg);
}
@@ -229,10 +247,28 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
CCValAssign &VA) override {
markPhysRegUsed(PhysReg);
+
switch (VA.getLocInfo()) {
- default:
+ default: {
+ // If we are copying the value from a physical register that is wider
+ // than the value itself, build a copy of the phys reg first and then
+ // truncate that copy. An example is copying from xmm0 into an s32, in
+ // which case ValVT == LocVT == MVT::f32. If LocSize and ValSize differ,
+ // we expect this to be handled in the SExt/ZExt/AExt cases below.
+ unsigned PhysRegSize =
+ MRI.getTargetRegisterInfo()->getRegSizeInBits(PhysReg, MRI);
+ unsigned ValSize = VA.getValVT().getSizeInBits();
+ unsigned LocSize = VA.getLocVT().getSizeInBits();
+ if (PhysRegSize > ValSize && LocSize == ValSize) {
+ auto Copy = MIRBuilder.buildCopy(LLT::scalar(PhysRegSize), PhysReg);
+ MIRBuilder.buildTrunc(ValVReg, Copy);
+ return;
+ }
+
MIRBuilder.buildCopy(ValVReg, PhysReg);
break;
+ }
case CCValAssign::LocInfo::SExt:
case CCValAssign::LocInfo::ZExt:
case CCValAssign::LocInfo::AExt: {
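A minimal IR function of the kind these handlers deal with (a hypothetical example, assuming the usual x86 conventions that return float in the 128-bit xmm0, or in the 80-bit st0 for x87) would be:

  define float @test_return_f32(float %x) {
    ret float %x
  }

Here ValVT == LocVT == MVT::f32 while the physical register is 128 (or 80) bits wide, which is exactly the PhysRegSize > ValSize && LocSize == ValSize situation the two handlers above special-case.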
diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp
index 44bbc3f1b3f..d538ef1f351 100644
--- a/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -104,6 +104,11 @@ private:
MachineFunction &MF) const;
bool selectCondBranch(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
+ bool selectTurnIntoCOPY(MachineInstr &I, MachineRegisterInfo &MRI,
+ const unsigned DstReg,
+ const TargetRegisterClass *DstRC,
+ const unsigned SrcReg,
+ const TargetRegisterClass *SrcRC) const;
bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
@@ -640,6 +645,31 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+// Helper function for selectTrunc and selectAnyext.
+// Returns true if DstRC is a floating-point register class and SrcRC is
+// a 128-bit vector register class.
+static bool canTurnIntoCOPY(const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *SrcRC) {
+ return (DstRC == &X86::FR32RegClass || DstRC == &X86::FR32XRegClass ||
+ DstRC == &X86::FR64RegClass || DstRC == &X86::FR64XRegClass) &&
+ (SrcRC == &X86::VR128RegClass || SrcRC == &X86::VR128XRegClass);
+}
+
+bool X86InstructionSelector::selectTurnIntoCOPY(
+ MachineInstr &I, MachineRegisterInfo &MRI, const unsigned DstReg,
+ const TargetRegisterClass *DstRC, const unsigned SrcReg,
+ const TargetRegisterClass *SrcRC) const {
+
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
+ !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
+ << " operand\n");
+ return false;
+ }
+ I.setDesc(TII.get(X86::COPY));
+ return true;
+}
+
bool X86InstructionSelector::selectTrunc(MachineInstr &I,
MachineRegisterInfo &MRI,
MachineFunction &MF) const {
@@ -659,15 +689,19 @@ bool X86InstructionSelector::selectTrunc(MachineInstr &I,
return false;
}
- if (DstRB.getID() != X86::GPRRegBankID)
- return false;
-
const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
- if (!DstRC)
+ const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);
+
+ if (!DstRC || !SrcRC)
return false;
- const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);
- if (!SrcRC)
+ // If this is a truncation of a value that lives in a vector class and goes
+ // into a floating-point class, just replace it with a copy, as we are able
+ // to select it as a regular move.
+ if (canTurnIntoCOPY(DstRC, SrcRC))
+ return selectTurnIntoCOPY(I, MRI, DstReg, DstRC, SrcReg, SrcRC);
+
+ if (DstRB.getID() != X86::GPRRegBankID)
return false;
unsigned SubIdx;
@@ -765,12 +799,18 @@ bool X86InstructionSelector::selectAnyext(MachineInstr &I,
assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
"G_ANYEXT incorrect operand size");
- if (DstRB.getID() != X86::GPRRegBankID)
- return false;
-
const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);
+ // If this is a G_ANYEXT of a value that lives in a floating-point class and
+ // goes into a vector class, just replace it with a copy, as we are able to
+ // select it as a regular move.
+ if (canTurnIntoCOPY(SrcRC, DstRC))
+ return selectTurnIntoCOPY(I, MRI, SrcReg, SrcRC, DstReg, DstRC);
+
+ if (DstRB.getID() != X86::GPRRegBankID)
+ return false;
+
if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
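A before/after sketch of what selectTurnIntoCOPY achieves (hand-written, with hypothetical vreg names): once the register banks place both values in VECR, the generic truncation

  %dst:_(s32) = G_TRUNC %src:_(s128)

is rewritten in place into

  %dst:fr32 = COPY %src:vr128

by constraining both registers to their concrete classes and swapping the opcode for X86::COPY. The G_ANYEXT case is handled identically, with the source and destination roles exchanged.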
diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
index a6a9d08278d..56a096b0d44 100644
--- a/llvm/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
@@ -92,6 +92,7 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
+ const LLT s128 = LLT::scalar(128);
for (auto Ty : {p0, s1, s8, s16, s32})
setAction({G_IMPLICIT_DEF, Ty}, Legal);
@@ -136,6 +137,7 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
setAction({G_SEXT, Ty}, Legal);
setAction({G_ANYEXT, Ty}, Legal);
}
+ setAction({G_ANYEXT, s128}, Legal);
// Comparison
setAction({G_ICMP, s1}, Legal);
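The effect of that one-line legalizer change, sketched by hand rather than taken from the tests: an any-extend producing a 128-bit scalar, such as

  %1:_(s128) = G_ANYEXT %0(s32)

is now accepted as legal instead of being rejected, so it survives untouched to the register-bank and instruction-selection stages shown above.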
diff --git a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
index aa0e3743c94..5d4d70e47c7 100644
--- a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -73,6 +73,8 @@ X86GenRegisterBankInfo::getPartialMappingIdx(const LLT &Ty, bool isFP) {
return PMI_GPR32;
case 64:
return PMI_GPR64;
+ case 128:
+ return PMI_VEC128;
break;
default:
llvm_unreachable("Unsupported register size.");
@@ -83,6 +85,8 @@ X86GenRegisterBankInfo::getPartialMappingIdx(const LLT &Ty, bool isFP) {
return PMI_FP32;
case 64:
return PMI_FP64;
+ case 128:
+ return PMI_VEC128;
default:
llvm_unreachable("Unsupported register size.");
}
@@ -190,6 +194,23 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Instruction having only floating-point operands (all scalars in VECRReg)
getInstrPartialMappingIdxs(MI, MRI, /* isFP */ true, OpRegBankIdx);
break;
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_ANYEXT: {
+ auto &Op0 = MI.getOperand(0);
+ auto &Op1 = MI.getOperand(1);
+ const LLT Ty0 = MRI.getType(Op0.getReg());
+ const LLT Ty1 = MRI.getType(Op1.getReg());
+
+ bool isFPTrunc = (Ty0.getSizeInBits() == 32 || Ty0.getSizeInBits() == 64) &&
+ Ty1.getSizeInBits() == 128 && Opc == TargetOpcode::G_TRUNC;
+ bool isFPAnyExt =
+ Ty0.getSizeInBits() == 128 &&
+ (Ty1.getSizeInBits() == 32 || Ty1.getSizeInBits() == 64) &&
+ Opc == TargetOpcode::G_ANYEXT;
+
+ getInstrPartialMappingIdxs(MI, MRI, /* isFP */ isFPTrunc || isFPAnyExt,
+ OpRegBankIdx);
+ } break;
default:
// Track the bank of each register, use NotFP mapping (all scalars in GPRs)
getInstrPartialMappingIdxs(MI, MRI, /* isFP */ false, OpRegBankIdx);
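Finally, a hand-written sketch of the resulting bank assignment (vreg names hypothetical): for

  %0:_(s32) = G_TRUNC %1:_(s128)

the isFPTrunc test fires and both operands are mapped through the FP partial mappings (PMI_FP32 for the destination, PMI_VEC128 for the source), while an ordinary integer truncation such as s32 from s64 still takes the default NotFP path and stays on the GPR bank.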