diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-08-01 20:49:41 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-08-01 20:49:41 +0000 |
commit | 206f8263489543a65361230e1e7ab7ea2d9c3ac8 (patch) | |
tree | 99855c0e2afd7be967ffb69e26b94876082ac820 /llvm/lib/Target/AMDGPU/SIISelLowering.cpp | |
parent | 6886061dab1a53e361bddc1d480c2fdb7980d37d (diff) | |
download | bcm5719-llvm-206f8263489543a65361230e1e7ab7ea2d9c3ac8.tar.gz bcm5719-llvm-206f8263489543a65361230e1e7ab7ea2d9c3ac8.zip |
AMDGPU: Fix handling of div_scale with undef inputs
The src0 register must match src1 or src2, but if these
were undefined they could end up using different implicit_defed
virtual registers. Force these to use one undef vreg or pick the
defined other register.
Also fixes producing invalid nodes without the right number of
inputs when src2 is undef.
llvm-svn: 309743
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 56 |
1 files changed, 55 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 6be94ba5cbf..1b69cbdfcad 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3631,7 +3631,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // 3rd parameter required to be a constant. const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3)); if (!Param) - return DAG.getUNDEF(VT); + return DAG.getMergeValues({ DAG.getUNDEF(VT), DAG.getUNDEF(MVT::i1) }, DL); // Translate to the operands expected by the machine instruction. The // first parameter must be the same as the first instruction. @@ -6005,6 +6005,60 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, legalizeTargetIndependentNode(Node, DAG); return Node; } + + switch (Opcode) { + case AMDGPU::V_DIV_SCALE_F32: + case AMDGPU::V_DIV_SCALE_F64: { + // Satisfy the operand register constraint when one of the inputs is + // undefined. Ordinarily each undef value will have its own implicit_def of + // a vreg, so force these to use a single register. + SDValue Src0 = Node->getOperand(0); + SDValue Src1 = Node->getOperand(1); + SDValue Src2 = Node->getOperand(2); + + if ((Src0.isMachineOpcode() && + Src0.getMachineOpcode() != AMDGPU::IMPLICIT_DEF) && + (Src0 == Src1 || Src0 == Src2)) + break; + + MVT VT = Src0.getValueType().getSimpleVT(); + const TargetRegisterClass *RC = getRegClassFor(VT); + + MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + SDValue UndefReg = DAG.getRegister(MRI.createVirtualRegister(RC), VT); + + SDValue ImpDef = DAG.getCopyToReg(DAG.getEntryNode(), SDLoc(Node), + UndefReg, Src0, SDValue()); + + // src0 must be the same register as src1 or src2, even if the value is + // undefined, so make sure we don't violate this constraint. + if (Src0.isMachineOpcode() && + Src0.getMachineOpcode() == AMDGPU::IMPLICIT_DEF) { + if (Src1.isMachineOpcode() && + Src1.getMachineOpcode() != AMDGPU::IMPLICIT_DEF) + Src0 = Src1; + else if (Src2.isMachineOpcode() && + Src2.getMachineOpcode() != AMDGPU::IMPLICIT_DEF) + Src0 = Src2; + else { + assert(Src1.getMachineOpcode() == AMDGPU::IMPLICIT_DEF); + Src0 = UndefReg; + Src1 = UndefReg; + } + } else + break; + + SmallVector<SDValue, 4> Ops = { Src0, Src1, Src2 }; + for (unsigned I = 3, N = Node->getNumOperands(); I != N; ++I) + Ops.push_back(Node->getOperand(I)); + + Ops.push_back(ImpDef.getValue(1)); + return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); + } + default: + break; + } + return Node; } |