diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-12-12 02:30:37 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-12-12 02:30:37 +0000 |
| commit | 1e3a4ebc6e1af1bbacc795d21b15efbe63bfdee2 (patch) | |
| tree | 3242d6474d5b483544f565361bfb9fec2a45dc69 /llvm/lib/Target/R600 | |
| parent | 145d5717f569b0fb3591c3c6acdacf080f1bf4d6 (diff) | |
| download | bcm5719-llvm-1e3a4ebc6e1af1bbacc795d21b15efbe63bfdee2.tar.gz bcm5719-llvm-1e3a4ebc6e1af1bbacc795d21b15efbe63bfdee2.zip | |
R600: Fix min/max matching problems with unordered compares
The returned operand needs to be permuted for the unordered
compares. Also fix incorrectly producing fmin_legacy / fmax_legacy
for f64, which don't exist.
llvm-svn: 224094
Diffstat (limited to 'llvm/lib/Target/R600')
| -rw-r--r-- | llvm/lib/Target/R600/AMDGPUISelLowering.cpp | 85 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/AMDGPUISelLowering.h | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/R600ISelLowering.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/R600/SIInstructions.td | 2 |
4 files changed, 60 insertions, 50 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp index 0e34b4625c8..5783d4398ed 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1038,17 +1038,21 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, } /// \brief Generate Min/Max node -SDValue AMDGPUTargetLowering::CombineFMinMax(SDLoc DL, - EVT VT, - SDValue LHS, - SDValue RHS, - SDValue True, - SDValue False, - SDValue CC, - SelectionDAG &DAG) const { +SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL, + EVT VT, + SDValue LHS, + SDValue RHS, + SDValue True, + SDValue False, + SDValue CC, + DAGCombinerInfo &DCI) const { + if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + return SDValue(); + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) return SDValue(); + SelectionDAG &DAG = DCI.DAG; ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); switch (CCOpcode) { case ISD::SETOEQ: @@ -1065,33 +1069,51 @@ SDValue AMDGPUTargetLowering::CombineFMinMax(SDLoc DL, case ISD::SETO: break; case ISD::SETULE: - case ISD::SETULT: + case ISD::SETULT: { + // Unordered. + // + // We will allow this before legalization since we expand unordered compares + // ordinarily. + if (LHS == True) + return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS); + return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS); + } case ISD::SETOLE: case ISD::SETOLT: case ISD::SETLE: case ISD::SETLT: { - if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - break; + // Ordered. Assume ordered for undefined. + + // Only do this after legalization to avoid interfering with other combines + // which might occur. + if (DCI.getDAGCombineLevel() < AfterLegalizeDAG && + !DCI.isCalledByLegalizer()) + return SDValue(); // We need to permute the operands to get the correct NaN behavior. The // selected operand is the second one based on the failing compare with NaN, // so permute it based on the compare type the hardware uses. if (LHS == True) - return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS); - return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS); + return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS); + return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS); + } + case ISD::SETUGE: + case ISD::SETUGT: { + if (LHS == True) + return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS); + return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS); } case ISD::SETGT: case ISD::SETGE: - case ISD::SETUGE: case ISD::SETOGE: - case ISD::SETUGT: case ISD::SETOGT: { - if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - break; + if (DCI.getDAGCombineLevel() < AfterLegalizeDAG && + !DCI.isCalledByLegalizer()) + return SDValue(); if (LHS == True) - return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS); - return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS); + return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS); + return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS); } case ISD::SETCC_INVALID: llvm_unreachable("Invalid setcc condcode!"); @@ -2276,24 +2298,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, simplifyI24(N1, DCI); return SDValue(); } - case ISD::SELECT_CC: { - SDLoc DL(N); - EVT VT = N->getValueType(0); - - if (VT == MVT::f32 || - (VT == MVT::f64 && - Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - SDValue True = N->getOperand(2); - SDValue False = N->getOperand(3); - SDValue CC = N->getOperand(4); - - return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG); - } - - break; - } case ISD::SELECT: { SDValue Cond = N->getOperand(0); if (Cond.getOpcode() == ISD::SETCC) { @@ -2306,11 +2310,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, SDValue True = N->getOperand(1); SDValue False = N->getOperand(2); - if (VT == MVT::f32 || - (VT == MVT::f64 && - Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) { - return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG); - } + if (VT == MVT::f32) + return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI); // TODO: Implement min / max Evergreen instructions. if (VT == MVT::i32 && diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.h b/llvm/lib/Target/R600/AMDGPUISelLowering.h index 7386eaea73d..64ec0245f98 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.h +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.h @@ -145,14 +145,14 @@ public: SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const; - SDValue CombineFMinMax(SDLoc DL, - EVT VT, - SDValue LHS, - SDValue RHS, - SDValue True, - SDValue False, - SDValue CC, - SelectionDAG &DAG) const; + SDValue CombineFMinMaxLegacy(SDLoc DL, + EVT VT, + SDValue LHS, + SDValue RHS, + SDValue True, + SDValue False, + SDValue CC, + DAGCombinerInfo &DCI) const; SDValue CombineIMinMax(SDLoc DL, EVT VT, SDValue LHS, diff --git a/llvm/lib/Target/R600/R600ISelLowering.cpp b/llvm/lib/Target/R600/R600ISelLowering.cpp index 1a84546fa5a..fb7514e26da 100644 --- a/llvm/lib/Target/R600/R600ISelLowering.cpp +++ b/llvm/lib/Target/R600/R600ISelLowering.cpp @@ -1118,6 +1118,13 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const SDValue CC = Op.getOperand(4); SDValue Temp; + if (VT == MVT::f32) { + DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr); + SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI); + if (MinMax) + return MinMax; + } + // LHS and RHS are guaranteed to be the same value type EVT CompareVT = LHS.getValueType(); diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index aeba3518f7b..8c3f1403ca2 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -1514,6 +1514,7 @@ let isCommutable = 1 in { defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32", VOP_F32_F32_F32 >; +} // End isCommutable = 1 defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy @@ -1522,6 +1523,7 @@ defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy >; +let isCommutable = 1 in { defm V_LSHR_B32 : VOP2Inst <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>; defm V_ASHR_I32 : VOP2Inst <vop2<0x17>, "v_ashr_i32", VOP_I32_I32_I32, sra |

