diff options
| author | Artyom Skrobov <Artyom.Skrobov@arm.com> | 2015-05-14 12:59:46 +0000 | 
|---|---|---|
| committer | Artyom Skrobov <Artyom.Skrobov@arm.com> | 2015-05-14 12:59:46 +0000 | 
| commit | a70dfe18d3163b3c7ebe940de753e123ee1f1345 (patch) | |
| tree | 2e904e03a56cbd298de077d674ee8dcc3a0c8684 | |
| parent | 708fa8e89937f25a3fbb6150d573c771119d3144 (diff) | |
| download | bcm5719-llvm-a70dfe18d3163b3c7ebe940de753e123ee1f1345.tar.gz bcm5719-llvm-a70dfe18d3163b3c7ebe940de753e123ee1f1345.zip  | |
Re-apply r237247 - [AArch64] Codegen VMAX/VMIN for safe math cases
No longer breaks SPEC2000/2006
llvm-svn: 237361
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 115 | ||||
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-fmax.ll | 34 | 
3 files changed, 105 insertions, 47 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6d2a95129c3..4b93f0780e3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -491,6 +491,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,    setTargetDAGCombine(ISD::SELECT);    setTargetDAGCombine(ISD::VSELECT); +  setTargetDAGCombine(ISD::SELECT_CC);    setTargetDAGCombine(ISD::INTRINSIC_VOID);    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); @@ -3567,7 +3568,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {  /// operations would *not* be semantically equivalent.  static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {    if (Cmp == Result) -    return true; +    return (Cmp.getValueType() == MVT::f32 || +            Cmp.getValueType() == MVT::f64);    ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);    ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result); @@ -3701,46 +3703,6 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,    assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);    assert(LHS.getValueType() == RHS.getValueType());    EVT VT = TVal.getValueType(); - -  // Try to match this select into a max/min operation, which have dedicated -  // opcode in the instruction set. -  // FIXME: This is not correct in the presence of NaNs, so we only enable this -  // in no-NaNs mode. -  if (getTargetMachine().Options.NoNaNsFPMath) { -    SDValue MinMaxLHS = TVal, MinMaxRHS = FVal; -    if (selectCCOpsAreFMaxCompatible(LHS, MinMaxRHS) && -        selectCCOpsAreFMaxCompatible(RHS, MinMaxLHS)) { -      CC = ISD::getSetCCSwappedOperands(CC); -      std::swap(MinMaxLHS, MinMaxRHS); -    } - -    if (selectCCOpsAreFMaxCompatible(LHS, MinMaxLHS) && -        selectCCOpsAreFMaxCompatible(RHS, MinMaxRHS)) { -      switch (CC) { -      default: -        break; -      case ISD::SETGT: -      case ISD::SETGE: -      case ISD::SETUGT: -      case ISD::SETUGE: -      case ISD::SETOGT: -      case ISD::SETOGE: -        return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS); -        break; -      case ISD::SETLT: -      case ISD::SETLE: -      case ISD::SETULT: -      case ISD::SETULE: -      case ISD::SETOLT: -      case ISD::SETOLE: -        return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS); -        break; -      } -    } -  } - -  // If that fails, we'll need to perform an FCMP + CSEL sequence.  Go ahead -  // and do the comparison.    SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);    // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally @@ -8735,6 +8697,75 @@ static SDValue performSelectCombine(SDNode *N,    return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));  } +/// performSelectCCCombine - Target-specific DAG combining for ISD::SELECT_CC +/// to match FMIN/FMAX patterns. +static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) { +  // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x : y". +  // Unless the NoNaNsFPMath option is set, be careful about NaNs: +  // vmax/vmin return NaN if either operand is a NaN; +  // only do the transformation when it matches that behavior. + +  SDValue CondLHS = N->getOperand(0); +  SDValue CondRHS = N->getOperand(1); +  SDValue LHS = N->getOperand(2); +  SDValue RHS = N->getOperand(3); +  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); + +  unsigned Opcode; +  bool IsReversed; +  if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) && +      selectCCOpsAreFMaxCompatible(CondRHS, RHS)) { +    IsReversed = false; // x CC y ? x : y +  } else if (selectCCOpsAreFMaxCompatible(CondRHS, LHS) && +             selectCCOpsAreFMaxCompatible(CondLHS, RHS)) { +    IsReversed = true ; // x CC y ? y : x +  } else { +    return SDValue(); +  } + +  bool IsUnordered = false, IsOrEqual; +  switch (CC) { +  default: +    return SDValue(); +  case ISD::SETULT: +  case ISD::SETULE: +    IsUnordered = true; +  case ISD::SETOLT: +  case ISD::SETOLE: +  case ISD::SETLT: +  case ISD::SETLE: +    IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE); +    Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN; +    break; + +  case ISD::SETUGT: +  case ISD::SETUGE: +    IsUnordered = true; +  case ISD::SETOGT: +  case ISD::SETOGE: +  case ISD::SETGT: +  case ISD::SETGE: +    IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE); +    Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX; +    break; +  } + +  // If LHS is NaN, an ordered comparison will be false and the result will be +  // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid this by checking +  // that LHS != NaN. Likewise, for unordered comparisons, check for RHS != NaN. +  if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) +    return SDValue(); + +  // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both be true, +  // but FMIN will return -0, and FMAX will return +0. So FMIN/FMAX can only be +  // used for unsafe math or if one of the operands is known to be nonzero. +  if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath && +      !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) +    return SDValue(); + +  return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS); +} +  SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,                                                   DAGCombinerInfo &DCI) const {    SelectionDAG &DAG = DCI.DAG; @@ -8767,6 +8798,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,      return performSelectCombine(N, DCI);    case ISD::VSELECT:      return performVSelectCombine(N, DCI.DAG); +  case ISD::SELECT_CC: +    return performSelectCCCombine(N, DCI.DAG);    case ISD::STORE:      return performSTORECombine(N, DCI, DAG, Subtarget);    case AArch64ISD::BRCOND: diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 36b24ef99df..5218234abeb 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -3521,9 +3521,6 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {      //   c = fcmp [?gt, ?ge, ?lt, ?le] a, b      //   select c, a, b      // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'. -    // FIXME: There is similar code that allows some extensions in -    // AArch64TargetLowering::LowerSELECT_CC that should be shared with this -    // code.      bool swapSides = false;      if (!getTargetMachine().Options.NoNaNsFPMath) {        // transformability may depend on which way around we compare diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax.ll b/llvm/test/CodeGen/AArch64/arm64-fmax.ll index 94b745437bd..ea281528b84 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fmax.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fmax.ll @@ -1,29 +1,49 @@  ; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s +; RUN: llc -march=arm64 < %s | FileCheck %s --check-prefix=CHECK-SAFE -define double @test_direct(float %in) #1 { +define double @test_direct(float %in) {  ; CHECK-LABEL: test_direct: +; CHECK-SAFE-LABEL: test_direct:    %cmp = fcmp olt float %in, 0.000000e+00    %longer = fpext float %in to double    %val = select i1 %cmp, double 0.000000e+00, double %longer    ret double %val  ; CHECK: fmax +; CHECK-SAFE: fmax  } -define double @test_cross(float %in) #1 { +define double @test_cross(float %in) {  ; CHECK-LABEL: test_cross: +; CHECK-SAFE-LABEL: test_cross: +  %cmp = fcmp ult float %in, 0.000000e+00 +  %longer = fpext float %in to double +  %val = select i1 %cmp, double %longer, double 0.000000e+00 +  ret double %val + +; CHECK: fmin +; CHECK-SAFE: fmin +} + +; Same as previous, but with ordered comparison; +; can't be converted in safe-math mode. +define double @test_cross_fail_nan(float %in) { +; CHECK-LABEL: test_cross_fail_nan: +; CHECK-SAFE-LABEL: test_cross_fail_nan:    %cmp = fcmp olt float %in, 0.000000e+00    %longer = fpext float %in to double    %val = select i1 %cmp, double %longer, double 0.000000e+00    ret double %val  ; CHECK: fmin +; CHECK-SAFE: fcsel d0, d1, d0, mi  }  ; This isn't a min or a max, but passes the first condition for swapping the  ; results. Make sure they're put back before we resort to the normal fcsel.  define float @test_cross_fail(float %lhs, float %rhs) {  ; CHECK-LABEL: test_cross_fail: +; CHECK-SAFE-LABEL: test_cross_fail:    %tst = fcmp une float %lhs, %rhs    %res = select i1 %tst, float %rhs, float %lhs    ret float %res @@ -31,4 +51,12 @@ define float @test_cross_fail(float %lhs, float %rhs) {    ; The register allocator would have to decide to be deliberately obtuse before    ; other register were used.  ; CHECK: fcsel s0, s1, s0, ne -}
\ No newline at end of file +; CHECK-SAFE: fcsel s0, s1, s0, ne +} + +; Make sure the transformation isn't triggered for integers +define i64 @test_integer(i64  %in) { +  %cmp = icmp slt i64 %in, 0 +  %val = select i1 %cmp, i64 0, i64 %in +  ret i64 %val +}  | 

