diff options
| author | Jun Bum Lim <junbuml@codeaurora.org> | 2015-10-09 14:11:25 +0000 |
|---|---|---|
| committer | Jun Bum Lim <junbuml@codeaurora.org> | 2015-10-09 14:11:25 +0000 |
| commit | 0aace13d18417fcb22fa69e76008b1423abb4e5d (patch) | |
| tree | 8a2f8c1f84a461a531dc2503bc94b5f97c2dfcbe /llvm/lib | |
| parent | 6269236a8ee11c973a9b14b299677c0509002c27 (diff) | |
| download | bcm5719-llvm-0aace13d18417fcb22fa69e76008b1423abb4e5d.tar.gz bcm5719-llvm-0aace13d18417fcb22fa69e76008b1423abb4e5d.zip | |
Improve ISel across lane float min/max reduction
In vectorized float min/max reduction code, the final "reduce" step
is sub-optimal. In AArch64, this change wll combine :
svn0 = vector_shuffle t0, undef<2,3,u,u>
fmin = fminnum t0,svn0
svn1 = vector_shuffle fmin, undef<1,u,u,u>
cc = setcc fmin, svn1, ole
n0 = extract_vector_elt cc, #0
n1 = extract_vector_elt fmin, #0
n2 = extract_vector_elt fmin, #1
result = select n0, n1,n2
into :
result = llvm.aarch64.neon.fminnmv t0
This change extends r247575.
llvm-svn: 249834
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 59 |
1 files changed, 47 insertions, 12 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index fa8cad82795..a599997f87a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8750,8 +8750,13 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV, return SDValue(); int NumVecElts = VTy.getVectorNumElements(); - if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16) - return SDValue(); + if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) { + if (NumVecElts != 4) + return SDValue(); + } else { + if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16) + return SDValue(); + } int NumExpectedSteps = APInt(8, NumVecElts).logBase2(); SDValue PreOp = OpV; @@ -8802,6 +8807,8 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV, PreOp = CurOp; } unsigned Opcode; + bool IsIntrinsic = false; + switch (Op) { default: llvm_unreachable("Unexpected operator for across vector reduction"); @@ -8820,11 +8827,24 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV, case ISD::UMIN: Opcode = AArch64ISD::UMINV; break; + case ISD::FMAXNUM: + Opcode = Intrinsic::aarch64_neon_fmaxnmv; + IsIntrinsic = true; + break; + case ISD::FMINNUM: + Opcode = Intrinsic::aarch64_neon_fminnmv; + IsIntrinsic = true; + break; } SDLoc DL(N); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), - DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp), - DAG.getConstant(0, DL, MVT::i64)); + + return IsIntrinsic + ? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0), + DAG.getConstant(Opcode, DL, MVT::i32), PreOp) + : DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), + DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp), + DAG.getConstant(0, DL, MVT::i64)); } /// Target-specific DAG combine for the across vector min/max reductions. @@ -8848,9 +8868,6 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV, /// becomes : /// %1 = smaxv %0 /// %result = extract_vector_elt %1, 0 -/// FIXME: Currently this function matches only SMAXV, UMAXV, SMINV, and UMINV. -/// We could also support other types of across lane reduction available -/// in AArch64, including FMAXNMV, FMAXV, FMINNMV, and FMINV. static SDValue performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget) { @@ -8878,17 +8895,26 @@ performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG, SDValue VectorOp = SetCC.getOperand(0); unsigned Op = VectorOp->getOpcode(); // Check if the input vector is fed by the operator we want to handle. - if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN && Op != ISD::UMIN) + if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN && + Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM) return SDValue(); EVT VTy = VectorOp.getValueType(); if (!VTy.isVector()) return SDValue(); - EVT EltTy = VTy.getVectorElementType(); - if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8) + if (VTy.getSizeInBits() < 64) return SDValue(); + EVT EltTy = VTy.getVectorElementType(); + if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) { + if (EltTy != MVT::f32) + return SDValue(); + } else { + if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8) + return SDValue(); + } + // Check if extracting from the same vector. // For example, // %sc = setcc %vector, %svn1, gt @@ -8904,7 +8930,13 @@ performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG, if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) || (Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) || (Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) || - (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE)) + (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) || + (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE && + CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT && + CC != ISD::SETGE) || + (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE && + CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT && + CC != ISD::SETLE)) return SDValue(); // Expect to check only lane 0 from the vector SETCC. @@ -8963,6 +8995,9 @@ performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG, if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8) return SDValue(); + if (VTy.getSizeInBits() < 64) + return SDValue(); + return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG); } |

