author    | Eli Friedman <efriedma@codeaurora.org> | 2016-10-18 21:03:40 +0000
committer | Eli Friedman <efriedma@codeaurora.org> | 2016-10-18 21:03:40 +0000
commit    | c0a717ba5bf41ad90ba1ad8151f2b15b79c65998 (patch)
tree      | c8701a348c439c2afc62edc309a4a3fdcf791df6 /llvm
parent    | 36efa68463bc380c492cd08ae614c04ba3cd9fd7 (diff)
download  | bcm5719-llvm-c0a717ba5bf41ad90ba1ad8151f2b15b79c65998.tar.gz
          | bcm5719-llvm-c0a717ba5bf41ad90ba1ad8151f2b15b79c65998.zip
Improve ARM lowering for "icmp <2 x i64> eq".
The custom lowering is pretty straightforward: do the compare as
<4 x i32>, then AND together the two 32-bit halves of each 64-bit lane
of the result.
Differential Revision: https://reviews.llvm.org/D25713
llvm-svn: 284536
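The trick relies on a simple identity: a 64-bit lane is equal exactly when both of its 32-bit halves are equal, which is what the vceq.i32 / vrev64.32 / vand sequence in the patch computes per lane. Below is a minimal scalar C++ sketch of a single lane, for illustration only; the eq64_via_halves helper is a name made up here, not anything from the patch:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical scalar model of the NEON sequence for a single 64-bit lane:
// vceq.i32 yields an all-ones/all-zeros mask per 32-bit half, and ANDing
// that mask with its half-swapped self (vrev64.32) leaves all-ones only
// when *both* halves matched.
static uint64_t eq64_via_halves(uint64_t a, uint64_t b) {
  uint32_t loMask = (uint32_t(a) == uint32_t(b)) ? ~0u : 0u;             // low halves
  uint32_t hiMask = (uint32_t(a >> 32) == uint32_t(b >> 32)) ? ~0u : 0u; // high halves
  uint32_t both = loMask & hiMask;       // vand after vrev64.32 pairs the masks
  return (uint64_t(both) << 32) | both;  // all-ones or all-zeros lane result
}

int main() {
  assert(eq64_via_halves(42, 42) == ~0ull);
  assert(eq64_via_halves(42, 43) == 0);
  // Equal low halves alone must not count as lane equality.
  assert(eq64_via_halves(0x0000000100000000ull, 0x0000000200000000ull) == 0);
}
```

In the vector form, vrev64.32 swaps the two 32-bit compare results inside each 64-bit lane, so the vand lines each half's mask up with its partner's; a vmvn then inverts the result for the SETNE case.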
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 27
-rw-r--r-- | llvm/test/CodeGen/ARM/vicmp-64.ll       | 52
2 files changed, 73 insertions, 6 deletions
```diff
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index a41c4fcb9cd..dcbf3c5e513 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -278,7 +278,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
   }
 
   MVT ElemTy = VT.getVectorElementType();
-  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
+  if (ElemTy != MVT::f64)
     setOperationAction(ISD::SETCC, VT, Custom);
   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -742,8 +742,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
     setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
     setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
-    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
-    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
     // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
     // a destination type that is wider than the source, and nor does
     // it have a FP_TO_[SU]INT instruction with a narrower destination than
@@ -5242,10 +5240,27 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
   SDLoc dl(Op);
 
+  if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
+      (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
+    // Special-case integer 64-bit equality comparisons. They aren't legal,
+    // but they can be lowered with a few vector instructions.
+    unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
+    EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
+    SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
+    SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
+    SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
+                              DAG.getCondCode(ISD::SETEQ));
+    SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
+    SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
+    Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
+    if (SetCCOpcode == ISD::SETNE)
+      Merged = DAG.getNOT(dl, Merged, CmpVT);
+    Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
+    return Merged;
+  }
+
   if (CmpVT.getVectorElementType() == MVT::i64)
-    // 64-bit comparisons are not legal. We've marked SETCC as non-Custom,
-    // but it's possible that our operands are 64-bit but our result is 32-bit.
-    // Bail in this case.
+    // 64-bit comparisons are not legal in general.
     return SDValue();
 
   if (Op1.getValueType().isFloatingPoint()) {
diff --git a/llvm/test/CodeGen/ARM/vicmp-64.ll b/llvm/test/CodeGen/ARM/vicmp-64.ll
new file mode 100644
index 00000000000..57e036bde22
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/vicmp-64.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=arm -mattr=+neon %s -o - | FileCheck %s
+
+; Check codegen for 64-bit icmp operations, which don't directly map to any
+; instruction.
+
+define <2 x i64> @vne(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: vne:
+;CHECK: vceq.i32
+;CHECK-NEXT: vrev64.32
+;CHECK-NEXT: vand
+;CHECK-NEXT: vmvn
+;CHECK-NEXT: vmov
+;CHECK-NEXT: vmov
+;CHECK-NEXT: mov pc, lr
+  %tmp1 = load <2 x i64>, <2 x i64>* %A
+  %tmp2 = load <2 x i64>, <2 x i64>* %B
+  %tmp3 = icmp ne <2 x i64> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i64> @veq(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: veq:
+;CHECK: vceq.i32
+;CHECK-NEXT: vrev64.32
+;CHECK-NEXT: vand
+;CHECK-NEXT: vmov
+;CHECK-NEXT: vmov
+;CHECK-NEXT: mov pc, lr
+  %tmp1 = load <2 x i64>, <2 x i64>* %A
+  %tmp2 = load <2 x i64>, <2 x i64>* %B
+  %tmp3 = icmp eq <2 x i64> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+; FIXME: We currently generate terrible code for this.
+; (Atop < Btop) | ((ATop == BTop) & (ABottom < BBottom))
+; would come out to roughly 6 instructions, but we currently
+; scalarize it.
+define <2 x i64> @vult(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: vult:
+;CHECK: subs
+;CHECK: sbcs
+;CHECK: subs
+;CHECK: sbcs
+  %tmp1 = load <2 x i64>, <2 x i64>* %A
+  %tmp2 = load <2 x i64>, <2 x i64>* %B
+  %tmp3 = icmp ult <2 x i64> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
```
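The FIXME above already spells out the lowering the author would prefer for unsigned less-than. As a sanity check of that identity only (not code from the patch; ult64_via_halves is a hypothetical helper), here is a scalar C++ sketch:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical scalar check of the identity quoted in the FIXME:
//   a <u b  iff  (aHi < bHi) | ((aHi == bHi) & (aLo < bLo))
// with all half-comparisons done unsigned on the 32-bit halves.
static bool ult64_via_halves(uint64_t a, uint64_t b) {
  uint32_t aHi = uint32_t(a >> 32), bHi = uint32_t(b >> 32);
  uint32_t aLo = uint32_t(a), bLo = uint32_t(b);
  return (aHi < bHi) || (aHi == bHi && aLo < bLo);
}

int main() {
  assert(ult64_via_halves(1, 2));
  assert(!ult64_via_halves(2, 1));
  assert(ult64_via_halves(0xFFFFFFFFull, 0x100000000ull));  // high halves decide
  assert(!ult64_via_halves(0x100000000ull, 0xFFFFFFFFull));
  assert(!ult64_via_halves(7, 7));
}
```

Each of the three 32-bit half-comparisons maps naturally onto a NEON vector compare, which is presumably where the FIXME's estimate of roughly six instructions comes from.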