diff options
author | Silviu Baranga <silviu.baranga@arm.com> | 2015-08-19 14:11:27 +0000 |
---|---|---|
committer | Silviu Baranga <silviu.baranga@arm.com> | 2015-08-19 14:11:27 +0000 |
commit | ad1b19fcb718b1010aaab492e3cab80ab7a71406 (patch) | |
tree | f466116dfcd807a04d103d33fdc70e70afc1819e /llvm/lib | |
parent | 746da5fe2a407254753965473728ee574d1b906c (diff) | |
download | bcm5719-llvm-ad1b19fcb718b1010aaab492e3cab80ab7a71406.tar.gz bcm5719-llvm-ad1b19fcb718b1010aaab492e3cab80ab7a71406.zip |
[ARM] Add instruction selection patterns for vmin/vmax
Summary:
The mid-end was generating vector smin/smax/umin/umax nodes, but
we were using vbsl to generatate the code. This adds the vmin/vmax
patterns and a test to check that we are now generating vmin/vmax
instructions.
Reviewers: rengolin, jmolloy
Subscribers: aemerson, rengolin, llvm-commits
Differential Revision: http://reviews.llvm.org/D12105
llvm-svn: 245439
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 22 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrNEON.td | 8 |
2 files changed, 24 insertions, 6 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index fe8da7ffc80..ebc63e0db24 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -147,6 +147,11 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, setOperationAction(ISD::SABSDIFF, VT, Legal); setOperationAction(ISD::UABSDIFF, VT, Legal); } + if (!VT.isFloatingPoint() && + VT != MVT::v2i64 && VT != MVT::v1i64) + for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) + setOperationAction(Opcode, VT, Legal); + } void ARMTargetLowering::addDRTypeForNEON(MVT VT) { @@ -2821,11 +2826,24 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::arm_neon_vminu: + case Intrinsic::arm_neon_vmaxu: { + if (Op.getValueType().isFloatingPoint()) + return SDValue(); + unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu) + ? ISD::UMIN : ISD::UMAX; + return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } case Intrinsic::arm_neon_vmins: case Intrinsic::arm_neon_vmaxs: { // v{min,max}s is overloaded between signed integers and floats. - if (!Op.getValueType().isFloatingPoint()) - return SDValue(); + if (!Op.getValueType().isFloatingPoint()) { + unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) + ? ISD::SMIN : ISD::SMAX; + return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) ? ISD::FMINNAN : ISD::FMAXNAN; return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 889b8a97d25..5e9e3876fe3 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -5026,10 +5026,10 @@ defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD, // VMAX : Vector Maximum defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vmax", "s", int_arm_neon_vmaxs, 1>; + "vmax", "s", smax, 1>; defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vmax", "u", int_arm_neon_vmaxu, 1>; + "vmax", "u", umax, 1>; def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmax", "f32", v2f32, v2f32, fmaxnan, 1>; @@ -5052,10 +5052,10 @@ let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" i // VMIN : Vector Minimum defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vmin", "s", int_arm_neon_vmins, 1>; + "vmin", "s", smin, 1>; defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vmin", "u", int_arm_neon_vminu, 1>; + "vmin", "u", umin, 1>; def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmin", "f32", v2f32, v2f32, fminnan, 1>; |