diff options
author | Sanjay Patel <spatel@rotateright.com> | 2019-12-18 08:47:07 -0500 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2019-12-18 08:47:07 -0500 |
commit | 5e5e99c041e48a69615eefd123dac23d9d0c7f73 (patch) | |
tree | 4917170c06f7589b61443082a3517ca5609b8691 /llvm/lib/Target | |
parent | c7492fbd4e85632a05428bd0281fcfd06f1fff6c (diff) | |
download | bcm5719-llvm-5e5e99c041e48a69615eefd123dac23d9d0c7f73.tar.gz bcm5719-llvm-5e5e99c041e48a69615eefd123dac23d9d0c7f73.zip |
[AArch64] match fcvtl2 with bitcasted extract
This should eliminate a regression seen in D63815.
If we are FP extending the high half extract of a vector,
we should be able to peek through a bitcast sitting
between the extract and extend.
This replaces tablegen patterns with a more general
DAG to DAG override, so we can handle any casted type.
Differential Revision: https://reviews.llvm.org/D71515
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 35 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6 |
2 files changed, 35 insertions, 6 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index f40652f4fbb..e875844ed70 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -205,6 +205,7 @@ public:
   bool tryBitfieldInsertOp(SDNode *N);
   bool tryBitfieldInsertInZeroOp(SDNode *N);
   bool tryShiftAmountMod(SDNode *N);
+  bool tryHighFPExt(SDNode *N);
 
   bool tryReadRegister(SDNode *N);
   bool tryWriteRegister(SDNode *N);
@@ -1803,6 +1804,35 @@ bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
   return true;
 }
 
+/// Try to form fcvtl2 instructions from a floating-point extend of a high-half
+/// extract of a subvector.
+bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
+  assert(N->getOpcode() == ISD::FP_EXTEND);
+
+  // There are 2 forms of fcvtl2 - extend to double or extend to float.
+  SDValue Extract = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  EVT NarrowVT = Extract.getValueType();
+  if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
+      (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
+    return false;
+
+  // Optionally look past a bitcast.
+  Extract = peekThroughBitcasts(Extract);
+  if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+    return false;
+
+  // Match extract from start of high half index.
+  // Example: v8i16 -> v4i16 means the extract must begin at index 4.
+  unsigned ExtractIndex = Extract.getConstantOperandVal(1);
+  if (ExtractIndex != Extract.getValueType().getVectorNumElements())
+    return false;
+
+  auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
+  CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
+  return true;
+}
+
 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
                                 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
                                 unsigned NumberOfIgnoredLowBits = 0,
@@ -3010,6 +3040,11 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       return;
     break;
 
+  case ISD::FP_EXTEND:
+    if (tryHighFPExt(Node))
+      return;
+    break;
+
   case ISD::OR:
     if (tryBitfieldInsertOp(Node))
       return;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 8dc61e517cf..861c0c0a18a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3646,14 +3646,8 @@ def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn)
                                                                (i64 4)))),
           (FCVTLv8i16 V128:$Rn)>;
 def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
-def : Pat<(v2f64 (fpextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
-                                                    (i64 2))))),
-          (FCVTLv4i32 V128:$Rn)>;
 
 def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
-def : Pat<(v4f32 (fpextend (v4f16 (extract_subvector (v8f16 V128:$Rn),
-                                                    (i64 4))))),
-          (FCVTLv8i16 V128:$Rn)>;
 
 defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
 defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;