diff options
| author | Geoff Berry <gberry@codeaurora.org> | 2016-09-26 15:34:47 +0000 |
|---|---|---|
| committer | Geoff Berry <gberry@codeaurora.org> | 2016-09-26 15:34:47 +0000 |
| commit | 256fcf975f60f51fce9fe7f5992f9e409e60db29 (patch) | |
| tree | 7d2c6f1fa7ec711e852abdb192174d1bb19c7f9a /llvm/lib | |
| parent | e45de8a5ec7ad35cb770cd2eed61c56a81982231 (diff) | |
| download | bcm5719-llvm-256fcf975f60f51fce9fe7f5992f9e409e60db29.tar.gz bcm5719-llvm-256fcf975f60f51fce9fe7f5992f9e409e60db29.zip | |
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32 bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.h | 15 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 9 |
3 files changed, 21 insertions, 8 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 6b7d517b5d0..6fa0782222d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -586,6 +586,11 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, return false; Reg = N.getOperand(0); + + // Don't match if free 32-bit -> 64-bit zext can be used instead. + if (Ext == AArch64_AM::UXTW && + Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode())) + return false; } // AArch64 mandates that the RHS of the operation must use the smallest diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 86f1d972c9d..882ed19480b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -215,6 +215,21 @@ enum NodeType : unsigned { } // end namespace AArch64ISD +namespace { + +// Any instruction that defines a 32-bit result zeros out the high half of the +// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may +// be copying from a truncate. But any other 32-bit operation will zero-extend +// up to 64 bits. +// FIXME: X86 also checks for CMOV here. Do we need something similar? 
+static inline bool isDef32(const SDNode &N) { + unsigned Opc = N.getOpcode(); + return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG && + Opc != ISD::CopyFromReg; +} + +} // end anonymous namespace + class AArch64Subtarget; class AArch64TargetMachine; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index e58ad278215..dad097e07ac 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5272,15 +5272,8 @@ def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0 //---------------------------------------------------------------------------- // FIXME: Like for X86, these should go in their own separate .td file. -// Any instruction that defines a 32-bit result leaves the high half of the -// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may -// be copying from a truncate. But any other 32-bit operation will zero-extend -// up to 64 bits. -// FIXME: X86 also checks for CMOV here. Do we need something similar? def def32 : PatLeaf<(i32 GPR32:$src), [{ - return N->getOpcode() != ISD::TRUNCATE && - N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && - N->getOpcode() != ISD::CopyFromReg; + return isDef32(*N); }]>; // In the case of a 32-bit def that is known to implicitly zero-extend, |

