diff options
author | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2016-09-22 09:52:19 +0000 |
---|---|---|
committer | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2016-09-22 09:52:19 +0000 |
commit | 6e7879c5e6e26c2c6d00618f12af20012eecff41 (patch) | |
tree | e75271eca7c8a4f68a884aad96748b64ab021bb5 /llvm/lib/Target/PowerPC/PPCISelLowering.cpp | |
parent | 2ce2ab3a4dc1df6ec87a42aa2baa6741c2456e0c (diff) | |
download | bcm5719-llvm-6e7879c5e6e26c2c6d00618f12af20012eecff41.tar.gz bcm5719-llvm-6e7879c5e6e26c2c6d00618f12af20012eecff41.zip |
[Power9] Add exploitation of non-permuting memory ops
This patch corresponds to review:
https://reviews.llvm.org/D19825
The new lxvx/stxvx instructions do not require swaps to line the elements
up correctly. In order to select them over the lxvd2x/lxvw4x instructions, which
require swaps, the patterns for the old instructions have a predicate that
ensures they won't be selected on Power9 and newer CPUs.
llvm-svn: 282143
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 12 |
1 files changed, 8 insertions, 4 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index cd75474a76a..1d9181b95d1 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10734,10 +10734,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } // For little endian, VSX stores require generating xxswapd/lxvd2x. + // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. EVT VT = N->getOperand(1).getValueType(); if (VT.isSimple()) { MVT StoreVT = VT.getSimpleVT(); - if (Subtarget.hasVSX() && Subtarget.isLittleEndian() && + if (Subtarget.needsSwapsForVSXMemOps() && (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) return expandVSXStoreForLE(N, DCI); @@ -10749,9 +10750,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, EVT VT = LD->getValueType(0); // For little endian, VSX loads require generating lxvd2x/xxswapd. + // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. if (VT.isSimple()) { MVT LoadVT = VT.getSimpleVT(); - if (Subtarget.hasVSX() && Subtarget.isLittleEndian() && + if (Subtarget.needsSwapsForVSXMemOps() && (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) return expandVSXLoadForLE(N, DCI); @@ -11066,7 +11068,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::INTRINSIC_W_CHAIN: { // For little endian, VSX loads require generating lxvd2x/xxswapd. - if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { + // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. + if (Subtarget.needsSwapsForVSXMemOps()) { switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { default: break; @@ -11079,7 +11082,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } case ISD::INTRINSIC_VOID: { // For little endian, VSX stores require generating xxswapd/stxvd2x. 
- if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { + // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. + if (Subtarget.needsSwapsForVSXMemOps()) { switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { default: break; |