author     Craig Topper <craig.topper@intel.com>  2018-08-03 00:37:34 +0000
committer  Craig Topper <craig.topper@intel.com>  2018-08-03 00:37:34 +0000
commit     2c095444a4719a8682c39c45641b758ea697f424 (patch)
tree       07896458695aef389f1a7e87f493496507a13d86 /llvm/lib
parent     5937368d4f45d9d1356634ad78983f4514929421 (diff)
[X86] Prevent promotion of i16 add/sub/and/or/xor to i32 if we can fold an atomic load and atomic store.
This makes them consistent with i8/i32/i64, which still seems to be more aggressive about folding than icc, gcc, or MSVC.
llvm-svn: 338795
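As a rough illustration (not part of the commit; the function and variable names below are made up), the source shape this change targets is a relaxed 16-bit atomic load, a plain add/and/or/xor, and a release store back to the same address. Keeping the operation at i16 lets the RELEASE_*16* patterns added below match, so the sequence can be emitted as one memory-operand instruction, as already happens for i8/i32/i64:

#include <atomic>

// Hypothetical example: with the i16 promotion suppressed, this load/add/store
// is the shape the new RELEASE_ADD16mi pattern is meant to match, and it should
// lower to a single `add word ptr [mem], 1` instead of a separate
// load/add/store sequence.
void bump(std::atomic<unsigned short> &counter) {
  unsigned short v = counter.load(std::memory_order_relaxed);
  counter.store(static_cast<unsigned short>(v + 1), std::memory_order_release);
}

Whether a particular clang/LLVM build actually performs the fold also depends on optimization level and surrounding code, so treat this as a sketch of the intent rather than a guaranteed lowering.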
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 16
-rw-r--r--  llvm/lib/Target/X86/X86InstrCompiler.td | 10
-rw-r--r--  llvm/lib/Target/X86/X86MCInstLower.cpp  |  8
3 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f1be359862c..9cb7ed0c64f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39768,6 +39768,19 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
     return Ld->getBasePtr() == St->getBasePtr();
   };
 
+  auto IsFoldableAtomicRMW = [](SDValue Load, SDValue Op) {
+    if (!Load.hasOneUse() || Load.getOpcode() != ISD::ATOMIC_LOAD)
+      return false;
+    if (!Op.hasOneUse())
+      return false;
+    SDNode *User = *Op->use_begin();
+    if (User->getOpcode() != ISD::ATOMIC_STORE)
+      return false;
+    auto *Ld = cast<AtomicSDNode>(Load);
+    auto *St = cast<AtomicSDNode>(User);
+    return Ld->getBasePtr() == St->getBasePtr();
+  };
+
   bool Commute = false;
   switch (Op.getOpcode()) {
   default: return false;
@@ -39802,6 +39815,9 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
         ((Commute && !isa<ConstantSDNode>(N1)) ||
          (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
       return false;
+    if (IsFoldableAtomicRMW(N0, Op) ||
+        (Commute && IsFoldableAtomicRMW(N1, Op)))
+      return false;
   }
   }
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index f5c3463c57a..3264c5b6930 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -896,8 +896,14 @@ multiclass RELEASE_BINOP_MI<SDNode op> {
         "#BINOP "#NAME#"8mr PSEUDO!",
         [(atomic_store_8 addr:$dst, (op
             (atomic_load_8 addr:$dst), GR8:$src))]>;
-    // NAME#16 is not generated as 16-bit arithmetic instructions are considered
-    // costly and avoided as far as possible by this backend anyway
+    def NAME#16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
+        "#BINOP "#NAME#"16mi PSEUDO!",
+        [(atomic_store_16 addr:$dst, (op
+            (atomic_load_16 addr:$dst), (i16 imm:$src)))]>;
+    def NAME#16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
+        "#BINOP "#NAME#"16mr PSEUDO!",
+        [(atomic_store_16 addr:$dst, (op
+            (atomic_load_16 addr:$dst), GR16:$src))]>;
     def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
         "#BINOP "#NAME#"32mi PSEUDO!",
         [(atomic_store_32 addr:$dst, (op
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index a5aaa69b9fa..a31aff94604 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -601,24 +601,32 @@ ReSimplify:
case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify;
case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify;
case X86::RELEASE_ADD8mr: OutMI.setOpcode(X86::ADD8mr); goto ReSimplify;
+ case X86::RELEASE_ADD16mi: OutMI.setOpcode(X86::ADD16mi); goto ReSimplify;
+ case X86::RELEASE_ADD16mr: OutMI.setOpcode(X86::ADD16mr); goto ReSimplify;
case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify;
case X86::RELEASE_ADD32mr: OutMI.setOpcode(X86::ADD32mr); goto ReSimplify;
case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify;
case X86::RELEASE_ADD64mr: OutMI.setOpcode(X86::ADD64mr); goto ReSimplify;
case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify;
case X86::RELEASE_AND8mr: OutMI.setOpcode(X86::AND8mr); goto ReSimplify;
+ case X86::RELEASE_AND16mi: OutMI.setOpcode(X86::AND16mi); goto ReSimplify;
+ case X86::RELEASE_AND16mr: OutMI.setOpcode(X86::AND16mr); goto ReSimplify;
case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify;
case X86::RELEASE_AND32mr: OutMI.setOpcode(X86::AND32mr); goto ReSimplify;
case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify;
case X86::RELEASE_AND64mr: OutMI.setOpcode(X86::AND64mr); goto ReSimplify;
case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify;
case X86::RELEASE_OR8mr: OutMI.setOpcode(X86::OR8mr); goto ReSimplify;
+ case X86::RELEASE_OR16mi: OutMI.setOpcode(X86::OR16mi); goto ReSimplify;
+ case X86::RELEASE_OR16mr: OutMI.setOpcode(X86::OR16mr); goto ReSimplify;
case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify;
case X86::RELEASE_OR32mr: OutMI.setOpcode(X86::OR32mr); goto ReSimplify;
case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify;
case X86::RELEASE_OR64mr: OutMI.setOpcode(X86::OR64mr); goto ReSimplify;
case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify;
case X86::RELEASE_XOR8mr: OutMI.setOpcode(X86::XOR8mr); goto ReSimplify;
+ case X86::RELEASE_XOR16mi: OutMI.setOpcode(X86::XOR16mi); goto ReSimplify;
+ case X86::RELEASE_XOR16mr: OutMI.setOpcode(X86::XOR16mr); goto ReSimplify;
case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify;
case X86::RELEASE_XOR32mr: OutMI.setOpcode(X86::XOR32mr); goto ReSimplify;
case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify;