summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2019-11-27 11:11:41 -0800
committer: Craig Topper <craig.topper@intel.com> 2019-11-27 11:20:58 -0800
commit: 9283681e168141bab9a883e48ce1da80b86afca3 (patch)
tree: 261870aebf6dbe5175bd0451737d2428fe7439b2 /llvm
parent: 5d21f75b57658db538b5e1764edc775271a651cd (diff)
download: bcm5719-llvm-9283681e168141bab9a883e48ce1da80b86afca3.tar.gz
download: bcm5719-llvm-9283681e168141bab9a883e48ce1da80b86afca3.zip
[CriticalAntiDepBreaker] Teach the regmask clobber check to check if any subregister is preserved before considering the super register clobbered
X86 has some calling conventions where bits 127:0 of a vector register are callee-saved, but the upper bits aren't. Previously we could detect that the full ymm register was clobbered when the xmm portion was really preserved. This patch checks the subregisters to make sure they aren't preserved.

Fixes PR44140.

Differential Revision: https://reviews.llvm.org/D70699
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp 16
-rw-r--r-- llvm/test/CodeGen/X86/pr44140.ll 7
2 files changed, 16 insertions, 7 deletions
diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 702e7e244bc..8d9d48402b3 100644
--- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -261,15 +261,25 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isRegMask())
- for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
- if (MO.clobbersPhysReg(i)) {
+ if (MO.isRegMask()) {
+ auto ClobbersPhysRegAndSubRegs = [&](unsigned PhysReg) {
+ for (MCSubRegIterator SRI(PhysReg, TRI, true); SRI.isValid(); ++SRI)
+ if (!MO.clobbersPhysReg(*SRI))
+ return false;
+
+ return true;
+ };
+
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ if (ClobbersPhysRegAndSubRegs(i)) {
DefIndices[i] = Count;
KillIndices[i] = ~0u;
KeepRegs.reset(i);
Classes[i] = nullptr;
RegRefs.erase(i);
}
+ }
+ }
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
diff --git a/llvm/test/CodeGen/X86/pr44140.ll b/llvm/test/CodeGen/X86/pr44140.ll
index 9916252e6c4..941f45d2d99 100644
--- a/llvm/test/CodeGen/X86/pr44140.ll
+++ b/llvm/test/CodeGen/X86/pr44140.ll
@@ -10,7 +10,6 @@ define win64cc void @opaque() {
; We need xmm6 to be live from the loop header across all iterations of the loop.
; We shouldn't clobber ymm6 inside the loop.
-; FIXME: We currently clobber ymm6
define i32 @main() {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %start
@@ -23,7 +22,7 @@ define i32 @main() {
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
-; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm6
+; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm7
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm2
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm3
; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
@@ -31,10 +30,10 @@ define i32 @main() {
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm5
OpenPOWER on IntegriCloud