-rw-r--r-- llvm/include/llvm/CodeGen/TargetInstrInfo.h |  22
-rw-r--r-- llvm/lib/CodeGen/StackSlotColoring.cpp      |  10
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.cpp        | 199
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.h          |   6
-rw-r--r-- llvm/test/CodeGen/X86/pr30821.mir           | 133
5 files changed, 289 insertions(+), 81 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index d6fc4482fc1..9e88c08c76c 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -225,6 +225,17 @@ public:
return 0;
}
+ /// Optional extension of isLoadFromStackSlot that returns the number of
+ /// bytes loaded from the stack. This must be implemented if a backend
+ /// supports partial stack slot spills/loads to further disambiguate
+ /// what the load does.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr &MI,
+ int &FrameIndex,
+ unsigned &MemBytes) const {
+ MemBytes = 0;
+ return isLoadFromStackSlot(MI, FrameIndex);
+ }
+
/// Check for post-frame ptr elimination stack locations as well.
/// This uses a heuristic so it isn't reliable for correctness.
virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI,
@@ -252,6 +263,17 @@ public:
return 0;
}
+ /// Optional extension of isStoreToStackSlot that returns the number of
+ /// bytes stored to the stack. This must be implemented if a backend
+ /// supports partial stack slot spills/loads to further disambiguate
+ /// what the store does.
+ virtual unsigned isStoreToStackSlot(const MachineInstr &MI,
+ int &FrameIndex,
+ unsigned &MemBytes) const {
+ MemBytes = 0;
+ return isStoreToStackSlot(MI, FrameIndex);
+ }
+
/// Check for post-frame ptr elimination stack locations as well.
/// This uses a heuristic, so it isn't reliable for correctness.
virtual unsigned isStoreToStackSlotPostFE(const MachineInstr &MI,
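
For illustration only (no such code is in this patch): a backend that supports partial stack slot spills/loads would override the new three-argument hook and report the width of each access. "MyTarget" and its opcodes below are invented; only the MachineInstr/MachineOperand calls are real LLVM API.

// Hypothetical override of the new hook for an invented "MyTarget" backend.
unsigned MyTargetInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                                int &FrameIndex,
                                                unsigned &MemBytes) const {
  switch (MI.getOpcode()) {
  default:
    return 0;               // not a reload from a stack slot
  case MyTarget::LDWrm:     // hypothetical 4-byte full-width reload
    MemBytes = 4;
    break;
  case MyTarget::LDBrm:     // hypothetical 1-byte partial reload
    MemBytes = 1;
    break;
  }
  if (!MI.getOperand(1).isFI()) // operand 1 must be a frame index
    return 0;
  FrameIndex = MI.getOperand(1).getIndex();
  return MI.getOperand(0).getReg();
}

Existing callers keep using the two-argument form; the default three-argument implementation above simply sets MemBytes to 0 and defers to it.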
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index 1d0aa687337..17f6b83a619 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -418,7 +418,9 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
unsigned LoadReg = 0;
unsigned StoreReg = 0;
- if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS)))
+ unsigned LoadSize = 0;
+ unsigned StoreSize = 0;
+ if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS, LoadSize)))
continue;
// Skip the ...pseudo debugging... instructions between a load and store.
while ((NextMI != E) && NextMI->isDebugValue()) {
@@ -426,9 +428,11 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
++I;
}
if (NextMI == E) continue;
- if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS)))
+ if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS, StoreSize)))
+ continue;
+ if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1 ||
+ LoadSize != StoreSize)
continue;
- if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
++NumDead;
changed = true;
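
Why the extra LoadSize != StoreSize clause matters: the pass deletes a spill store that immediately follows a reload of the same register from the same slot, on the assumption that the pair is a no-op round trip. In PR30821 the "load" is an 8-byte MOVSDrm but the "store" is a 16-byte MOVAPDmr, so deleting the store would lose half the bytes it writes to the slot. A minimal standalone model of the strengthened check (types and names invented for illustration, not LLVM code):

#include <cassert>

// Model of one stack-slot access; not an LLVM type.
struct MemAccess {
  int FrameIndex;    // which stack slot
  unsigned Reg;      // register loaded or stored
  unsigned MemBytes; // bytes actually read or written
};

// The reload/spill pair is a removable round trip only if it touches the
// same slot, the same register, and the same number of bytes.
static bool isDeadStorePair(const MemAccess &Load, const MemAccess &Store) {
  return Load.FrameIndex == Store.FrameIndex && Load.FrameIndex != -1 &&
         Load.Reg == Store.Reg && Load.MemBytes == Store.MemBytes;
}

int main() {
  // PR30821 shape: 8-byte reload then 16-byte store of the same slot --
  // not dead, since the store writes bytes the load never read.
  assert(!isDeadStorePair({2, 1, 8}, {2, 1, 16}));
  // Full-width round trip of the same slot and register -- a dead store.
  assert(isDeadStorePair({2, 1, 16}, {2, 1, 16}));
  return 0;
}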
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index ad92a038107..11ca8b0aa3f 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3939,24 +3939,40 @@ bool X86InstrInfo::isFrameOperand(const MachineInstr &MI, unsigned int Op,
return false;
}
-static bool isFrameLoadOpcode(int Opcode) {
+static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
switch (Opcode) {
default:
return false;
case X86::MOV8rm:
+ case X86::KMOVBkm:
+ MemBytes = 1;
+ return true;
case X86::MOV16rm:
+ case X86::KMOVWkm:
+ MemBytes = 2;
+ return true;
case X86::MOV32rm:
+ case X86::MOVSSrm:
+ case X86::VMOVSSZrm:
+ case X86::KMOVDkm:
+ MemBytes = 4;
+ return true;
case X86::MOV64rm:
case X86::LD_Fp64m:
- case X86::MOVSSrm:
case X86::MOVSDrm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSDZrm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ case X86::KMOVQkm:
+ MemBytes = 8;
+ return true;
case X86::MOVAPSrm:
case X86::MOVUPSrm:
case X86::MOVAPDrm:
case X86::MOVUPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
- case X86::VMOVSSrm:
case X86::VMOVSDrm:
case X86::VMOVAPSrm:
case X86::VMOVUPSrm:
@@ -3964,131 +3980,142 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::VMOVUPDrm:
case X86::VMOVDQArm:
case X86::VMOVDQUrm:
- case X86::VMOVUPSYrm:
+ case X86::VMOVAPSZ128rm:
+ case X86::VMOVUPSZ128rm:
+ case X86::VMOVAPSZ128rm_NOVLX:
+ case X86::VMOVUPSZ128rm_NOVLX:
+ case X86::VMOVAPDZ128rm:
+ case X86::VMOVUPDZ128rm:
+ case X86::VMOVDQU8Z128rm:
+ case X86::VMOVDQU16Z128rm:
+ case X86::VMOVDQA32Z128rm:
+ case X86::VMOVDQU32Z128rm:
+ case X86::VMOVDQA64Z128rm:
+ case X86::VMOVDQU64Z128rm:
+ MemBytes = 16;
+ return true;
case X86::VMOVAPSYrm:
- case X86::VMOVUPDYrm:
+ case X86::VMOVUPSYrm:
case X86::VMOVAPDYrm:
- case X86::VMOVDQUYrm:
+ case X86::VMOVUPDYrm:
case X86::VMOVDQAYrm:
- case X86::MMX_MOVD64rm:
- case X86::MMX_MOVQ64rm:
- case X86::VMOVSSZrm:
- case X86::VMOVSDZrm:
- case X86::VMOVAPSZrm:
- case X86::VMOVAPSZ128rm:
+ case X86::VMOVDQUYrm:
case X86::VMOVAPSZ256rm:
- case X86::VMOVAPSZ128rm_NOVLX:
- case X86::VMOVAPSZ256rm_NOVLX:
- case X86::VMOVUPSZrm:
- case X86::VMOVUPSZ128rm:
case X86::VMOVUPSZ256rm:
- case X86::VMOVUPSZ128rm_NOVLX:
+ case X86::VMOVAPSZ256rm_NOVLX:
case X86::VMOVUPSZ256rm_NOVLX:
- case X86::VMOVAPDZrm:
- case X86::VMOVAPDZ128rm:
case X86::VMOVAPDZ256rm:
- case X86::VMOVUPDZrm:
- case X86::VMOVUPDZ128rm:
case X86::VMOVUPDZ256rm:
- case X86::VMOVDQA32Zrm:
- case X86::VMOVDQA32Z128rm:
+ case X86::VMOVDQU8Z256rm:
+ case X86::VMOVDQU16Z256rm:
case X86::VMOVDQA32Z256rm:
- case X86::VMOVDQU32Zrm:
- case X86::VMOVDQU32Z128rm:
case X86::VMOVDQU32Z256rm:
- case X86::VMOVDQA64Zrm:
- case X86::VMOVDQA64Z128rm:
case X86::VMOVDQA64Z256rm:
- case X86::VMOVDQU64Zrm:
- case X86::VMOVDQU64Z128rm:
case X86::VMOVDQU64Z256rm:
+ MemBytes = 32;
+ return true;
+ case X86::VMOVAPSZrm:
+ case X86::VMOVUPSZrm:
+ case X86::VMOVAPDZrm:
+ case X86::VMOVUPDZrm:
case X86::VMOVDQU8Zrm:
- case X86::VMOVDQU8Z128rm:
- case X86::VMOVDQU8Z256rm:
case X86::VMOVDQU16Zrm:
- case X86::VMOVDQU16Z128rm:
- case X86::VMOVDQU16Z256rm:
- case X86::KMOVBkm:
- case X86::KMOVWkm:
- case X86::KMOVDkm:
- case X86::KMOVQkm:
+ case X86::VMOVDQA32Zrm:
+ case X86::VMOVDQU32Zrm:
+ case X86::VMOVDQA64Zrm:
+ case X86::VMOVDQU64Zrm:
+ MemBytes = 64;
return true;
}
}
-static bool isFrameStoreOpcode(int Opcode) {
+static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
switch (Opcode) {
- default: break;
+ default:
+ return false;
case X86::MOV8mr:
+ case X86::KMOVBmk:
+ MemBytes = 1;
+ return true;
case X86::MOV16mr:
+ case X86::KMOVWmk:
+ MemBytes = 2;
+ return true;
case X86::MOV32mr:
+ case X86::MOVSSmr:
+ case X86::VMOVSSmr:
+ case X86::VMOVSSZmr:
+ case X86::KMOVDmk:
+ MemBytes = 4;
+ return true;
case X86::MOV64mr:
case X86::ST_FpP64m:
- case X86::MOVSSmr:
case X86::MOVSDmr:
+ case X86::VMOVSDmr:
+ case X86::VMOVSDZmr:
+ case X86::MMX_MOVD64mr:
+ case X86::MMX_MOVQ64mr:
+ case X86::MMX_MOVNTQmr:
+ case X86::KMOVQmk:
+ MemBytes = 8;
+ return true;
case X86::MOVAPSmr:
case X86::MOVUPSmr:
case X86::MOVAPDmr:
case X86::MOVUPDmr:
case X86::MOVDQAmr:
case X86::MOVDQUmr:
- case X86::VMOVSSmr:
- case X86::VMOVSDmr:
case X86::VMOVAPSmr:
case X86::VMOVUPSmr:
case X86::VMOVAPDmr:
case X86::VMOVUPDmr:
case X86::VMOVDQAmr:
case X86::VMOVDQUmr:
+ case X86::VMOVUPSZ128mr:
+ case X86::VMOVAPSZ128mr:
+ case X86::VMOVUPSZ128mr_NOVLX:
+ case X86::VMOVAPSZ128mr_NOVLX:
+ case X86::VMOVUPDZ128mr:
+ case X86::VMOVAPDZ128mr:
+ case X86::VMOVDQA32Z128mr:
+ case X86::VMOVDQU32Z128mr:
+ case X86::VMOVDQA64Z128mr:
+ case X86::VMOVDQU64Z128mr:
+ case X86::VMOVDQU8Z128mr:
+ case X86::VMOVDQU16Z128mr:
+ MemBytes = 16;
+ return true;
case X86::VMOVUPSYmr:
case X86::VMOVAPSYmr:
case X86::VMOVUPDYmr:
case X86::VMOVAPDYmr:
case X86::VMOVDQUYmr:
case X86::VMOVDQAYmr:
- case X86::VMOVSSZmr:
- case X86::VMOVSDZmr:
- case X86::VMOVUPSZmr:
- case X86::VMOVUPSZ128mr:
case X86::VMOVUPSZ256mr:
- case X86::VMOVUPSZ128mr_NOVLX:
- case X86::VMOVUPSZ256mr_NOVLX:
- case X86::VMOVAPSZmr:
- case X86::VMOVAPSZ128mr:
case X86::VMOVAPSZ256mr:
- case X86::VMOVAPSZ128mr_NOVLX:
+ case X86::VMOVUPSZ256mr_NOVLX:
case X86::VMOVAPSZ256mr_NOVLX:
- case X86::VMOVUPDZmr:
- case X86::VMOVUPDZ128mr:
case X86::VMOVUPDZ256mr:
- case X86::VMOVAPDZmr:
- case X86::VMOVAPDZ128mr:
case X86::VMOVAPDZ256mr:
- case X86::VMOVDQA32Zmr:
- case X86::VMOVDQA32Z128mr:
+ case X86::VMOVDQU8Z256mr:
+ case X86::VMOVDQU16Z256mr:
case X86::VMOVDQA32Z256mr:
- case X86::VMOVDQU32Zmr:
- case X86::VMOVDQU32Z128mr:
case X86::VMOVDQU32Z256mr:
- case X86::VMOVDQA64Zmr:
- case X86::VMOVDQA64Z128mr:
case X86::VMOVDQA64Z256mr:
- case X86::VMOVDQU64Zmr:
- case X86::VMOVDQU64Z128mr:
case X86::VMOVDQU64Z256mr:
+ MemBytes = 32;
+ return true;
+ case X86::VMOVUPSZmr:
+ case X86::VMOVAPSZmr:
+ case X86::VMOVUPDZmr:
+ case X86::VMOVAPDZmr:
case X86::VMOVDQU8Zmr:
- case X86::VMOVDQU8Z128mr:
- case X86::VMOVDQU8Z256mr:
case X86::VMOVDQU16Zmr:
- case X86::VMOVDQU16Z128mr:
- case X86::VMOVDQU16Z256mr:
- case X86::MMX_MOVD64mr:
- case X86::MMX_MOVQ64mr:
- case X86::MMX_MOVNTQmr:
- case X86::KMOVBmk:
- case X86::KMOVWmk:
- case X86::KMOVDmk:
- case X86::KMOVQmk:
+ case X86::VMOVDQA32Zmr:
+ case X86::VMOVDQU32Zmr:
+ case X86::VMOVDQA64Zmr:
+ case X86::VMOVDQU64Zmr:
+ MemBytes = 64;
return true;
}
return false;
@@ -4096,7 +4123,14 @@ static bool isFrameStoreOpcode(int Opcode) {
unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
- if (isFrameLoadOpcode(MI.getOpcode()))
+ unsigned Dummy;
+ return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy);
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+ int &FrameIndex,
+ unsigned &MemBytes) const {
+ if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
return MI.getOperand(0).getReg();
return 0;
@@ -4104,7 +4138,8 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
- if (isFrameLoadOpcode(MI.getOpcode())) {
+ unsigned Dummy;
+ if (isFrameLoadOpcode(MI.getOpcode(), Dummy)) {
unsigned Reg;
if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
return Reg;
@@ -4117,7 +4152,14 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
- if (isFrameStoreOpcode(MI.getOpcode()))
+ unsigned Dummy;
+ return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy);
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+ int &FrameIndex,
+ unsigned &MemBytes) const {
+ if (isFrameStoreOpcode(MI.getOpcode(), MemBytes))
if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
isFrameOperand(MI, 0, FrameIndex))
return MI.getOperand(X86::AddrNumOperands).getReg();
@@ -4126,7 +4168,8 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
- if (isFrameStoreOpcode(MI.getOpcode())) {
+ unsigned Dummy;
+ if (isFrameStoreOpcode(MI.getOpcode(), Dummy)) {
unsigned Reg;
if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
return Reg;
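
A usage fragment (not compilable on its own; it assumes a const TargetInstrInfo *TII and a MachineInstr &MI are in scope) showing what the widened X86 hooks give a caller:

int FrameIndex;
unsigned MemBytes;
if (unsigned Reg = TII->isLoadFromStackSlot(MI, FrameIndex, MemBytes)) {
  // Reg is reloaded from slot FrameIndex; MemBytes is the width of the
  // access itself, e.g. 8 for MOVSDrm even when the slot is 16 bytes.
}

Grouping the opcode cases by width also lets a single switch answer both "is this a frame load/store" and "how wide is it", which is why isFrameLoadOpcode and isFrameStoreOpcode were restructured rather than paired with a separate size table.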
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 5b2799d049f..3abc0ad1458 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -238,6 +238,9 @@ public:
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
+ unsigned isLoadFromStackSlot(const MachineInstr &MI,
+ int &FrameIndex,
+ unsigned &MemBytes) const override;
/// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
/// stack locations as well. This uses a heuristic so it isn't
/// reliable for correctness.
@@ -246,6 +249,9 @@ public:
unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
+ unsigned isStoreToStackSlot(const MachineInstr &MI,
+ int &FrameIndex,
+ unsigned &MemBytes) const override;
/// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
/// stack locations as well. This uses a heuristic so it isn't
/// reliable for correctness.
diff --git a/llvm/test/CodeGen/X86/pr30821.mir b/llvm/test/CodeGen/X86/pr30821.mir
new file mode 100644
index 00000000000..15a6eb55105
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr30821.mir
@@ -0,0 +1,133 @@
+# RUN: llc -x mir < %s -run-pass=greedy,virtregrewriter,stack-slot-coloring | FileCheck %s
+--- |
+ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+ target triple = "x86_64-unknown-linux-gnu"
+
+ define dso_local i32 @main() local_unnamed_addr {
+ entry:
+ ; Dummy IR that just performs some allocas -- the machine IR function
+ ; below is what this test is about.
+ %alpha = alloca i8, align 1
+ %foxtrot = alloca <2 x double>, align 16
+ %india = alloca <2 x double>, align 16
+ ret i32 0
+ }
+
+...
+---
+name: main
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+registers:
+liveins:
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 16
+ adjustsStack: false
+ hasCalls: true
+ stackProtector: ''
+ maxCallFrameSize: 4294967295
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack:
+stack:
+ - { id: 0, name: alpha, type: default, offset: 0, size: 1, alignment: 1,
+ stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+ di-variable: '', di-expression: '', di-location: '' }
+ - { id: 1, name: foxtrot, type: default, offset: 0, size: 16, alignment: 16,
+ stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+ di-variable: '', di-expression: '', di-location: '' }
+ - { id: 2, name: india, type: default, offset: 0, size: 16, alignment: 16,
+ stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+ di-variable: '', di-expression: '', di-location: '' }
+constants:
+body: |
+ bb.0.entry:
+ ; To trick stack-slot-colouring to run its dead-store-elimination phase,
+ ; which is at fault, we need the register allocator to run, and spill in two
+ ; places that can have their slots merged. Achieve this by volatile-loading
+ ; data into $xmm[0-14] and volatile storing them later, leaving regalloc only
+ ; $xmm15 to play with in the middle.
+ ; Then, perform two virtreg load-and-store pairs, with the faulty code
+ ; sequence in the middle (MOVSDrm then MOVAPDmr on the same slot). The virtreg
+ ; gets spilt; the corresponding stack slots merged; and faulty code sequence
+ ; eliminated if LLVM is broken.
+
+ ; Make first 15 $xmm registers live
+ $xmm0 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+ $xmm1 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+ $xmm2 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+ $xmm3 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+ $xmm4 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+ $xmm5 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+ $xmm6 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+ $xmm7 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+ $xmm8 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+ $xmm9 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+ $xmm10 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+ $xmm11 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+ $xmm12 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+ $xmm13 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+ $xmm14 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+
+ ; First vreg load
+ %1:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+
+ ; First faulty sequence; %1 spilt
+ %12:fr64 = MOVSDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 8 from %ir.india)
+ %13:vr128 = COPY killed %12
+ MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %13 :: (volatile store 16 into %ir.india)
+ ; CHECK: renamable $xmm{{[0-9]+}} = MOVSDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 8 from %ir.india)
+ ; CHECK-NEXT: MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (volatile store 16 into %ir.india)
+
+ ; Store %1 to avoid it being optimised out, will result in a load-from-spill
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %1 :: (volatile dereferenceable store 16 into %ir.india)
+
+ ; That code sequence a second time, to generate a second spill slot that
+ ; will get coloured and merged.
+ %2:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
+
+ %22:fr64 = MOVSDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 8 from %ir.india)
+ %23:vr128 = COPY killed %22
+ MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %23 :: (volatile store 16 into %ir.india)
+
+ ; CHECK: renamable $xmm{{[0-9]+}} = MOVSDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 8 from %ir.india)
+ ; CHECK-NEXT: MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (volatile store 16 into %ir.india)
+
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %2 :: (volatile dereferenceable store 16 into %ir.india)
+
+ ; Stores of first 15 $xmm registers to keep them live across the middle of
+ ; this bb.
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm0 :: (volatile dereferenceable store 16 into %ir.india)
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm1 :: (volatile dereferenceable store 16 into %ir.india)
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm2 :: (volatile dereferenceable store 16 into %ir.india)
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm3 :: (volatile dereferenceable store 16 into %ir.india)
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm4 :: (volatile dereferenceable store 16 into %ir.india)
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm5 :: (volatile dereferenceable store 16 into %ir.india)
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm6 :: (volatile dereferenceable store 16 into %ir.india)
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm7 :: (volatile dereferenceable store 16 into %ir.india)
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm8 :: (volatile dereferenceable store 16 into %ir.india)
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm9 :: (volatile dereferenceable store 16 into %ir.india)
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm10 :: (volatile dereferenceable store 16 into %ir.india)
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm11 :: (volatile dereferenceable store 16 into %ir.india)
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm12 :: (volatile dereferenceable store 16 into %ir.india)
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm13 :: (volatile dereferenceable store 16 into %ir.india)
+ MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm14 :: (volatile dereferenceable store 16 into %ir.india)
+
+ RET 0
+
+...