author    Warren Ristow <warren.ristow@sony.com>    2018-04-24 22:01:50 +0000
committer Warren Ristow <warren.ristow@sony.com>    2018-04-24 22:01:50 +0000
commit    b960d2cb405acb23abb335145d48e37abebd0ada (patch)
tree      78e557c069f111ca37c6024b77ab25062cdaafc6 /llvm/lib
parent    b3bfd3b028ce99efb9e65d42a563becae3ea4ddc (diff)
[X86] Account for partial stack slot spills (PR30821)
Previously, _any_ store or load instruction was considered to be
operating on a spill if it had a frameindex as an operand, and thus
was fair game for optimisations such as "StackSlotColoring". This
usually works, except on architectures where spills can be partially
restored, for example on X86, where a spilt vector can have a single
component loaded (zeroing the rest of the target register). Such a
partial reload can be misinterpreted as a full one, and the zero
extension unsoundly eliminated; see PR30821.
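
To make the hazard concrete, here is a small standalone C++ model of a
16-byte spill followed by a 4-byte MOVSS-style reload (illustrative
only, not LLVM code; the names Xmm, spill16 and reload4 are invented
for this sketch):

#include <cassert>
#include <cstdint>
#include <cstring>

struct Xmm { uint8_t bytes[16]; };  // stand-in for a 128-bit register

// Full 16-byte spill of the register to a stack slot (MOVAPS-like).
void spill16(uint8_t *slot, const Xmm &r) { std::memcpy(slot, r.bytes, 16); }

// Partial 4-byte reload (MOVSS-like): the upper 12 bytes of the
// destination register are zeroed, not preserved.
Xmm reload4(const uint8_t *slot) {
  Xmm r = {};
  std::memcpy(r.bytes, slot, 4);
  return r;
}

int main() {
  uint8_t slot[16];
  Xmm v = {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}};
  spill16(slot, v);
  Xmm w = reload4(slot);
  // Treating the spill/reload pair as a full-width copy (w == v) would
  // be unsound: w intentionally differs from v in the upper bytes.
  assert(std::memcmp(w.bytes, v.bytes, 16) != 0);
  return 0;
}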
To avoid this, this commit optionally provides callers of
isLoadFromStackSlot and isStoreToStackSlot with the number of bytes
spilt/loaded by the given instruction. Optimisations can then
determine that, for example, a full spill followed by a partial load
(or vice versa) cannot necessarily be optimised away as a redundant
pair.
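
For illustration, a minimal sketch of how a pass can consume the new
out-parameters, mirroring the StackSlotColoring change below (the
helper name isFullWidthSlotCopy is hypothetical, not part of the
patch):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

// Hypothetical helper: true only when Load and Store touch the same
// stack slot with the same register *and* the same access width. A
// size mismatch indicates a partial spill/restore and must be left
// alone.
static bool isFullWidthSlotCopy(const TargetInstrInfo &TII,
                                const MachineInstr &Load,
                                const MachineInstr &Store) {
  int LoadFI = 0, StoreFI = 0;
  unsigned LoadBytes = 0, StoreBytes = 0;
  unsigned LoadReg = TII.isLoadFromStackSlot(Load, LoadFI, LoadBytes);
  unsigned StoreReg = TII.isStoreToStackSlot(Store, StoreFI, StoreBytes);
  return LoadReg && LoadReg == StoreReg && LoadFI == StoreFI &&
         LoadBytes == StoreBytes;
}

A caller like RemoveDeadStores would bail out whenever such a predicate
is false, rather than assuming any frame-index load/store pair is
interchangeable.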
Patch by Jeremy Morse!
Differential Revision: https://reviews.llvm.org/D44782
llvm-svn: 330778
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/StackSlotColoring.cpp | 10
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp   | 199
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.h     | 6
3 files changed, 134 insertions, 81 deletions
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index 1d0aa687337..17f6b83a619 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -418,7 +418,9 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
 
     unsigned LoadReg = 0;
     unsigned StoreReg = 0;
-    if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS)))
+    unsigned LoadSize = 0;
+    unsigned StoreSize = 0;
+    if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS, LoadSize)))
       continue;
     // Skip the ...pseudo debugging... instructions between a load and store.
     while ((NextMI != E) && NextMI->isDebugValue()) {
@@ -426,9 +428,11 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
       ++I;
     }
     if (NextMI == E) continue;
-    if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS)))
+    if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS, StoreSize)))
+      continue;
+    if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1 ||
+        LoadSize != StoreSize)
       continue;
-    if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
 
     ++NumDead;
     changed = true;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index ad92a038107..11ca8b0aa3f 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3939,24 +3939,40 @@ bool X86InstrInfo::isFrameOperand(const MachineInstr &MI, unsigned int Op,
   return false;
 }
 
-static bool isFrameLoadOpcode(int Opcode) {
+static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
   switch (Opcode) {
   default:
     return false;
   case X86::MOV8rm:
+  case X86::KMOVBkm:
+    MemBytes = 1;
+    return true;
   case X86::MOV16rm:
+  case X86::KMOVWkm:
+    MemBytes = 2;
+    return true;
   case X86::MOV32rm:
+  case X86::MOVSSrm:
+  case X86::VMOVSSZrm:
+  case X86::KMOVDkm:
+    MemBytes = 4;
+    return true;
   case X86::MOV64rm:
   case X86::LD_Fp64m:
-  case X86::MOVSSrm:
   case X86::MOVSDrm:
+  case X86::VMOVSSrm:
+  case X86::VMOVSDZrm:
+  case X86::MMX_MOVD64rm:
+  case X86::MMX_MOVQ64rm:
+  case X86::KMOVQkm:
+    MemBytes = 8;
+    return true;
   case X86::MOVAPSrm:
   case X86::MOVUPSrm:
   case X86::MOVAPDrm:
   case X86::MOVUPDrm:
   case X86::MOVDQArm:
   case X86::MOVDQUrm:
-  case X86::VMOVSSrm:
   case X86::VMOVSDrm:
   case X86::VMOVAPSrm:
   case X86::VMOVUPSrm:
@@ -3964,131 +3980,142 @@ static bool isFrameLoadOpcode(int Opcode) {
   case X86::VMOVUPDrm:
   case X86::VMOVDQArm:
   case X86::VMOVDQUrm:
-  case X86::VMOVUPSYrm:
+  case X86::VMOVAPSZ128rm:
+  case X86::VMOVUPSZ128rm:
+  case X86::VMOVAPSZ128rm_NOVLX:
+  case X86::VMOVUPSZ128rm_NOVLX:
+  case X86::VMOVAPDZ128rm:
+  case X86::VMOVUPDZ128rm:
+  case X86::VMOVDQU8Z128rm:
+  case X86::VMOVDQU16Z128rm:
+  case X86::VMOVDQA32Z128rm:
+  case X86::VMOVDQU32Z128rm:
+  case X86::VMOVDQA64Z128rm:
+  case X86::VMOVDQU64Z128rm:
+    MemBytes = 16;
+    return true;
   case X86::VMOVAPSYrm:
-  case X86::VMOVUPDYrm:
+  case X86::VMOVUPSYrm:
   case X86::VMOVAPDYrm:
-  case X86::VMOVDQUYrm:
+  case X86::VMOVUPDYrm:
   case X86::VMOVDQAYrm:
-  case X86::MMX_MOVD64rm:
-  case X86::MMX_MOVQ64rm:
-  case X86::VMOVSSZrm:
-  case X86::VMOVSDZrm:
-  case X86::VMOVAPSZrm:
-  case X86::VMOVAPSZ128rm:
+  case X86::VMOVDQUYrm:
   case X86::VMOVAPSZ256rm:
-  case X86::VMOVAPSZ128rm_NOVLX:
-  case X86::VMOVAPSZ256rm_NOVLX:
-  case X86::VMOVUPSZrm:
-  case X86::VMOVUPSZ128rm:
   case X86::VMOVUPSZ256rm:
-  case X86::VMOVUPSZ128rm_NOVLX:
+  case X86::VMOVAPSZ256rm_NOVLX:
   case X86::VMOVUPSZ256rm_NOVLX:
-  case X86::VMOVAPDZrm:
-  case X86::VMOVAPDZ128rm:
   case X86::VMOVAPDZ256rm:
-  case X86::VMOVUPDZrm:
-  case X86::VMOVUPDZ128rm:
   case X86::VMOVUPDZ256rm:
-  case X86::VMOVDQA32Zrm:
-  case X86::VMOVDQA32Z128rm:
+  case X86::VMOVDQU8Z256rm:
+  case X86::VMOVDQU16Z256rm:
   case X86::VMOVDQA32Z256rm:
-  case X86::VMOVDQU32Zrm:
-  case X86::VMOVDQU32Z128rm:
   case X86::VMOVDQU32Z256rm:
-  case X86::VMOVDQA64Zrm:
-  case X86::VMOVDQA64Z128rm:
   case X86::VMOVDQA64Z256rm:
-  case X86::VMOVDQU64Zrm:
-  case X86::VMOVDQU64Z128rm:
   case X86::VMOVDQU64Z256rm:
+    MemBytes = 32;
+    return true;
+  case X86::VMOVAPSZrm:
+  case X86::VMOVUPSZrm:
+  case X86::VMOVAPDZrm:
+  case X86::VMOVUPDZrm:
   case X86::VMOVDQU8Zrm:
-  case X86::VMOVDQU8Z128rm:
-  case X86::VMOVDQU8Z256rm:
   case X86::VMOVDQU16Zrm:
-  case X86::VMOVDQU16Z128rm:
-  case X86::VMOVDQU16Z256rm:
-  case X86::KMOVBkm:
-  case X86::KMOVWkm:
-  case X86::KMOVDkm:
-  case X86::KMOVQkm:
+  case X86::VMOVDQA32Zrm:
+  case X86::VMOVDQU32Zrm:
+  case X86::VMOVDQA64Zrm:
+  case X86::VMOVDQU64Zrm:
+    MemBytes = 64;
     return true;
   }
 }
 
-static bool isFrameStoreOpcode(int Opcode) {
+static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
   switch (Opcode) {
-  default: break;
+  default:
+    return false;
   case X86::MOV8mr:
+  case X86::KMOVBmk:
+    MemBytes = 1;
+    return true;
   case X86::MOV16mr:
+  case X86::KMOVWmk:
+    MemBytes = 2;
+    return true;
   case X86::MOV32mr:
+  case X86::MOVSSmr:
+  case X86::VMOVSSmr:
+  case X86::VMOVSSZmr:
+  case X86::KMOVDmk:
+    MemBytes = 4;
+    return true;
   case X86::MOV64mr:
   case X86::ST_FpP64m:
-  case X86::MOVSSmr:
   case X86::MOVSDmr:
+  case X86::VMOVSDmr:
+  case X86::VMOVSDZmr:
+  case X86::MMX_MOVD64mr:
+  case X86::MMX_MOVQ64mr:
+  case X86::MMX_MOVNTQmr:
+  case X86::KMOVQmk:
+    MemBytes = 8;
+    return true;
   case X86::MOVAPSmr:
   case X86::MOVUPSmr:
   case X86::MOVAPDmr:
   case X86::MOVUPDmr:
   case X86::MOVDQAmr:
   case X86::MOVDQUmr:
-  case X86::VMOVSSmr:
-  case X86::VMOVSDmr:
   case X86::VMOVAPSmr:
   case X86::VMOVUPSmr:
   case X86::VMOVAPDmr:
   case X86::VMOVUPDmr:
   case X86::VMOVDQAmr:
   case X86::VMOVDQUmr:
+  case X86::VMOVUPSZ128mr:
+  case X86::VMOVAPSZ128mr:
+  case X86::VMOVUPSZ128mr_NOVLX:
+  case X86::VMOVAPSZ128mr_NOVLX:
+  case X86::VMOVUPDZ128mr:
+  case X86::VMOVAPDZ128mr:
+  case X86::VMOVDQA32Z128mr:
+  case X86::VMOVDQU32Z128mr:
+  case X86::VMOVDQA64Z128mr:
+  case X86::VMOVDQU64Z128mr:
+  case X86::VMOVDQU8Z128mr:
+  case X86::VMOVDQU16Z128mr:
+    MemBytes = 16;
+    return true;
   case X86::VMOVUPSYmr:
   case X86::VMOVAPSYmr:
   case X86::VMOVUPDYmr:
   case X86::VMOVAPDYmr:
   case X86::VMOVDQUYmr:
   case X86::VMOVDQAYmr:
-  case X86::VMOVSSZmr:
-  case X86::VMOVSDZmr:
-  case X86::VMOVUPSZmr:
-  case X86::VMOVUPSZ128mr:
   case X86::VMOVUPSZ256mr:
-  case X86::VMOVUPSZ128mr_NOVLX:
-  case X86::VMOVUPSZ256mr_NOVLX:
-  case X86::VMOVAPSZmr:
-  case X86::VMOVAPSZ128mr:
   case X86::VMOVAPSZ256mr:
-  case X86::VMOVAPSZ128mr_NOVLX:
+  case X86::VMOVUPSZ256mr_NOVLX:
   case X86::VMOVAPSZ256mr_NOVLX:
-  case X86::VMOVUPDZmr:
-  case X86::VMOVUPDZ128mr:
   case X86::VMOVUPDZ256mr:
-  case X86::VMOVAPDZmr:
-  case X86::VMOVAPDZ128mr:
   case X86::VMOVAPDZ256mr:
-  case X86::VMOVDQA32Zmr:
-  case X86::VMOVDQA32Z128mr:
+  case X86::VMOVDQU8Z256mr:
+  case X86::VMOVDQU16Z256mr:
   case X86::VMOVDQA32Z256mr:
-  case X86::VMOVDQU32Zmr:
-  case X86::VMOVDQU32Z128mr:
   case X86::VMOVDQU32Z256mr:
-  case X86::VMOVDQA64Zmr:
-  case X86::VMOVDQA64Z128mr:
   case X86::VMOVDQA64Z256mr:
-  case X86::VMOVDQU64Zmr:
-  case X86::VMOVDQU64Z128mr:
   case X86::VMOVDQU64Z256mr:
+    MemBytes = 32;
+    return true;
+  case X86::VMOVUPSZmr:
+  case X86::VMOVAPSZmr:
+  case X86::VMOVUPDZmr:
+  case X86::VMOVAPDZmr:
   case X86::VMOVDQU8Zmr:
-  case X86::VMOVDQU8Z128mr:
-  case X86::VMOVDQU8Z256mr:
   case X86::VMOVDQU16Zmr:
-  case X86::VMOVDQU16Z128mr:
-  case X86::VMOVDQU16Z256mr:
-  case X86::MMX_MOVD64mr:
-  case X86::MMX_MOVQ64mr:
-  case X86::MMX_MOVNTQmr:
-  case X86::KMOVBmk:
-  case X86::KMOVWmk:
-  case X86::KMOVDmk:
-  case X86::KMOVQmk:
+  case X86::VMOVDQA32Zmr:
+  case X86::VMOVDQU32Zmr:
+  case X86::VMOVDQA64Zmr:
+  case X86::VMOVDQU64Zmr:
+    MemBytes = 64;
     return true;
   }
   return false;
@@ -4096,7 +4123,14 @@ static bool isFrameStoreOpcode(int Opcode) {
 
 unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                            int &FrameIndex) const {
-  if (isFrameLoadOpcode(MI.getOpcode()))
+  unsigned Dummy;
+  return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy);
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+                                           int &FrameIndex,
+                                           unsigned &MemBytes) const {
+  if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
     if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
       return MI.getOperand(0).getReg();
   return 0;
@@ -4104,7 +4138,8 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
 
 unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
                                                  int &FrameIndex) const {
-  if (isFrameLoadOpcode(MI.getOpcode())) {
+  unsigned Dummy;
+  if (isFrameLoadOpcode(MI.getOpcode(), Dummy)) {
     unsigned Reg;
     if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
       return Reg;
@@ -4117,7 +4152,14 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
 
 unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                           int &FrameIndex) const {
-  if (isFrameStoreOpcode(MI.getOpcode()))
+  unsigned Dummy;
+  return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy);
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+                                          int &FrameIndex,
+                                          unsigned &MemBytes) const {
+  if (isFrameStoreOpcode(MI.getOpcode(), MemBytes))
     if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
         isFrameOperand(MI, 0, FrameIndex))
       return MI.getOperand(X86::AddrNumOperands).getReg();
@@ -4126,7 +4168,8 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
 
 unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
                                                 int &FrameIndex) const {
-  if (isFrameStoreOpcode(MI.getOpcode())) {
+  unsigned Dummy;
+  if (isFrameStoreOpcode(MI.getOpcode(), Dummy)) {
     unsigned Reg;
     if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
       return Reg;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 5b2799d049f..3abc0ad1458 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -238,6 +238,9 @@ public:
   unsigned isLoadFromStackSlot(const MachineInstr &MI,
                                int &FrameIndex) const override;
+  unsigned isLoadFromStackSlot(const MachineInstr &MI,
+                               int &FrameIndex,
+                               unsigned &MemBytes) const override;
 
   /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
   /// stack locations as well. This uses a heuristic so it isn't
   /// reliable for correctness.
@@ -246,6 +249,9 @@ public:
   unsigned isStoreToStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
+  unsigned isStoreToStackSlot(const MachineInstr &MI,
+                              int &FrameIndex,
+                              unsigned &MemBytes) const override;
 
   /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
   /// stack locations as well. This uses a heuristic so it isn't
   /// reliable for correctness.