path: root/llvm/lib/Target/X86
author     Ahmed Bougacha <ahmed.bougacha@gmail.com>    2015-06-22 20:51:51 +0000
committer  Ahmed Bougacha <ahmed.bougacha@gmail.com>    2015-06-22 20:51:51 +0000
commit     ed3c4d1a3de98b9ffe2dab8553bdfc87143c95d5 (patch)
tree       7fad42c96e07c0ab21d6e45072890e5a35edb209 /llvm/lib/Target/X86
parent     8c6fe230d551f52efa47f0a7c64513104ab16db1 (diff)
[X86] Teach load folding to accept scalar _Int users of MOVSS/MOVSD.
The _Int instructions are special, in that they operate on the full VR128 instead of FR32. The load folding logic then looks at the MOVSS load and at its user, and bails out when it sees a size mismatch. What we really know is that the rm_Int instructions don't load the higher lanes, so folding is fine.

This happens for the straightforward intrinsic code, e.g.:

  _mm_add_ss(a, _mm_load_ss(p));

Fixes PR23349.

Differential Revision: http://reviews.llvm.org/D10554

llvm-svn: 240326
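For illustration only (not part of the original commit), here is a minimal C++ sketch of the intrinsic pattern this change affects; the function name is hypothetical. With this patch, the scalar load below can be folded into the ADDSS itself (e.g. addss (%rdi), %xmm0) rather than requiring a separate MOVSS:

  // Hypothetical example: scalar add where only the low 32-bit lane of the
  // loaded value is consumed, so folding the load into ADDSSrm_Int is safe.
  #include <xmmintrin.h>

  __m128 add_low_lane(__m128 a, const float *p) {
    // _mm_load_ss loads a single float into the low lane (upper lanes zeroed);
    // _mm_add_ss adds only the low lanes, keeping the upper lanes of 'a'.
    return _mm_add_ss(a, _mm_load_ss(p));
  }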
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp | 56
1 file changed, 46 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 4aa0ae6f195..744a7cbb5b4 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5295,21 +5295,57 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
Size, Alignment, /*AllowCommute=*/true);
}
-static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
- const MachineFunction &MF) {
+/// Check if \p LoadMI is a partial register load that we can't fold into \p MI
+/// because the latter uses contents that wouldn't be defined in the folded
+/// version. For instance, this transformation isn't legal:
+/// movss (%rdi), %xmm0
+/// addps %xmm0, %xmm0
+/// ->
+/// addps (%rdi), %xmm0
+///
+/// But this one is:
+/// movss (%rdi), %xmm0
+/// addss %xmm0, %xmm0
+/// ->
+/// addss (%rdi), %xmm0
+///
+static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
+ const MachineInstr &UserMI,
+ const MachineFunction &MF) {
unsigned Opc = LoadMI.getOpcode();
+ unsigned UserOpc = UserMI.getOpcode();
unsigned RegSize =
MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize();
- if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4)
+ if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4) {
// These instructions only load 32 bits, we can't fold them if the
- // destination register is wider than 32 bits (4 bytes).
- return true;
+ // destination register is wider than 32 bits (4 bytes), and its user
+ // instruction isn't scalar (SS).
+ switch (UserOpc) {
+ case X86::ADDSSrr_Int: case X86::VADDSSrr_Int:
+ case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int:
+ case X86::MULSSrr_Int: case X86::VMULSSrr_Int:
+ case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int:
+ return false;
+ default:
+ return true;
+ }
+ }
- if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8)
+ if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8) {
// These instructions only load 64 bits, we can't fold them if the
- // destination register is wider than 64 bits (8 bytes).
- return true;
+ // destination register is wider than 64 bits (8 bytes), and its user
+ // instruction isn't scalar (SD).
+ switch (UserOpc) {
+ case X86::ADDSDrr_Int: case X86::VADDSDrr_Int:
+ case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int:
+ case X86::MULSDrr_Int: case X86::VMULSDrr_Int:
+ case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int:
+ return false;
+ default:
+ return true;
+ }
+ }
return false;
}
@@ -5321,7 +5357,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
unsigned NumOps = LoadMI->getDesc().getNumOperands();
int FrameIndex;
if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
- if (isPartialRegisterLoad(*LoadMI, MF))
+ if (isNonFoldablePartialRegisterLoad(*LoadMI, *MI, MF))
return nullptr;
return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex);
}
@@ -5434,7 +5470,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
break;
}
default: {
- if (isPartialRegisterLoad(*LoadMI, MF))
+ if (isNonFoldablePartialRegisterLoad(*LoadMI, *MI, MF))
return nullptr;
// Folding a normal load. Just copy the load's address operands.