summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorJakob Stoklund Olesen <stoklund@2pi.dk>2011-11-10 00:17:03 +0000
committerJakob Stoklund Olesen <stoklund@2pi.dk>2011-11-10 00:17:03 +0000
commiteef48b6938662792d7612fc8e1f6a505e8998b8d (patch)
tree1dfa6da893953b777e0f54072a94c9f5c1e1576c /llvm
parentf16beb39429b3adcbd0c1baf018bc021ad844957 (diff)
downloadbcm5719-llvm-eef48b6938662792d7612fc8e1f6a505e8998b8d.tar.gz
bcm5719-llvm-eef48b6938662792d7612fc8e1f6a505e8998b8d.zip
Strip old implicit operands after foldMemoryOperand.
The TII.foldMemoryOperand hook preserves implicit operands from the original instruction. This is not what we want when those implicit operands refer to the register being spilled. Implicit operands referring to other registers are preserved. This fixes PR11347. llvm-svn: 144247
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/CodeGen/InlineSpiller.cpp21
-rw-r--r--llvm/test/CodeGen/X86/2011-11-09-FoldImpDefs.ll105
2 files changed, 124 insertions, 2 deletions
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 726af469657..d1e3f1afbfa 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -1017,14 +1017,18 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) {
bool WasCopy = MI->isCopy();
+ unsigned ImpReg = 0;
+
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
// operands.
SmallVector<unsigned, 8> FoldOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
unsigned Idx = Ops[i];
MachineOperand &MO = MI->getOperand(Idx);
- if (MO.isImplicit())
+ if (MO.isImplicit()) {
+ ImpReg = MO.getReg();
continue;
+ }
// FIXME: Teach targets to deal with subregs.
if (MO.getSubReg())
return false;
@@ -1045,7 +1049,20 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
if (!LoadMI)
VRM.addSpillSlotUse(StackSlot, FoldMI);
MI->eraseFromParent();
- DEBUG(dbgs() << "\tfolded: " << *FoldMI);
+
+ // TII.foldMemoryOperand may have left some implicit operands on the
+ // instruction. Strip them.
+ if (ImpReg)
+ for (unsigned i = FoldMI->getNumOperands(); i; --i) {
+ MachineOperand &MO = FoldMI->getOperand(i - 1);
+ if (!MO.isReg() || !MO.isImplicit())
+ break;
+ if (MO.getReg() == ImpReg)
+ FoldMI->RemoveOperand(i - 1);
+ }
+
+ DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t'
+ << *FoldMI);
if (!WasCopy)
++NumFolded;
else if (Ops.front() == 0)
diff --git a/llvm/test/CodeGen/X86/2011-11-09-FoldImpDefs.ll b/llvm/test/CodeGen/X86/2011-11-09-FoldImpDefs.ll
new file mode 100644
index 00000000000..095d8c68c22
--- /dev/null
+++ b/llvm/test/CodeGen/X86/2011-11-09-FoldImpDefs.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -verify-regalloc | FileCheck %s
+; PR11347
+;
+; This test case materializes the constant 1 in a register:
+;
+; %vreg19<def> = MOV32ri 1
+;
+; Then rematerializes the instruction for a sub-register copy:
+; 1168L %vreg14:sub_8bit<def,undef> = COPY %vreg19:sub_8bit<kill>, %vreg14<imp-def>; GR32:%vreg14,%vreg19
+; Considering merging %vreg19 with %vreg14
+; RHS = %vreg19 = [560d,656L:0)[720L,976d:0)[1088L,1168d:0) 0@560d
+; LHS = %vreg14 = [16d,160L:0)[160L,256L:2)[256L,1088L:1)[1168d,1184L:3)[1184L,1344L:2) 0@16d-phikill 1@256L-phidef-phikill 2@1184L-phidef-phikill 3@1168d-phikill
+; Remat: %vreg14<def> = MOV32ri 1, %vreg14<imp-def>, %vreg14<imp-def>; GR32:%vreg14
+;
+; This rematerialized constant is feeding a PHI that is spilled, so the constant
+; is written directly to a stack slot that gets the %esi function argument in
+; another basic block:
+;
+; CHECK: %entry
+; CHECK: movl %esi, [[FI:[0-9]+\(%rsp\)]]
+; CHECK: %if.else24
+; CHECK: movl $1, [[FI]]
+; CHECK: %lor.end9
+; CHECK: movl [[FI]],
+;
+; Those <imp-def> operands on the MOV32ri instruction confused the spiller
+; because they were preserved by TII.foldMemoryOperand. It is quite rare to
+; see a rematerialized instruction spill; it can only happen when it is feeding
+; a PHI.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7"
+
+@g_193 = external global i32, align 4
+@g_103 = external global i32, align 4
+
+declare i32 @func_21(i16 signext, i32) nounwind uwtable readnone ssp
+
+define i32 @func_25(i32 %p_27, i8 signext %p_28, i32 %p_30) noreturn nounwind uwtable ssp {
+entry:
+ br label %for.cond
+
+for.cond28.for.cond.loopexit_crit_edge: ; preds = %for.cond28thread-pre-split
+ store i32 0, i32* @g_103, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.cond28thread-pre-split, %for.cond28.for.cond.loopexit_crit_edge, %entry
+ %l_365.0 = phi i32 [ undef, %entry ], [ %and, %for.cond28.for.cond.loopexit_crit_edge ], [ %and, %for.cond28thread-pre-split ]
+ %l_288.0 = phi i32 [ undef, %entry ], [ %l_288.1.ph, %for.cond28.for.cond.loopexit_crit_edge ], [ %l_288.1.ph, %for.cond28thread-pre-split ]
+ %l_349.0 = phi i32 [ undef, %entry ], [ %xor, %for.cond28.for.cond.loopexit_crit_edge ], [ %xor, %for.cond28thread-pre-split ]
+ %p_28.addr.0 = phi i8 [ %p_28, %entry ], [ %p_28.addr.1.ph, %for.cond28.for.cond.loopexit_crit_edge ], [ %p_28.addr.1.ph, %for.cond28thread-pre-split ]
+ br i1 undef, label %for.cond31, label %lor.end
+
+lor.end: ; preds = %for.cond
+ %tobool3 = icmp eq i32 %l_349.0, 0
+ br i1 %tobool3, label %for.cond31, label %if.then
+
+if.then: ; preds = %lor.end
+ br i1 undef, label %lor.rhs6, label %lor.end9
+
+lor.rhs6: ; preds = %if.then
+ br label %lor.end9
+
+lor.end9: ; preds = %lor.rhs6, %if.then
+ %and = and i32 %l_365.0, 1
+ %conv11 = sext i8 %p_28.addr.0 to i32
+ %xor = xor i32 %and, %conv11
+ br i1 false, label %if.else, label %if.end
+
+if.else: ; preds = %lor.end9
+ br label %if.end
+
+if.end: ; preds = %if.else, %lor.end9
+ %l_395.0 = phi i32 [ 0, %if.else ], [ 1, %lor.end9 ]
+ %cmp14 = icmp ne i32 %and, %conv11
+ %conv15 = zext i1 %cmp14 to i32
+ br i1 %cmp14, label %if.then16, label %for.cond28thread-pre-split
+
+if.then16: ; preds = %if.end
+ %or17 = or i32 %l_288.0, 1
+ %call18 = tail call i32 @func_39(i32 0, i32 %or17, i32 0, i32 0) nounwind
+ br i1 undef, label %if.else24, label %if.then20
+
+if.then20: ; preds = %if.then16
+ %conv21 = trunc i32 %l_395.0 to i16
+ %call22 = tail call i32 @func_21(i16 signext %conv21, i32 undef)
+ br label %for.cond28thread-pre-split
+
+if.else24: ; preds = %if.then16
+ store i32 %conv15, i32* @g_193, align 4
+ %conv25 = trunc i32 %l_395.0 to i8
+ br label %for.cond28thread-pre-split
+
+for.cond28thread-pre-split: ; preds = %if.else24, %if.then20, %if.end
+ %l_288.1.ph = phi i32 [ %l_288.0, %if.end ], [ %or17, %if.else24 ], [ %or17, %if.then20 ]
+ %p_28.addr.1.ph = phi i8 [ %p_28.addr.0, %if.end ], [ %conv25, %if.else24 ], [ %p_28.addr.0, %if.then20 ]
+ %.pr = load i32* @g_103, align 4
+ %tobool2933 = icmp eq i32 %.pr, 0
+ br i1 %tobool2933, label %for.cond, label %for.cond28.for.cond.loopexit_crit_edge
+
+for.cond31: ; preds = %for.cond31, %lor.end, %for.cond
+ br label %for.cond31
+}
+
+declare i32 @func_39(i32, i32, i32, i32)
OpenPOWER on IntegriCloud