summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Benjamin Kramer <benny.kra@googlemail.com> 2013-10-06 13:48:22 +0000
committer: Benjamin Kramer <benny.kra@googlemail.com> 2013-10-06 13:48:22 +0000
commit 858a3880d6ac21e2f5a16f1a0df408dab5333a8b (patch)
tree 24fd1f84e1f80f932bf9f8f31fac9f4249239dbe
parent 2e408aefe00b725008bd1f2c185f3c76a9d15c5d (diff)
downloadbcm5719-llvm-858a3880d6ac21e2f5a16f1a0df408dab5333a8b.tar.gz
bcm5719-llvm-858a3880d6ac21e2f5a16f1a0df408dab5333a8b.zip
X86: Don't fold spills into SSE operations if the stack is unaligned.
Regalloc can emit unaligned spills nowadays, but we can't fold the spills into
SSE ops if we can't guarantee alignment. PR12250.

llvm-svn: 192064
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp4
-rw-r--r--llvm/test/CodeGen/X86/unaligned-spill-folding.ll49
2 files changed, 53 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 277e043654e..6f8b0989a5f 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4166,6 +4166,10 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
+ // If the function stack isn't realigned we don't want to fold instructions
+ // that need increased alignment.
+ if (!RI.needsStackRealignment(MF))
+ Alignment = std::min(Alignment, TM.getFrameLowering()->getStackAlignment());
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
unsigned RCSize = 0;
diff --git a/llvm/test/CodeGen/X86/unaligned-spill-folding.ll b/llvm/test/CodeGen/X86/unaligned-spill-folding.ll
new file mode 100644
index 00000000000..af0c59b6739
--- /dev/null
+++ b/llvm/test/CodeGen/X86/unaligned-spill-folding.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple=i386-unknown-freebsd -mattr=sse2 -stack-alignment=4 -relocation-model=pic < %s | FileCheck %s -check-prefix=UNALIGNED
+; RUN: llc -mtriple=i386-unknown-freebsd -mattr=sse2 -stack-alignment=16 -relocation-model=pic < %s | FileCheck %s -check-prefix=ALIGNED
+; RUN: llc -mtriple=i386-unknown-freebsd -mattr=sse2 -stack-alignment=4 -force-align-stack -relocation-model=pic < %s | FileCheck %s -check-prefix=FORCEALIGNED
+
+@arr = internal unnamed_addr global [32 x i32] zeroinitializer, align 16
+
+; PR12250
+define i32 @test1() {
+vector.ph:
+ br label %vector.body
+
+vector.body:
+ %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; induction variable: 0, 4, ..., 28
+ %0 = getelementptr inbounds [32 x i32]* @arr, i32 0, i32 %index
+ %1 = bitcast i32* %0 to <4 x i32>* ; reinterpret &arr[%index] as a <4 x i32> lane
+ %wide.load = load <4 x i32>* %1, align 16 ; 16-byte-aligned vector load from @arr
+ %2 = add nsw <4 x i32> %wide.load, <i32 10, i32 10, i32 10, i32 10> ; add/xor chain below creates enough pressure for a vector spill (see CHECK lines)
+ %3 = xor <4 x i32> %2, <i32 123345, i32 123345, i32 123345, i32 123345>
+ %4 = add nsw <4 x i32> %3, <i32 112, i32 112, i32 112, i32 112>
+ %5 = xor <4 x i32> %4, <i32 543345, i32 543345, i32 543345, i32 543345>
+ %6 = add nsw <4 x i32> %5, <i32 73, i32 73, i32 73, i32 73>
+ %7 = xor <4 x i32> %6, <i32 345987, i32 345987, i32 345987, i32 345987>
+ %8 = add nsw <4 x i32> %7, <i32 48, i32 48, i32 48, i32 48>
+ %9 = xor <4 x i32> %8, <i32 123987, i32 123987, i32 123987, i32 123987>
+ store <4 x i32> %9, <4 x i32>* %1, align 16 ; write the result back over the same 4 elements
+ %index.next = add i32 %index, 4
+ %10 = icmp eq i32 %index.next, 32 ; loop until all 32 elements of @arr are processed
+ br i1 %10, label %middle.block, label %vector.body
+
+middle.block:
+ ret i32 0
+
+; We can't fold the spill into a padd unless the stack is aligned. Just spilling
+; doesn't force stack realignment though.
+; UNALIGNED-LABEL: @test1
+; UNALIGNED-NOT: andl $-{{..}}, %esp
+; UNALIGNED: movdqu {{.*}} # 16-byte Folded Spill
+; UNALIGNED-NOT: paddd {{.*}} # 16-byte Folded Reload
+
+; ALIGNED-LABEL: @test1
+; ALIGNED-NOT: andl $-{{..}}, %esp
+; ALIGNED: movdqa {{.*}} # 16-byte Spill
+; ALIGNED: paddd {{.*}} # 16-byte Folded Reload
+
+; FORCEALIGNED-LABEL: @test1
+; FORCEALIGNED: andl $-{{..}}, %esp
+; FORCEALIGNED: movdqa {{.*}} # 16-byte Spill
+; FORCEALIGNED: paddd {{.*}} # 16-byte Folded Reload
+}
OpenPOWER on IntegriCloud