fix invalid load folding with SSE/AVX FP logical instructions (PR22371)

This is a follow-up to the FIXME that was added with D7474 ( http://reviews.llvm.org/rL229531 ).
I thought this load folding bug had been made hard-to-hit, but it turns out to be very easy when targeting 32-bit x86 and causes a miscompile/crash in Wine:
https://bugs.winehq.org/show_bug.cgi?id=38826
https://llvm.org/bugs/show_bug.cgi?id=22371#c25

The quick fix is to simply remove the scalar FP logical instructions from the load folding table in X86InstrInfo, but that causes us to miss load folds that should be possible when lowering fabs, fneg, fcopysign. So the majority of this patch is altering those lowerings to use *vector* FP logical instructions (because that's all x86 gives us anyway). That lets us do the load folding legally.

Differential Revision: http://reviews.llvm.org/D11477

llvm-svn: 243361
author: Sanjay Patel <spatel@rotateright.com> 2015-07-28 00:48:32 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2015-07-28 00:48:32 +0000
commit: 8c13e3680d3f4fc88830c402a2bcf929db0de9dd (patch)
tree: 26687837bdd765f0eb81a1c96076c7f2e3dbe61a /llvm/test/CodeGen
parent: 203f09223bb0bed16194d6a28431ba6a2fd5346a (diff)
download: bcm5719-llvm-8c13e3680d3f4fc88830c402a2bcf929db0de9dd.tar.gz
bcm5719-llvm-8c13e3680d3f4fc88830c402a2bcf929db0de9dd.zip
3 files changed, 47 insertions, 21 deletions
diff --git a/llvm/test/CodeGen/X86/pr2656.ll b/llvm/test/CodeGen/X86/pr2656.ll
index 9a162d77ef4..095ab831d48 100644
--- a/llvm/test/CodeGen/X86/pr2656.ll
+++ b/llvm/test/CodeGen/X86/pr2656.ll
@@ -1,15 +1,24 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 ; PR2656
 
-; CHECK:     {{xorps.*sp}}
-; CHECK-NOT: {{xorps.*sp}}
-
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9.4.0"
 	%struct.anon = type <{ float, float }>
 @.str = internal constant [17 x i8] c"pt: %.0f, %.0f\0A\00\00"		; <[17 x i8]*> [#uses=1]
 
+; We can not fold either stack load into an 'xor' instruction because that
+; would change what should be a 4-byte load into a 16-byte load.
+; We can fold the 16-byte constant load into either 'xor' instruction,
+; but we do not. It has more than one use, so it gets loaded into a register.
+
 define void @foo(%struct.anon* byval %p) nounwind {
+; CHECK-LABEL: foo:
+; CHECK:         movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT:    movaps {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; CHECK-NEXT:    xorps %xmm2, %xmm0
+; CHECK-NEXT:    cvtss2sd %xmm0, %xmm0
+; CHECK-NEXT:    xorps %xmm2, %xmm1
 entry:
 	%tmp = getelementptr %struct.anon, %struct.anon* %p, i32 0, i32 0		; <float*> [#uses=1]
 	%tmp1 = load float, float* %tmp		; <float> [#uses=1]
@@ -24,3 +33,20 @@ entry:
 }
 
 declare i32 @printf(...)
+
+; We can not fold the load from the stack into the 'and' instruction because
+; that changes an 8-byte load into a 16-byte load (illegal memory access).
+; We can fold the load of the constant because it is a 16-byte vector constant.
+
+define double @PR22371(double %x) {
+; CHECK-LABEL: PR22371:
+; CHECK:       movsd  16(%esp), %xmm0
+; CHECK-NEXT:  andpd  LCPI1_0, %xmm0
+; CHECK-NEXT:  movlpd  %xmm0, (%esp)
+  %call = tail call double @fabs(double %x) #0
+  ret double %call
+}
+
+declare double @fabs(double) #0
+attributes #0 = { readnone }
+
diff --git a/llvm/test/CodeGen/X86/sse-fcopysign.ll b/llvm/test/CodeGen/X86/sse-fcopysign.ll
index 25634b5472a..8a5462bea82 100644
--- a/llvm/test/CodeGen/X86/sse-fcopysign.ll
+++ b/llvm/test/CodeGen/X86/sse-fcopysign.ll
@@ -55,12 +55,12 @@ declare double @copysign(double, double)
 
 define float @int1(float %a, float %b) {
 ; X32-LABEL: @int1
-; X32:       movss 12(%esp), %xmm0 {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:  movss  8(%esp), %xmm1 {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT:  andps .LCPI2_0, %xmm1
-; X32-NEXT:  andps .LCPI2_1, %xmm0
-; X32-NEXT:  orps  %xmm1, %xmm0
-; X32-NEXT:  movss %xmm0, (%esp)
+; X32:       movss  8(%esp), %xmm0 {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:  andps .LCPI2_0, %xmm0
+; X32-NEXT:  movss 12(%esp), %xmm1 {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT:  andps .LCPI2_1, %xmm1
+; X32-NEXT:  orps  %xmm0, %xmm1
+; X32-NEXT:  movss %xmm1, (%esp)
 ; X32-NEXT:  flds  (%esp)
 ; X32-NEXT:  popl %eax
 ; X32-NEXT:  retl
@@ -76,14 +76,14 @@ define float @int1(float %a, float %b) {
 
 define double @int2(double %a, float %b, float %c) {
 ; X32-LABEL: @int2
-; X32:       movsd  8(%ebp), %xmm0 {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:  movss 16(%ebp), %xmm1 {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT:  addss 20(%ebp), %xmm1
-; X32-NEXT:  andpd .LCPI3_0, %xmm0
-; X32-NEXT:  cvtss2sd %xmm1, %xmm1
-; X32-NEXT:  andpd .LCPI3_1, %xmm1
-; X32-NEXT:  orpd  %xmm0, %xmm1
-; X32-NEXT:  movsd %xmm1, (%esp)
+; X32:       movss 16(%ebp), %xmm0 {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:  addss 20(%ebp), %xmm0
+; X32-NEXT:  movsd  8(%ebp), %xmm1 {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT:  andpd .LCPI3_0, %xmm1
+; X32-NEXT:  cvtss2sd %xmm0, %xmm0
+; X32-NEXT:  andpd .LCPI3_1, %xmm0
+; X32-NEXT:  orpd  %xmm1, %xmm0
+; X32-NEXT:  movlpd %xmm0, (%esp)
 ; X32-NEXT:  fldl  (%esp)
 ; X32-NEXT:  movl %ebp, %esp
 ; X32-NEXT:  popl %ebp
@@ -91,9 +91,9 @@ define double @int2(double %a, float %b, float %c) {
 ;
 ; X64-LABEL: @int2
 ; X64:       addss %xmm2, %xmm1
-; X64-NEXT:  andpd .LCPI3_0(%rip), %xmm0
 ; X64-NEXT:  cvtss2sd %xmm1, %xmm1
-; X64-NEXT:  andpd .LCPI3_1(%rip), %xmm1
+; X64-NEXT:  andpd .LCPI3_0(%rip), %xmm1
+; X64-NEXT:  andpd .LCPI3_1(%rip), %xmm0
 ; X64-NEXT:  orpd %xmm1, %xmm0
 ; X64-NEXT:  retq
   %tmp1 = fadd float %b, %c
diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll
index bfefbcf5ebd..960b5f27cf5 100644
--- a/llvm/test/CodeGen/X86/vec_fabs.ll
+++ b/llvm/test/CodeGen/X86/vec_fabs.ll
@@ -4,7 +4,7 @@
 define <2 x double> @fabs_v2f64(<2 x double> %p)
 {
   ; CHECK-LABEL: fabs_v2f64
-  ; CHECK: vandps
+  ; CHECK: vandpd
   %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
   ret <2 x double> %t
 }
@@ -22,7 +22,7 @@ declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
 define <4 x double> @fabs_v4f64(<4 x double> %p)
 {
   ; CHECK-LABEL: fabs_v4f64
-  ; CHECK: vandps
+  ; CHECK: vandpd
   %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
   ret <4 x double> %t
 }
author	Sanjay Patel <spatel@rotateright.com>	2015-07-28 00:48:32 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2015-07-28 00:48:32 +0000
commit	8c13e3680d3f4fc88830c402a2bcf929db0de9dd (patch)
tree	26687837bdd765f0eb81a1c96076c7f2e3dbe61a /llvm/test/CodeGen
parent	203f09223bb0bed16194d6a28431ba6a2fd5346a (diff)
download	bcm5719-llvm-8c13e3680d3f4fc88830c402a2bcf929db0de9dd.tar.gz bcm5719-llvm-8c13e3680d3f4fc88830c402a2bcf929db0de9dd.zip