author    Michael Kuperstein <mkuper@google.com>  2016-11-23 18:33:49 +0000
committer Michael Kuperstein <mkuper@google.com>  2016-11-23 18:33:49 +0000
commit    47eb85a0033fd21764fe100e736c3ec54d4f741f (patch)
tree      87b3a0867fc7cfd6773183ffa1082de972f1bbc8 /llvm/test
parent    3c3fe5d885b8772634a7571907400580873ab611 (diff)
[X86] Allow folding of stack reloads when loading a subreg of the spilled reg
We did not support subregs in InlineSpiller::foldMemoryOperand() because
targets may not deal with them correctly. This adds a target hook to let the
spiller know that a target can handle subregs, and actually enables it for
x86 for the case of stack slot reloads. This fixes PR30832.

Differential Revision: https://reviews.llvm.org/D26521

llvm-svn: 287792
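The hook itself lives outside llvm/test, so it does not appear in this diff.
As a minimal C++ sketch of the opt-in shape the commit message describes (the
name isSubregFoldable and the exact signature are assumptions, not confirmed
by anything on this page):

    // Sketch only: hook name and signature are assumed from the commit
    // message, not taken from code visible in this diff.
    class TargetInstrInfo {
    public:
      virtual ~TargetInstrInfo() = default;
      /// Tell InlineSpiller::foldMemoryOperand() whether this target can fold
      /// a stack reload whose use reads only a subregister of the spilled reg.
      virtual bool isSubregFoldable() const { return false; }
    };

    class X86InstrInfo : public TargetInstrInfo {
    public:
      // x86 can narrow the folded load to the accessed subregister's width,
      // so folding stack-slot reloads of subregs is safe here.
      bool isSubregFoldable() const override { return true; }
    };

With a default of false, targets that have not audited their
foldMemoryOperand implementations keep the old conservative behavior.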
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/X86/partial-fold32.ll          |  3 +--
-rw-r--r--  llvm/test/CodeGen/X86/partial-fold64.ll          |  6 ++----
-rw-r--r--  llvm/test/CodeGen/X86/vector-half-conversions.ll | 20 ++++++++------------
3 files changed, 11 insertions(+), 18 deletions(-)
diff --git a/llvm/test/CodeGen/X86/partial-fold32.ll b/llvm/test/CodeGen/X86/partial-fold32.ll
index ba3f73ba90f..7fc1ed3521e 100644
--- a/llvm/test/CodeGen/X86/partial-fold32.ll
+++ b/llvm/test/CodeGen/X86/partial-fold32.ll
@@ -3,8 +3,7 @@
define fastcc i8 @fold32to8(i32 %add, i8 %spill) {
; CHECK-LABEL: fold32to8:
; CHECK: movl %ecx, (%esp) # 4-byte Spill
-; CHECK: movl (%esp), %eax # 4-byte Reload
-; CHECK: subb %al, %dl
+; CHECK: subb (%esp), %dl # 1-byte Folded Reload
entry:
tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
%trunc = trunc i32 %add to i8
diff --git a/llvm/test/CodeGen/X86/partial-fold64.ll b/llvm/test/CodeGen/X86/partial-fold64.ll
index b9ea7d6773a..15c9d194be4 100644
--- a/llvm/test/CodeGen/X86/partial-fold64.ll
+++ b/llvm/test/CodeGen/X86/partial-fold64.ll
@@ -3,8 +3,7 @@
define i32 @fold64to32(i64 %add, i32 %spill) {
; CHECK-LABEL: fold64to32:
; CHECK: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; CHECK: subl %eax, %esi
+; CHECK: subl -{{[0-9]+}}(%rsp), %esi # 4-byte Folded Reload
entry:
tail call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"()
%trunc = trunc i64 %add to i32
@@ -15,8 +14,7 @@ entry:
define i8 @fold64to8(i64 %add, i8 %spill) {
; CHECK-LABEL: fold64to8:
; CHECK: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; CHECK: subb %al, %sil
+; CHECK: subb -{{[0-9]+}}(%rsp), %sil # 1-byte Folded Reload
entry:
tail call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"()
%trunc = trunc i64 %add to i8
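Both partial-fold tests exercise the little-endian fact that makes the
narrowed fold legal on x86: the low subregister of a spilled value sits at
offset 0 of its stack slot, so a narrow load from the slot yields the same
bits as a full-width reload followed by truncation. A standalone C++
illustration of that equivalence (not part of the patch; assumes a
little-endian host such as x86):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint64_t spilled = 0x1122334455667788ULL;           // value in a stack slot
      uint8_t viaReload = static_cast<uint8_t>(spilled);  // full reload, then trunc
      uint8_t viaFold;
      std::memcpy(&viaFold, &spilled, 1);                 // 1-byte load from offset 0
      assert(viaReload == viaFold);  // identical on little-endian hosts
      return 0;
    }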
diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll
index 0453dc18d01..78522948f0a 100644
--- a/llvm/test/CodeGen/X86/vector-half-conversions.ll
+++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll
@@ -4788,9 +4788,8 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) nounwind {
; AVX1-NEXT: orl %ebx, %r14d
; AVX1-NEXT: shlq $32, %r14
; AVX1-NEXT: orq %r15, %r14
-; AVX1-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT: # xmm0 = mem[1,0]
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movw %ax, %bx
; AVX1-NEXT: shll $16, %ebx
@@ -4856,9 +4855,8 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) nounwind {
; AVX2-NEXT: orl %ebx, %r14d
; AVX2-NEXT: shlq $32, %r14
; AVX2-NEXT: orq %r15, %r14
-; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
-; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX2-NEXT: # xmm0 = mem[1,0]
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movw %ax, %bx
; AVX2-NEXT: shll $16, %ebx
@@ -5585,9 +5583,8 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) nounwind {
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX1-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT: # xmm0 = mem[1,0]
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movl %eax, %r12d
; AVX1-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
@@ -5654,9 +5651,8 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) nounwind {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX2-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
-; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
+; AVX2-NEXT: # xmm0 = mem[1,0]
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movl %eax, %r12d
; AVX2-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
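In the vector tests the same fold narrows a 32-byte ymm reload to a 16-byte
memory operand folded directly into vpermilpd, which also makes the
intervening vzeroupper unnecessary. For reference, a small C++ intrinsics
illustration of the xmm0 = mem[1,0] pattern the CHECK lines encode
(vpermilpd $1 swaps the two double lanes; not part of the patch, compile
with -mavx):

    #include <immintrin.h>
    #include <cassert>

    int main() {
      __m128d v = _mm_set_pd(2.0, 1.0);        // lane 0 = 1.0, lane 1 = 2.0
      __m128d swapped = _mm_permute_pd(v, 1);  // vpermilpd $1: lanes [1,0]
      double out[2];
      _mm_storeu_pd(out, swapped);
      assert(out[0] == 2.0 && out[1] == 1.0);  // lanes swapped
      return 0;
    }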