author    Sanjay Patel <spatel@rotateright.com>    2017-09-16 13:29:12 +0000
committer Sanjay Patel <spatel@rotateright.com>    2017-09-16 13:29:12 +0000
commit    65d67807039df6a6b5e879fcf4686493680a4812 (patch)
tree      5007a001db4d1686f4952ad4cf32a6e8cc56e574 /llvm/test/CodeGen/X86/merge-store-constants.ll
parent    053417946fad2941dc5c8cd7af0b5002e0066cae (diff)
[x86] enable storeOfVectorConstantIsCheap() target hook
This allows vector-sized store merging of constants in DAGCombiner using the existing code in MergeConsecutiveStores(). All of the twisted logic that decides exactly which vector operations are legal and fast for each particular CPU is handled separately in there using the appropriate hooks.

For the motivating tests in merge-store-constants.ll, we already produce the same vector code in IR via the SLP vectorizer, so this is just providing a backend backstop for code that doesn't go through that pass (-O1).

More details in PR24449: https://bugs.llvm.org/show_bug.cgi?id=24449 (this change should be the last step to resolve that bug).

Differential Revision: https://reviews.llvm.org/D37451

llvm-svn: 313458
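For context, storeOfVectorConstantIsCheap() is a TargetLowering hook that MergeConsecutiveStores() queries before rewriting a run of scalar constant stores as a single vector store. A minimal sketch of the kind of override this commit enables in the X86 backend follows; the class placement matches X86ISelLowering.h, but the exact body and threshold are assumptions here rather than a quote of the patch.

// Sketch (assumed details) of the override inside X86TargetLowering,
// llvm/lib/Target/X86/X86ISelLowering.h.
class X86TargetLowering final : public TargetLowering {
  // ...
  bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                    unsigned AddrSpace) const override {
    // Replacing more than two scalar stores with one constant-pool load plus
    // one vector store still reduces the instruction count, so report the
    // vector form as cheap in that case.
    return NumElem > 2;
  }
  // ...
};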
Diffstat (limited to 'llvm/test/CodeGen/X86/merge-store-constants.ll')
-rw-r--r--  llvm/test/CodeGen/X86/merge-store-constants.ll | 81
1 file changed, 44 insertions(+), 37 deletions(-)
diff --git a/llvm/test/CodeGen/X86/merge-store-constants.ll b/llvm/test/CodeGen/X86/merge-store-constants.ll
index a06f43f7a11..f5c36ca4c2f 100644
--- a/llvm/test/CodeGen/X86/merge-store-constants.ll
+++ b/llvm/test/CodeGen/X86/merge-store-constants.ll
@@ -6,18 +6,14 @@ define void @big_nonzero_16_bytes(i32* nocapture %a) {
; X32-LABEL: big_nonzero_16_bytes:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl $1, (%eax)
-; X32-NEXT: movl $2, 4(%eax)
-; X32-NEXT: movl $3, 8(%eax)
-; X32-NEXT: movl $4, 12(%eax)
+; X32-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,3,4]
+; X32-NEXT: vmovups %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: big_nonzero_16_bytes:
; X64: # BB#0:
-; X64-NEXT: movabsq $8589934593, %rax # imm = 0x200000001
-; X64-NEXT: movq %rax, (%rdi)
-; X64-NEXT: movabsq $17179869187, %rax # imm = 0x400000003
-; X64-NEXT: movq %rax, 8(%rdi)
+; X64-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,3,4]
+; X64-NEXT: vmovups %xmm0, (%rdi)
; X64-NEXT: retq
%arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 2
@@ -30,29 +26,48 @@ define void @big_nonzero_16_bytes(i32* nocapture %a) {
ret void
}
+; TODO: We assumed that two 64-bit stores were better than 1 vector load and 1 vector store.
+; But if the 64-bit constants can't be represented as sign-extended 32-bit constants, then
+; it takes extra instructions to do this in scalar.
+
+define void @big_nonzero_16_bytes_big64bit_constants(i64* nocapture %a) {
+; X32-LABEL: big_nonzero_16_bytes_big64bit_constants:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,3]
+; X32-NEXT: vmovups %xmm0, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: big_nonzero_16_bytes_big64bit_constants:
+; X64: # BB#0:
+; X64-NEXT: movabsq $4294967297, %rax # imm = 0x100000001
+; X64-NEXT: movq %rax, (%rdi)
+; X64-NEXT: movabsq $12884901889, %rax # imm = 0x300000001
+; X64-NEXT: movq %rax, 8(%rdi)
+; X64-NEXT: retq
+ %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 1
+
+ store i64 4294967297, i64* %a
+ store i64 12884901889, i64* %arrayidx1
+ ret void
+}
+
; Splats may be an opportunity to use a broadcast op.
define void @big_nonzero_32_bytes_splat(i32* nocapture %a) {
; X32-LABEL: big_nonzero_32_bytes_splat:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl $42, (%eax)
-; X32-NEXT: movl $42, 4(%eax)
-; X32-NEXT: movl $42, 8(%eax)
-; X32-NEXT: movl $42, 12(%eax)
-; X32-NEXT: movl $42, 16(%eax)
-; X32-NEXT: movl $42, 20(%eax)
-; X32-NEXT: movl $42, 24(%eax)
-; X32-NEXT: movl $42, 28(%eax)
+; X32-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42]
+; X32-NEXT: vmovups %ymm0, (%eax)
+; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: big_nonzero_32_bytes_splat:
; X64: # BB#0:
-; X64-NEXT: movabsq $180388626474, %rax # imm = 0x2A0000002A
-; X64-NEXT: movq %rax, (%rdi)
-; X64-NEXT: movq %rax, 8(%rdi)
-; X64-NEXT: movq %rax, 16(%rdi)
-; X64-NEXT: movq %rax, 24(%rdi)
+; X64-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42]
+; X64-NEXT: vmovups %ymm0, (%rdi)
+; X64-NEXT: vzeroupper
; X64-NEXT: retq
%arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 2
@@ -79,37 +94,29 @@ define void @big_nonzero_63_bytes(i8* nocapture %a) {
; X32-LABEL: big_nonzero_63_bytes:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl $0, 4(%eax)
-; X32-NEXT: movl $1, (%eax)
-; X32-NEXT: movl $0, 12(%eax)
-; X32-NEXT: movl $2, 8(%eax)
-; X32-NEXT: movl $0, 20(%eax)
-; X32-NEXT: movl $3, 16(%eax)
-; X32-NEXT: movl $0, 28(%eax)
-; X32-NEXT: movl $4, 24(%eax)
-; X32-NEXT: movl $0, 36(%eax)
-; X32-NEXT: movl $5, 32(%eax)
-; X32-NEXT: movl $0, 44(%eax)
-; X32-NEXT: movl $6, 40(%eax)
+; X32-NEXT: vmovaps {{.*#+}} ymm0 = [1,0,2,0,3,0,4,0]
+; X32-NEXT: vmovups %ymm0, (%eax)
+; X32-NEXT: vmovaps {{.*#+}} xmm0 = [5,0,6,0]
+; X32-NEXT: vmovups %xmm0, 32(%eax)
; X32-NEXT: movl $0, 52(%eax)
; X32-NEXT: movl $7, 48(%eax)
; X32-NEXT: movl $8, 56(%eax)
; X32-NEXT: movw $9, 60(%eax)
; X32-NEXT: movb $10, 62(%eax)
+; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: big_nonzero_63_bytes:
; X64: # BB#0:
-; X64-NEXT: movq $1, (%rdi)
-; X64-NEXT: movq $2, 8(%rdi)
-; X64-NEXT: movq $3, 16(%rdi)
-; X64-NEXT: movq $4, 24(%rdi)
+; X64-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,3,4]
+; X64-NEXT: vmovups %ymm0, (%rdi)
; X64-NEXT: movq $5, 32(%rdi)
; X64-NEXT: movq $6, 40(%rdi)
; X64-NEXT: movq $7, 48(%rdi)
; X64-NEXT: movl $8, 56(%rdi)
; X64-NEXT: movw $9, 60(%rdi)
; X64-NEXT: movb $10, 62(%rdi)
+; X64-NEXT: vzeroupper
; X64-NEXT: retq
%a8 = bitcast i8* %a to i64*
%arrayidx8 = getelementptr inbounds i64, i64* %a8, i64 1
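A note on the TODO above the new big_nonzero_16_bytes_big64bit_constants test: on x86-64, a memory-destination 'movq $imm, mem' can only encode a sign-extended 32-bit immediate, so constants such as 0x100000001 force a separate movabsq into a register before each store, which is why the X64 output keeps two movabsq/movq pairs. The helper below is purely illustrative (a hypothetical name, not code from this patch) and shows how that encodability property can be checked with LLVM's isInt<> utility.

#include "llvm/Support/MathExtras.h"

// Illustrative only: returns true if Imm can be encoded directly as the
// sign-extended 32-bit immediate of a 64-bit store. Values like 1..4 qualify;
// 0x100000001 and 0x300000001 do not, so they need movabsq + movq instead.
static bool fitsAsSExt32StoreImm(int64_t Imm) {
  return llvm::isInt<32>(Imm);
}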