summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/lower-vec-shift.ll
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2017-10-04 17:20:12 +0000
committerCraig Topper <craig.topper@intel.com>2017-10-04 17:20:12 +0000
commit6fb55716e9c77e716f4ac25abc84687eac29f848 (patch)
tree07097be59b5b39b860012ae8d20d8248fb9a6506 /llvm/test/CodeGen/X86/lower-vec-shift.ll
parent5a09996ff70ee0cd533c8d401f3924aee662551f (diff)
downloadbcm5719-llvm-6fb55716e9c77e716f4ac25abc84687eac29f848.tar.gz
bcm5719-llvm-6fb55716e9c77e716f4ac25abc84687eac29f848.zip
[X86] Redefine MOVSS/MOVSD instructions to take VR128 regclass as input instead of FR32/FR64
This patch redefines the MOVSS/MOVSD instructions to take VR128 as its second input. This allows the MOVSS/SD->BLEND commute to work without requiring a COPY to be inserted. This should fix PR33079. Overall this looks to be an improvement in the generated code. I haven't checked the EXPENSIVE_CHECKS build but I'll do that and update with results. Differential Revision: https://reviews.llvm.org/D38449 llvm-svn: 314914
Diffstat (limited to 'llvm/test/CodeGen/X86/lower-vec-shift.ll')
-rw-r--r--llvm/test/CodeGen/X86/lower-vec-shift.ll56
1 file changed, 24 insertions, 32 deletions
diff --git a/llvm/test/CodeGen/X86/lower-vec-shift.ll b/llvm/test/CodeGen/X86/lower-vec-shift.ll
index 8d64baf5f2a..936de7c761a 100644
--- a/llvm/test/CodeGen/X86/lower-vec-shift.ll
+++ b/llvm/test/CodeGen/X86/lower-vec-shift.ll
@@ -12,10 +12,9 @@ define <8 x i16> @test1(<8 x i16> %a) {
; SSE-LABEL: test1:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrlw $2, %xmm1
-; SSE-NEXT: psrlw $3, %xmm0
-; SSE-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: psrlw $3, %xmm1
+; SSE-NEXT: psrlw $2, %xmm0
+; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: retq
;
; AVX1-LABEL: test1:
@@ -39,10 +38,9 @@ define <8 x i16> @test2(<8 x i16> %a) {
; SSE-LABEL: test2:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrlw $2, %xmm1
-; SSE-NEXT: psrlw $3, %xmm0
-; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: psrlw $3, %xmm1
+; SSE-NEXT: psrlw $2, %xmm0
+; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: retq
;
; AVX1-LABEL: test2:
@@ -66,10 +64,9 @@ define <4 x i32> @test3(<4 x i32> %a) {
; SSE-LABEL: test3:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $2, %xmm1
-; SSE-NEXT: psrld $3, %xmm0
-; SSE-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: psrld $3, %xmm1
+; SSE-NEXT: psrld $2, %xmm0
+; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: retq
;
; AVX1-LABEL: test3:
@@ -91,10 +88,9 @@ define <4 x i32> @test4(<4 x i32> %a) {
; SSE-LABEL: test4:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $2, %xmm1
-; SSE-NEXT: psrld $3, %xmm0
-; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: psrld $3, %xmm1
+; SSE-NEXT: psrld $2, %xmm0
+; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: retq
;
; AVX1-LABEL: test4:
@@ -116,10 +112,9 @@ define <8 x i16> @test5(<8 x i16> %a) {
; SSE-LABEL: test5:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psraw $2, %xmm1
-; SSE-NEXT: psraw $3, %xmm0
-; SSE-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: psraw $3, %xmm1
+; SSE-NEXT: psraw $2, %xmm0
+; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: retq
;
; AVX1-LABEL: test5:
@@ -143,10 +138,9 @@ define <8 x i16> @test6(<8 x i16> %a) {
; SSE-LABEL: test6:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psraw $2, %xmm1
-; SSE-NEXT: psraw $3, %xmm0
-; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: psraw $3, %xmm1
+; SSE-NEXT: psraw $2, %xmm0
+; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: retq
;
; AVX1-LABEL: test6:
@@ -170,10 +164,9 @@ define <4 x i32> @test7(<4 x i32> %a) {
; SSE-LABEL: test7:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrad $2, %xmm1
-; SSE-NEXT: psrad $3, %xmm0
-; SSE-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: psrad $3, %xmm1
+; SSE-NEXT: psrad $2, %xmm0
+; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: retq
;
; AVX1-LABEL: test7:
@@ -195,10 +188,9 @@ define <4 x i32> @test8(<4 x i32> %a) {
; SSE-LABEL: test8:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrad $2, %xmm1
-; SSE-NEXT: psrad $3, %xmm0
-; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: psrad $3, %xmm1
+; SSE-NEXT: psrad $2, %xmm0
+; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: retq
;
; AVX1-LABEL: test8:
OpenPOWER on IntegriCloud