summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
author: Ahmed Bougacha <ahmed.bougacha@gmail.com> 2015-11-06 23:16:48 +0000
committer: Ahmed Bougacha <ahmed.bougacha@gmail.com> 2015-11-06 23:16:48 +0000
commit: b49eb3ab4b7563d97a871dcd7dae4bd383c6c42f (patch)
tree: 49767fd310390154e5bf0716c76c4b38f6fcf9c5 /llvm/test/CodeGen/X86
parent: 05a0514b12d07be4e7dd959dac9bdfb5204f213a (diff)
download: bcm5719-llvm-b49eb3ab4b7563d97a871dcd7dae4bd383c6c42f.tar.gz
bcm5719-llvm-b49eb3ab4b7563d97a871dcd7dae4bd383c6c42f.zip
[X86] Fold (trunc (i32 (zextload i16))) into vbroadcast.
When matching non-LSB-extracting truncating broadcasts, we now insert the necessary SRL. If the scalar resulted from a load, the SRL will be folded into it, creating a narrower, offset load. However, i16 loads aren't Desirable, so we get i16->i32 zextloads instead. We already catch i16 aextloads; catch these zextloads as well.
llvm-svn: 252363
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll | 8
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll | 8
2 files changed, 4 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
index f4cb64e2c91..cd66524870c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -2258,9 +2258,7 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
;
; AVX2-LABEL: insert_dup_elt1_mem_v8i16_i32:
; AVX2: # BB#0:
-; AVX2-NEXT: movzwl 2(%rdi), %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
; AVX2-NEXT: retq
%tmp = load i32, i32* %ptr, align 4
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -2298,9 +2296,7 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
;
; AVX2-LABEL: insert_dup_elt3_mem_v8i16_i32:
; AVX2: # BB#0:
-; AVX2-NEXT: movzwl 2(%rdi), %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
; AVX2-NEXT: retq
%tmp = load i32, i32* %ptr, align 4
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
index 4d2bcd9bc8d..10e8e3a680c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -3342,9 +3342,7 @@ define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(i32* %ptr) #0 {
;
; AVX2-LABEL: insert_dup_elt1_mem_v16i16_i32:
; AVX2: # BB#0:
-; AVX2-NEXT: movzwl 2(%rdi), %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
; AVX2-NEXT: retq
%tmp = load i32, i32* %ptr, align 4
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -3363,9 +3361,7 @@ define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
;
; AVX2-LABEL: insert_dup_elt3_mem_v16i16_i32:
; AVX2: # BB#0:
-; AVX2-NEXT: movzwl 2(%rdi), %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
; AVX2-NEXT: retq
%tmp = load i32, i32* %ptr, align 4
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
OpenPOWER on IntegriCloud