diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll | 8 | 
3 files changed, 10 insertions, 12 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 17edb500d66..e4ff9b34345 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8365,6 +8365,12 @@ let Predicates = [HasAVX2] in {              (VPBROADCASTWrm addr:$src)>;    def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),              (VPBROADCASTWYrm addr:$src)>; +  def : Pat<(v8i16 (X86VBroadcast +              (i16 (trunc (i32 (zextloadi16 addr:$src)))))), +            (VPBROADCASTWrm addr:$src)>; +  def : Pat<(v16i16 (X86VBroadcast +              (i16 (trunc (i32 (zextloadi16 addr:$src)))))), +            (VPBROADCASTWYrm addr:$src)>;    // Provide aliases for broadcast from the same register class that    // automatically does the extract. diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll index f4cb64e2c91..cd66524870c 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -2258,9 +2258,7 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {  ;  ; AVX2-LABEL: insert_dup_elt1_mem_v8i16_i32:  ; AVX2:       # BB#0: -; AVX2-NEXT:    movzwl 2(%rdi), %eax -; AVX2-NEXT:    vmovd %eax, %xmm0 -; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0 +; AVX2-NEXT:    vpbroadcastw 2(%rdi), %xmm0  ; AVX2-NEXT:    retq    %tmp = load i32, i32* %ptr, align 4    %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 @@ -2298,9 +2296,7 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {  ;  ; AVX2-LABEL: insert_dup_elt3_mem_v8i16_i32:  ; AVX2:       # BB#0: -; AVX2-NEXT:    movzwl 2(%rdi), %eax -; AVX2-NEXT:    vmovd %eax, %xmm0 -; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0 +; AVX2-NEXT:    vpbroadcastw 2(%rdi), %xmm0  ; AVX2-NEXT:    retq    %tmp = load i32, i32* %ptr, align 4    %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1 diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll index 4d2bcd9bc8d..10e8e3a680c 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -3342,9 +3342,7 @@ define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(i32* %ptr) #0 {  ;  ; AVX2-LABEL: insert_dup_elt1_mem_v16i16_i32:  ; AVX2:       # BB#0: -; AVX2-NEXT:    movzwl 2(%rdi), %eax -; AVX2-NEXT:    vmovd %eax, %xmm0 -; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0 +; AVX2-NEXT:    vpbroadcastw 2(%rdi), %ymm0  ; AVX2-NEXT:    retq    %tmp = load i32, i32* %ptr, align 4    %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 @@ -3363,9 +3361,7 @@ define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {  ;  ; AVX2-LABEL: insert_dup_elt3_mem_v16i16_i32:  ; AVX2:       # BB#0: -; AVX2-NEXT:    movzwl 2(%rdi), %eax -; AVX2-NEXT:    vmovd %eax, %xmm0 -; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0 +; AVX2-NEXT:    vpbroadcastw 2(%rdi), %ymm0  ; AVX2-NEXT:    retq    %tmp = load i32, i32* %ptr, align 4    %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1  | 

