diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/madd.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/madd.ll | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll
index 3fd4896f6a3..0523fa78699 100644
--- a/llvm/test/CodeGen/X86/madd.ll
+++ b/llvm/test/CodeGen/X86/madd.ll
@@ -41,14 +41,14 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
 ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
 ; AVX2-NEXT: vmovdqu (%rsi,%rcx,2), %xmm2
 ; AVX2-NEXT: vpmaddwd (%rdi,%rcx,2), %xmm2, %xmm2
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
-; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT: addq $8, %rcx
 ; AVX2-NEXT: cmpq %rcx, %rax
 ; AVX2-NEXT: jne .LBB0_1
 ; AVX2-NEXT: # BB#2: # %middle.block
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vphaddd %ymm0, %ymm0, %ymm0
@@ -67,14 +67,14 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
 ; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
 ; AVX512-NEXT: vmovdqu (%rsi,%rcx,2), %xmm2
 ; AVX512-NEXT: vpmaddwd (%rdi,%rcx,2), %xmm2, %xmm2
-; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
-; AVX512-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; AVX512-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; AVX512-NEXT: vpaddd %ymm1, %ymm2, %ymm1
 ; AVX512-NEXT: addq $8, %rcx
 ; AVX512-NEXT: cmpq %rcx, %rax
 ; AVX512-NEXT: jne .LBB0_1
 ; AVX512-NEXT: # BB#2: # %middle.block
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX512-NEXT: vpaddd %ymm0, %ymm1, %ymm0
 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT: vphaddd %ymm0, %ymm0, %ymm0
@@ -317,14 +317,14 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
 ; AVX512-NEXT: vpmovsxbw (%rdi,%rcx), %ymm2
 ; AVX512-NEXT: vpmovsxbw (%rsi,%rcx), %ymm3
 ; AVX512-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2
-; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm2
-; AVX512-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; AVX512-NEXT: vinserti64x4 $0, %ymm2, %zmm0, %zmm2
+; AVX512-NEXT: vpaddd %zmm1, %zmm2, %zmm1
 ; AVX512-NEXT: addq $16, %rcx
 ; AVX512-NEXT: cmpq %rcx, %rax
 ; AVX512-NEXT: jne .LBB2_1
 ; AVX512-NEXT: # BB#2: # %middle.block
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vextracti64x4 $1, %zmm1, %ymm0
+; AVX512-NEXT: vpaddd %zmm0, %zmm1, %zmm0
 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]

