diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-01-31 17:48:35 +0000 | 
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-01-31 17:48:35 +0000 | 
| commit | eb6aef6db36ab681fb57cabd24c1f2be51fa5324 (patch) | |
| tree | d960f0a9d835b2dd8c78eec0cc31fe49f266abd3 /llvm | |
| parent | d04a2d2d5e3183476b0e5d2fe3ea824de2f76917 (diff) | |
| download | bcm5719-llvm-eb6aef6db36ab681fb57cabd24c1f2be51fa5324.tar.gz bcm5719-llvm-eb6aef6db36ab681fb57cabd24c1f2be51fa5324.zip  | |
[X86][AVX] Fold concat(broadcast(x),broadcast(x)) -> broadcast(x)
Differential Revision: https://reviews.llvm.org/D57514
llvm-svn: 352774
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 11 |
| -rw-r--r-- | llvm/test/CodeGen/X86/subvector-broadcast.ll | 79 |
| -rw-r--r-- | llvm/test/CodeGen/X86/widened-broadcast.ll | 22 |
3 files changed, 30 insertions, 82 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index da479eb692c..18240ef0014 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -41651,12 +41651,11 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,          if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) && Vec.hasOneUse())            return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec); -      // If this is subv_broadcast insert into both halves, use a larger -      // subv_broadcast. -      // TODO - handle X86ISD::VBROADCAST as well? -      if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) -        return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, -                           SubVec.getOperand(0)); +      // If this broadcast/subv_broadcast is inserted into both halves, use a +      // larger broadcast/subv_broadcast. +      if (SubVec == SubVec2 && (SubVec.getOpcode() == X86ISD::VBROADCAST || +                                SubVec.getOpcode() == X86ISD::SUBV_BROADCAST)) +        return DAG.getNode(SubVec.getOpcode(), dl, OpVT, SubVec.getOperand(0));        // If we're inserting all zeros into the upper half, change this to        // an insert into an all zeros vector. 
We will match this to a move diff --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll b/llvm/test/CodeGen/X86/subvector-broadcast.ll index 066e4dc016a..c0ab9ac47c9 100644 --- a/llvm/test/CodeGen/X86/subvector-broadcast.ll +++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll @@ -1582,60 +1582,28 @@ define <4 x i32> @test_2xi32_to_4xi32_mem(<2 x i32>* %vp) {  }  define <8 x i32> @test_2xi32_to_8xi32_mem(<2 x i32>* %vp) { -; X32-AVX1-LABEL: test_2xi32_to_8xi32_mem: -; X32-AVX1:       # %bb.0: -; X32-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax -; X32-AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0] -; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X32-AVX1-NEXT:    retl -; -; X32-AVX2-LABEL: test_2xi32_to_8xi32_mem: -; X32-AVX2:       # %bb.0: -; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax -; X32-AVX2-NEXT:    vbroadcastsd (%eax), %ymm0 -; X32-AVX2-NEXT:    retl -; -; X32-AVX512-LABEL: test_2xi32_to_8xi32_mem: -; X32-AVX512:       # %bb.0: -; X32-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax -; X32-AVX512-NEXT:    vbroadcastsd (%eax), %ymm0 -; X32-AVX512-NEXT:    retl -; -; X64-AVX1-LABEL: test_2xi32_to_8xi32_mem: -; X64-AVX1:       # %bb.0: -; X64-AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0] -; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X64-AVX1-NEXT:    retq -; -; X64-AVX2-LABEL: test_2xi32_to_8xi32_mem: -; X64-AVX2:       # %bb.0: -; X64-AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0 -; X64-AVX2-NEXT:    retq +; X32-LABEL: test_2xi32_to_8xi32_mem: +; X32:       # %bb.0: +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X32-NEXT:    vbroadcastsd (%eax), %ymm0 +; X32-NEXT:    retl  ; -; X64-AVX512-LABEL: test_2xi32_to_8xi32_mem: -; X64-AVX512:       # %bb.0: -; X64-AVX512-NEXT:    vbroadcastsd (%rdi), %ymm0 -; X64-AVX512-NEXT:    retq +; X64-LABEL: test_2xi32_to_8xi32_mem: +; X64:       # %bb.0: +; X64-NEXT:    vbroadcastsd (%rdi), %ymm0 +; X64-NEXT:    retq    %vec = load <2 x i32>, <2 x i32>* %vp    %res = shufflevector <2 x i32> %vec, <2 x i32> undef, 
<8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>    ret <8 x i32> %res  }  define <16 x i32> @test_2xi32_to_16xi32_mem(<2 x i32>* %vp) { -; X32-AVX1-LABEL: test_2xi32_to_16xi32_mem: -; X32-AVX1:       # %bb.0: -; X32-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax -; X32-AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0] -; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X32-AVX1-NEXT:    vmovaps %ymm0, %ymm1 -; X32-AVX1-NEXT:    retl -; -; X32-AVX2-LABEL: test_2xi32_to_16xi32_mem: -; X32-AVX2:       # %bb.0: -; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax -; X32-AVX2-NEXT:    vbroadcastsd (%eax), %ymm0 -; X32-AVX2-NEXT:    vmovaps %ymm0, %ymm1 -; X32-AVX2-NEXT:    retl +; X32-AVX-LABEL: test_2xi32_to_16xi32_mem: +; X32-AVX:       # %bb.0: +; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X32-AVX-NEXT:    vbroadcastsd (%eax), %ymm0 +; X32-AVX-NEXT:    vmovaps %ymm0, %ymm1 +; X32-AVX-NEXT:    retl  ;  ; X32-AVX512-LABEL: test_2xi32_to_16xi32_mem:  ; X32-AVX512:       # %bb.0: @@ -1645,18 +1613,11 @@ define <16 x i32> @test_2xi32_to_16xi32_mem(<2 x i32>* %vp) {  ; X32-AVX512-NEXT:    vpermd %zmm0, %zmm1, %zmm0  ; X32-AVX512-NEXT:    retl  ; -; X64-AVX1-LABEL: test_2xi32_to_16xi32_mem: -; X64-AVX1:       # %bb.0: -; X64-AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0] -; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X64-AVX1-NEXT:    vmovaps %ymm0, %ymm1 -; X64-AVX1-NEXT:    retq -; -; X64-AVX2-LABEL: test_2xi32_to_16xi32_mem: -; X64-AVX2:       # %bb.0: -; X64-AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0 -; X64-AVX2-NEXT:    vmovaps %ymm0, %ymm1 -; X64-AVX2-NEXT:    retq +; X64-AVX-LABEL: test_2xi32_to_16xi32_mem: +; X64-AVX:       # %bb.0: +; X64-AVX-NEXT:    vbroadcastsd (%rdi), %ymm0 +; X64-AVX-NEXT:    vmovaps %ymm0, %ymm1 +; X64-AVX-NEXT:    retq  ;  ; X64-AVX512-LABEL: test_2xi32_to_16xi32_mem:  ; X64-AVX512:       # %bb.0: diff --git a/llvm/test/CodeGen/X86/widened-broadcast.ll b/llvm/test/CodeGen/X86/widened-broadcast.ll index 
c192ee6fd28..ecbeb532f27 100644 --- a/llvm/test/CodeGen/X86/widened-broadcast.ll +++ b/llvm/test/CodeGen/X86/widened-broadcast.ll @@ -607,21 +607,10 @@ define <8 x i32> @load_splat_8i32_2i32_0101(<2 x i32>* %vp) {  ; SSE-NEXT:    movdqa %xmm0, %xmm1  ; SSE-NEXT:    retq  ; -; AVX1-LABEL: load_splat_8i32_2i32_0101: -; AVX1:       # %bb.0: -; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0] -; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT:    retq -; -; AVX2-LABEL: load_splat_8i32_2i32_0101: -; AVX2:       # %bb.0: -; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0 -; AVX2-NEXT:    retq -; -; AVX512-LABEL: load_splat_8i32_2i32_0101: -; AVX512:       # %bb.0: -; AVX512-NEXT:    vbroadcastsd (%rdi), %ymm0 -; AVX512-NEXT:    retq +; AVX-LABEL: load_splat_8i32_2i32_0101: +; AVX:       # %bb.0: +; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0 +; AVX-NEXT:    retq    %vec = load <2 x i32>, <2 x i32>* %vp    %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>    ret <8 x i32> %res @@ -639,8 +628,7 @@ define <16 x i32> @load_splat_16i32_2i32_0101(<2 x i32>* %vp) {  ;  ; AVX1-LABEL: load_splat_16i32_2i32_0101:  ; AVX1:       # %bb.0: -; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0] -; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0  ; AVX1-NEXT:    vmovaps %ymm0, %ymm1  ; AVX1-NEXT:    retq  ;  | 

