diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-01-31 14:04:07 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-01-31 14:04:07 +0000 |
commit | 63f3383ece25efa625eab4d0cd75b4225a3f034c (patch) | |
tree | 2516c1fb102104328d39ce3a1c9e374ab7197671 | |
parent | 140f75f625bc815c5c7c73a6ba765d49998f95c4 (diff) | |
download | bcm5719-llvm-63f3383ece25efa625eab4d0cd75b4225a3f034c.tar.gz bcm5719-llvm-63f3383ece25efa625eab4d0cd75b4225a3f034c.zip |
[X86][AVX] Fold broadcast(bitcast(src)) -> bitcast(broadcast(src))
llvm-svn: 352751
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx-vbroadcast.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx2-vbroadcast.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/subvector-broadcast.ll | 9 |
4 files changed, 16 insertions, 12 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cb5dce233f7..9bc3d482074 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31938,6 +31938,14 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
       return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
                          DAG.getBitcast(SrcVT, Res));
     }
+    // broadcast(bitcast(src)) -> bitcast(broadcast(src))
+    // 32-bit targets have to bitcast i64 to f64, so better to bitcast upward.
+    if (Src.getOpcode() == ISD::BITCAST &&
+        SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits()) {
+      EVT NewVT = EVT::getVectorVT(*DAG.getContext(), BCVT.getScalarType(),
+                                   VT.getVectorNumElements());
+      return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
+    }
     return SDValue();
   }
   case X86ISD::PSHUFD:
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index ccb054dce43..9346c944eb8 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -45,9 +45,9 @@ define <4 x i64> @A2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
 ; X64-LABEL: A2:
 ; X64: ## %bb.0: ## %entry
 ; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: vmovq %rax, %xmm0
 ; X64-NEXT: movq %rax, (%rsi)
-; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X64-NEXT: vmovq %rax, %xmm0
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-NEXT: retq
 entry:
@@ -110,8 +110,8 @@ define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: movl (%ecx), %ecx
-; X32-NEXT: vmovd %ecx, %xmm0
 ; X32-NEXT: movl %ecx, (%eax)
+; X32-NEXT: vmovd %ecx, %xmm0
 ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-NEXT: retl
@@ -119,8 +119,8 @@ define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
 ; X64-LABEL: B3:
 ; X64: ## %bb.0: ## %entry
 ; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: vmovd %eax, %xmm0
 ; X64-NEXT: movl %eax, (%rsi)
+; X64-NEXT: vmovd %eax, %xmm0
 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
index b333e9109bd..9d4cfcefd64 100644
--- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -244,8 +244,7 @@ define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) {
 ; X32-LABEL: broadcast_mem_v4i16_v16i16:
 ; X32: ## %bb.0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: vbroadcastsd %xmm0, %ymm0
+; X32-NEXT: vbroadcastsd (%eax), %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: broadcast_mem_v4i16_v16i16:
diff --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll b/llvm/test/CodeGen/X86/subvector-broadcast.ll
index f72d9ba7125..066e4dc016a 100644
--- a/llvm/test/CodeGen/X86/subvector-broadcast.ll
+++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll
@@ -1592,15 +1592,13 @@ define <8 x i32> @test_2xi32_to_8xi32_mem(<2 x i32>* %vp) {
 ; X32-AVX2-LABEL: test_2xi32_to_8xi32_mem:
 ; X32-AVX2: # %bb.0:
 ; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
+; X32-AVX2-NEXT: vbroadcastsd (%eax), %ymm0
 ; X32-AVX2-NEXT: retl
 ;
 ; X32-AVX512-LABEL: test_2xi32_to_8xi32_mem:
 ; X32-AVX512: # %bb.0:
 ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX512-NEXT: vbroadcastsd %xmm0, %ymm0
+; X32-AVX512-NEXT: vbroadcastsd (%eax), %ymm0
 ; X32-AVX512-NEXT: retl
 ;
 ; X64-AVX1-LABEL: test_2xi32_to_8xi32_mem:
@@ -1635,8 +1633,7 @@ define <16 x i32> @test_2xi32_to_16xi32_mem(<2 x i32>* %vp) {
 ; X32-AVX2-LABEL: test_2xi32_to_16xi32_mem:
 ; X32-AVX2: # %bb.0:
 ; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
+; X32-AVX2-NEXT: vbroadcastsd (%eax), %ymm0
 ; X32-AVX2-NEXT: vmovaps %ymm0, %ymm1
 ; X32-AVX2-NEXT: retl
 ;