diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-09-03 22:25:49 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-09-03 22:25:49 +0000 |
| commit | 788fbe08db9c557f8a445540a197e1e9d9c31493 (patch) | |
| tree | 0c632886a268c93f1f310f8489d834e8ba559758 /llvm | |
| parent | d8f067539b5733a9dbddeb8dc2e488aee76f1aa6 (diff) | |
| download | bcm5719-llvm-788fbe08db9c557f8a445540a197e1e9d9c31493.tar.gz bcm5719-llvm-788fbe08db9c557f8a445540a197e1e9d9c31493.zip | |
[X86] Combine inserting a vector of zeros into a vector of zeros to just the larger vector.
llvm-svn: 312458
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 5 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll | 18 |
2 files changed, 9 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0193f2bfd58..2cb6ec376c4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35656,6 +35656,11 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); MVT SubVecVT = SubVec.getSimpleValueType(); + // Inserting zeros into zeros is a nop. + if (ISD::isBuildVectorAllZeros(Vec.getNode()) && + ISD::isBuildVectorAllZeros(SubVec.getNode())) + return Vec; + // If this is an insert of an extract, combine to a shuffle. Don't do this // if the insert or extract can be represented with a subregister operation. if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll index 18afae90ad9..e5487ae2b10 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -1134,17 +1134,13 @@ define <8 x double> @test_mm512_zextpd128_pd512(<2 x double> %a0) nounwind { ; X32-LABEL: test_mm512_zextpd128_pd512: ; X32: # BB#0: ; X32-NEXT: vmovaps %xmm0, %xmm0 -; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X32-NEXT: vmovaps %xmm1, %xmm1 -; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; X32-NEXT: vmovaps %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_zextpd128_pd512: ; X64: # BB#0: ; X64-NEXT: vmovaps %xmm0, %xmm0 -; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64-NEXT: vmovaps %xmm1, %xmm1 -; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; X64-NEXT: vmovaps %ymm0, %ymm0 ; X64-NEXT: retq %res = shufflevector <2 x double> %a0, <2 x double> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> ret <8 x double> %res @@ -1169,7 +1165,6 @@ define <16 x float> @test_mm512_zextps128_ps512(<4 x float> %a0) nounwind { ; X32: # BB#0: ; X32-NEXT: vmovaps %xmm0, %xmm0 ; 
X32-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X32-NEXT: vmovaps %xmm1, %xmm1 ; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; X32-NEXT: retl ; @@ -1177,7 +1172,6 @@ define <16 x float> @test_mm512_zextps128_ps512(<4 x float> %a0) nounwind { ; X64: # BB#0: ; X64-NEXT: vmovaps %xmm0, %xmm0 ; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64-NEXT: vmovaps %xmm1, %xmm1 ; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; X64-NEXT: retq %res = shufflevector <4 x float> %a0, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> @@ -1202,17 +1196,13 @@ define <8 x i64> @test_mm512_zextsi128_si512(<2 x i64> %a0) nounwind { ; X32-LABEL: test_mm512_zextsi128_si512: ; X32: # BB#0: ; X32-NEXT: vmovaps %xmm0, %xmm0 -; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X32-NEXT: vmovaps %xmm1, %xmm1 -; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; X32-NEXT: vmovaps %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_zextsi128_si512: ; X64: # BB#0: ; X64-NEXT: vmovaps %xmm0, %xmm0 -; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64-NEXT: vmovaps %xmm1, %xmm1 -; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; X64-NEXT: vmovaps %ymm0, %ymm0 ; X64-NEXT: retq %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> ret <8 x i64> %res |

