summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2017-09-03 22:25:49 +0000
committerCraig Topper <craig.topper@intel.com>2017-09-03 22:25:49 +0000
commit788fbe08db9c557f8a445540a197e1e9d9c31493 (patch)
tree0c632886a268c93f1f310f8489d834e8ba559758 /llvm
parentd8f067539b5733a9dbddeb8dc2e488aee76f1aa6 (diff)
downloadbcm5719-llvm-788fbe08db9c557f8a445540a197e1e9d9c31493.tar.gz
bcm5719-llvm-788fbe08db9c557f8a445540a197e1e9d9c31493.zip
[X86] Combine inserting a vector of zeros into a vector of zeros just the larger vector.
llvm-svn: 312458
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp5
-rw-r--r--llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll18
2 files changed, 9 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0193f2bfd58..2cb6ec376c4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35656,6 +35656,11 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
MVT SubVecVT = SubVec.getSimpleValueType();
+ // Inserting zeros into zeros is a nop.
+ if (ISD::isBuildVectorAllZeros(Vec.getNode()) &&
+ ISD::isBuildVectorAllZeros(SubVec.getNode()))
+ return Vec;
+
// If this is an insert of an extract, combine to a shuffle. Don't do this
// if the insert or extract can be represented with a subregister operation.
if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
index 18afae90ad9..e5487ae2b10 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -1134,17 +1134,13 @@ define <8 x double> @test_mm512_zextpd128_pd512(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm512_zextpd128_pd512:
; X32: # BB#0:
; X32-NEXT: vmovaps %xmm0, %xmm0
-; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT: vmovaps %xmm1, %xmm1
-; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT: vmovaps %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_zextpd128_pd512:
; X64: # BB#0:
; X64-NEXT: vmovaps %xmm0, %xmm0
-; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT: vmovaps %xmm1, %xmm1
-; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; X64-NEXT: vmovaps %ymm0, %ymm0
; X64-NEXT: retq
%res = shufflevector <2 x double> %a0, <2 x double> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
ret <8 x double> %res
@@ -1169,7 +1165,6 @@ define <16 x float> @test_mm512_zextps128_ps512(<4 x float> %a0) nounwind {
; X32: # BB#0:
; X32-NEXT: vmovaps %xmm0, %xmm0
; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT: vmovaps %xmm1, %xmm1
; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-NEXT: retl
;
@@ -1177,7 +1172,6 @@ define <16 x float> @test_mm512_zextps128_ps512(<4 x float> %a0) nounwind {
; X64: # BB#0:
; X64-NEXT: vmovaps %xmm0, %xmm0
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT: vmovaps %xmm1, %xmm1
; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X64-NEXT: retq
%res = shufflevector <4 x float> %a0, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
@@ -1202,17 +1196,13 @@ define <8 x i64> @test_mm512_zextsi128_si512(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm512_zextsi128_si512:
; X32: # BB#0:
; X32-NEXT: vmovaps %xmm0, %xmm0
-; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT: vmovaps %xmm1, %xmm1
-; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT: vmovaps %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_zextsi128_si512:
; X64: # BB#0:
; X64-NEXT: vmovaps %xmm0, %xmm0
-; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT: vmovaps %xmm1, %xmm1
-; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; X64-NEXT: vmovaps %ymm0, %ymm0
; X64-NEXT: retq
%res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
ret <8 x i64> %res
OpenPOWER on IntegriCloud