summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-02-03 14:39:41 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-02-03 14:39:41 +0000
commit: a2a3e5b811aa6b4ec43f28d365c0f798ba0d5772 (patch)
tree: 5efabe36936a799413daea468aa009792438625d
parent: 3d6ecfc078e45565ffca3cc5bdf025587ee3de50 (diff)
download: bcm5719-llvm-a2a3e5b811aa6b4ec43f28d365c0f798ba0d5772.tar.gz
          bcm5719-llvm-a2a3e5b811aa6b4ec43f28d365c0f798ba0d5772.zip
[X86][AVX] More aggressively simplify BROADCAST source operand
Aim to use scalar source or lowest 128-bit vector directly. We're still missing some VZMOVL_LOAD combines.

llvm-svn: 352994
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 16
-rw-r--r-- llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll 35
-rw-r--r-- llvm/test/CodeGen/X86/subvector-broadcast.ll 6
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll 13
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll 6
-rw-r--r-- llvm/test/CodeGen/X86/widened-broadcast.ll 3
6 files changed, 31 insertions, 48 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bd132cd7340..9ea3c64697e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32004,12 +32004,13 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
switch (Opcode) {
case X86ISD::VBROADCAST: {
- // If broadcasting from another shuffle, attempt to simplify it.
- // TODO - we really need a general SimplifyDemandedVectorElts mechanism.
SDValue Src = N.getOperand(0);
SDValue BC = peekThroughBitcasts(Src);
EVT SrcVT = Src.getValueType();
EVT BCVT = BC.getValueType();
+
+ // If broadcasting from another shuffle, attempt to simplify it.
+ // TODO - we really need a general SimplifyDemandedVectorElts mechanism.
if (isTargetShuffle(BC.getOpcode()) &&
VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits() == 0) {
unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits();
@@ -32023,6 +32024,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getBitcast(SrcVT, Res));
}
+
// broadcast(bitcast(src)) -> bitcast(broadcast(src))
// 32-bit targets have to bitcast i64 to f64, so better to bitcast upward.
if (Src.getOpcode() == ISD::BITCAST &&
@@ -32031,6 +32033,16 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
VT.getVectorNumElements());
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
}
+
+ // Reduce broadcast source vector to lowest 128-bits.
+ if (SrcVT.getSizeInBits() > 128)
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
+ extract128BitVector(Src, 0, DAG, DL));
+
+ // broadcast(scalar_to_vector(x)) -> broadcast(x).
+ if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
+
return SDValue();
}
case X86ISD::PSHUFD:
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll
index e33399e97cd..9c9df48c03c 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll
@@ -536,8 +536,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask3(<2 x i32>* %vp, <8 x i3
define <16 x i32> @test_2xi32_to_16xi32_mem(<2 x i32>* %vp) {
; CHECK-LABEL: test_2xi32_to_16xi32_mem:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
+; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -546,10 +545,8 @@ define <16 x i32> @test_2xi32_to_16xi32_mem(<2 x i32>* %vp) {
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask0(<2 x i32>* %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm2, %zmm2
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -561,10 +558,8 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask0(<2 x i32>* %vp, <16 x i
define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask0(<2 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm1, %zmm1
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -575,10 +570,8 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask0(<2 x i32>* %vp, <16 x
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask1(<2 x i32>* %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm2, %zmm2
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -590,10 +583,8 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask1(<2 x i32>* %vp, <16 x i
define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask1(<2 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm1, %zmm1
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -604,10 +595,8 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask1(<2 x i32>* %vp, <16 x
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask2(<2 x i32>* %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm2, %zmm2
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -619,10 +608,8 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask2(<2 x i32>* %vp, <16 x i
define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask2(<2 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm1, %zmm1
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -633,10 +620,8 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask2(<2 x i32>* %vp, <16 x
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask3(<2 x i32>* %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm2, %zmm2
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -648,10 +633,8 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask3(<2 x i32>* %vp, <16 x i
define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask3(<2 x i32>* %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vpbroadcastq %xmm1, %zmm1
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
diff --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll b/llvm/test/CodeGen/X86/subvector-broadcast.ll
index 0005bcc26c6..926f00099a4 100644
--- a/llvm/test/CodeGen/X86/subvector-broadcast.ll
+++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll
@@ -1598,8 +1598,7 @@ define <16 x i32> @test_2xi32_to_16xi32_mem(<2 x i32>* %vp) {
; X32-AVX512-LABEL: test_2xi32_to_16xi32_mem:
; X32-AVX512: # %bb.0:
; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX512-NEXT: vbroadcastsd %xmm0, %zmm0
+; X32-AVX512-NEXT: vbroadcastsd (%eax), %zmm0
; X32-AVX512-NEXT: retl
;
; X64-AVX-LABEL: test_2xi32_to_16xi32_mem:
@@ -1610,8 +1609,7 @@ define <16 x i32> @test_2xi32_to_16xi32_mem(<2 x i32>* %vp) {
;
; X64-AVX512-LABEL: test_2xi32_to_16xi32_mem:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X64-AVX512-NEXT: vbroadcastsd %xmm0, %zmm0
+; X64-AVX512-NEXT: vbroadcastsd (%rdi), %zmm0
; X64-AVX512-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 40ba3c1a9ac..ff9a6210ca4 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -141,7 +141,6 @@ define <16 x i8> @combine_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -165,7 +164,6 @@ define <16 x i8> @combine_pshufb_as_vpbroadcastw128(<16 x i8> %a) {
define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -197,14 +195,12 @@ define <16 x i8> @combine_pshufb_as_vpbroadcastd128(<16 x i8> %a) {
define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; X86-LABEL: combine_permd_as_vpbroadcastd256:
; X86: # %bb.0:
-; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: vpbroadcastd %xmm0, %ymm0
; X86-NEXT: vpaddd {{\.LCPI.*}}, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: combine_permd_as_vpbroadcastd256:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: vpbroadcastd %xmm0, %ymm0
; X64-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
@@ -226,14 +222,12 @@ define <16 x i8> @combine_pshufb_as_vpbroadcastq128(<16 x i8> %a) {
define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; X86-LABEL: combine_permd_as_vpbroadcastq256:
; X86: # %bb.0:
-; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT: vpbroadcastq %xmm0, %ymm0
; X86-NEXT: vpaddd {{\.LCPI.*}}, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: combine_permd_as_vpbroadcastq256:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT: vpbroadcastq %xmm0, %ymm0
; X64-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
@@ -257,7 +251,6 @@ define <4 x float> @combine_pshufb_as_vpbroadcastss128(<4 x float> %a) {
define <8 x float> @combine_permps_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_permps_as_vpbroadcastss256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -268,7 +261,6 @@ define <8 x float> @combine_permps_as_vpbroadcastss256(<4 x float> %a) {
define <4 x double> @combine_permps_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_permps_as_vpbroadcastsd256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -313,7 +305,6 @@ define <4 x float> @combine_vpbroadcast_pshufb_as_vpbroadcastss128(<4 x float> %
define <8 x float> @combine_vpbroadcast_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_vpbroadcast_permd_as_vpbroadcastss256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> zeroinitializer
@@ -324,7 +315,6 @@ define <8 x float> @combine_vpbroadcast_permd_as_vpbroadcastss256(<4 x float> %a
define <4 x double> @combine_vpbroadcast_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_vpbroadcast_permd_as_vpbroadcastsd256:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> zeroinitializer
@@ -665,7 +655,8 @@ define <16 x i8> @combine_pshufb_insertion_as_broadcast_v2i64(i64 %a0) {
define <8 x i32> @combine_permd_insertion_as_broadcast_v4i64(i64 %a0) {
; X86-LABEL: combine_permd_insertion_as_broadcast_v4i64:
; X86: # %bb.0:
-; X86-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vbroadcastsd %xmm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: combine_permd_insertion_as_broadcast_v4i64:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
index 729863cada8..6a295ba8cc5 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
@@ -975,13 +975,13 @@ define <16 x float> @combine_vpermi2var_vpermvar_16f32_as_vperm2_zero(<16 x floa
define <8 x i64> @combine_vpermvar_insertion_as_broadcast_v8i64(i64 %a0) {
; X86-LABEL: combine_vpermvar_insertion_as_broadcast_v8i64:
; X86: # %bb.0:
-; X86-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %zmm0
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vbroadcastsd %xmm0, %zmm0
; X86-NEXT: retl
;
; X64-LABEL: combine_vpermvar_insertion_as_broadcast_v8i64:
; X64: # %bb.0:
-; X64-NEXT: vmovq %rdi, %xmm0
-; X64-NEXT: vpbroadcastq %xmm0, %zmm0
+; X64-NEXT: vpbroadcastq %rdi, %zmm0
; X64-NEXT: retq
%1 = insertelement <8 x i64> undef, i64 %a0, i32 0
%2 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %1, <8 x i64> zeroinitializer)
diff --git a/llvm/test/CodeGen/X86/widened-broadcast.ll b/llvm/test/CodeGen/X86/widened-broadcast.ll
index dcc84a4d82c..ae4f901b157 100644
--- a/llvm/test/CodeGen/X86/widened-broadcast.ll
+++ b/llvm/test/CodeGen/X86/widened-broadcast.ll
@@ -630,8 +630,7 @@ define <16 x i32> @load_splat_16i32_2i32_0101(<2 x i32>* %vp) {
;
; AVX512-LABEL: load_splat_16i32_2i32_0101:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512-NEXT: vbroadcastsd %xmm0, %zmm0
+; AVX512-NEXT: vbroadcastsd (%rdi), %zmm0
; AVX512-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
%res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
OpenPOWER on IntegriCloud