summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 7
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll 4
2 files changed, 9 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e19041651be..ea1270e9cc8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32257,6 +32257,13 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
MVT SrcVT = Src.getSimpleValueType();
if (!SrcVT.isVector())
return false;
+ // Don't bother broadcasting if we just need the 0'th element.
+ if (DemandedElts == 1) {
+ if(Src.getValueType() != VT)
+ Src = widenSubVector(VT.getSimpleVT(), Src, false, Subtarget, TLO.DAG,
+ SDLoc(Op));
+ return TLO.CombineTo(Op, Src);
+ }
APInt SrcUndef, SrcZero;
APInt SrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(), 0);
if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 95d53ace5d3..2ea0f1ab3e7 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -313,7 +313,7 @@ define <4 x float> @combine_vpbroadcast_pshufb_as_vpbroadcastss128(<4 x float> %
define <8 x float> @combine_vpbroadcast_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_vpbroadcast_permd_as_vpbroadcastss256:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> zeroinitializer
@@ -324,7 +324,7 @@ define <8 x float> @combine_vpbroadcast_permd_as_vpbroadcastss256(<4 x float> %a
define <4 x double> @combine_vpbroadcast_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_vpbroadcast_permd_as_vpbroadcastsd256:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> zeroinitializer
OpenPOWER on IntegriCloud