summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 9
-rw-r--r-- llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll 4
-rw-r--r-- llvm/test/CodeGen/X86/avx-vbroadcast.ll 20
3 files changed, 12 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7b97e79de5d..17ac3da53f5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8175,6 +8175,11 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
MVT BroadcastVT = VT;
+ // Peek through any bitcast (only useful for loads).
+ SDValue BC = V;
+ while (BC.getOpcode() == ISD::BITCAST)
+ BC = BC.getOperand(0);
+
// Also check the simpler case, where we can directly reuse the scalar.
if (V.getOpcode() == ISD::BUILD_VECTOR ||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
@@ -8184,14 +8189,14 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
// Only AVX2 has register broadcasts.
if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
return SDValue();
- } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) {
+ } else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) {
// 32-bit targets need to load i64 as a f64 and then bitcast the result.
if (!Subtarget->is64Bit() && VT.getScalarType() == MVT::i64)
BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
// If we are broadcasting a load that is only used by the shuffle
// then we can reduce the vector load to the broadcasted scalar load.
- LoadSDNode *Ld = cast<LoadSDNode>(V);
+ LoadSDNode *Ld = cast<LoadSDNode>(BC);
SDValue BaseAddr = Ld->getOperand(1);
EVT AddrVT = BaseAddr.getValueType();
EVT SVT = BroadcastVT.getScalarType();
diff --git a/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll b/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
index 92ec107a007..6950641a08a 100644
--- a/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
+++ b/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -3,9 +3,7 @@
define void @endless_loop() {
; CHECK-LABEL: endless_loop:
; CHECK-NEXT: # BB#0:
-; CHECK-NEXT: vmovaps (%eax), %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; CHECK-NEXT: vbroadcastss (%eax), %ymm0
; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index a98a86b733e..0c92f4884fb 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -192,18 +192,12 @@ define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtabl
; X32-LABEL: load_splat_8i32_8i32_55555555:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovaps (%eax), %ymm0
-; X32-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vbroadcastss 20(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_8i32_8i32_55555555:
; X64: ## BB#0: ## %entry
-; X64-NEXT: vmovaps (%rdi), %ymm0
-; X64-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: vbroadcastss 20(%rdi), %ymm0
; X64-NEXT: retq
entry:
%ld = load <8 x i32>, <8 x i32>* %ptr
@@ -304,18 +298,12 @@ define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable re
; X32-LABEL: load_splat_4i64_4i64_2222:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovapd (%eax), %ymm0
-; X32-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vbroadcastsd 16(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_4i64_4i64_2222:
; X64: ## BB#0: ## %entry
-; X64-NEXT: vmovapd (%rdi), %ymm0
-; X64-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT: retq
entry:
%ld = load <4 x i64>, <4 x i64>* %ptr
OpenPOWER on IntegriCloud