summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-12-16 22:57:17 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-12-16 22:57:17 +0000
commitf3b6da00f587026ef7ca098329b63ec17f155c0d (patch)
tree02655c8c8dc7e6bc607383acf5df0c6a03872748
parent68d7f9da76e39819a377ab405b5e5cf2b5298466 (diff)
downloadbcm5719-llvm-f3b6da00f587026ef7ca098329b63ec17f155c0d.tar.gz
bcm5719-llvm-f3b6da00f587026ef7ca098329b63ec17f155c0d.zip
[X86][AVX] Fix failed broadcast fold
Strip excess BITCASTs from EXTRACT_SUBVECTOR input llvm-svn: 320930
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp10
-rw-r--r--llvm/test/CodeGen/X86/widened-broadcast.ll20
2 files changed, 11 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3cd1704c9c6..01636128df0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -10483,9 +10483,13 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
// We only support broadcasting from 128-bit vectors to minimize the
// number of patterns we need to deal with in isel. So extract down to
- // 128-bits.
- if (SrcVT.getSizeInBits() > 128)
- V = extract128BitVector(V, 0, DAG, DL);
+ // 128-bits, removing as many bitcasts as possible.
+ if (SrcVT.getSizeInBits() > 128) {
+ MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(),
+ 128 / SrcVT.getScalarSizeInBits());
+ V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL);
+ V = DAG.getBitcast(ExtVT, V);
+ }
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}
diff --git a/llvm/test/CodeGen/X86/widened-broadcast.ll b/llvm/test/CodeGen/X86/widened-broadcast.ll
index 564371c7321..3de39f72022 100644
--- a/llvm/test/CodeGen/X86/widened-broadcast.ll
+++ b/llvm/test/CodeGen/X86/widened-broadcast.ll
@@ -526,22 +526,10 @@ define <32 x i8> @load_splat_32i8_32i8_01230123012301230123012301230123(<32 x i8
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
-; AVX1-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vbroadcastss (%rdi), %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vmovaps (%rdi), %ymm0
-; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vmovaps (%rdi), %ymm0
-; AVX512-NEXT: vbroadcastss %xmm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vbroadcastss (%rdi), %ymm0
+; AVX-NEXT: retq
entry:
%ld = load <32 x i8>, <32 x i8>* %ptr
%ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
OpenPOWER on IntegriCloud