summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-12-16 22:57:17 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-12-16 22:57:17 +0000
commit: f3b6da00f587026ef7ca098329b63ec17f155c0d (patch)
tree: 02655c8c8dc7e6bc607383acf5df0c6a03872748 /llvm
parent: 68d7f9da76e39819a377ab405b5e5cf2b5298466 (diff)
download: bcm5719-llvm-f3b6da00f587026ef7ca098329b63ec17f155c0d.tar.gz
          bcm5719-llvm-f3b6da00f587026ef7ca098329b63ec17f155c0d.zip
[X86][AVX] Fix failed broadcast fold
Strip excess BITCASTs from EXTRACT_SUBVECTOR input.

llvm-svn: 320930
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 10
-rw-r--r-- llvm/test/CodeGen/X86/widened-broadcast.ll 20
2 files changed, 11 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3cd1704c9c6..01636128df0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -10483,9 +10483,13 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
// We only support broadcasting from 128-bit vectors to minimize the
// number of patterns we need to deal with in isel. So extract down to
- // 128-bits.
- if (SrcVT.getSizeInBits() > 128)
- V = extract128BitVector(V, 0, DAG, DL);
+ // 128-bits, removing as many bitcasts as possible.
+ if (SrcVT.getSizeInBits() > 128) {
+ MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(),
+ 128 / SrcVT.getScalarSizeInBits());
+ V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL);
+ V = DAG.getBitcast(ExtVT, V);
+ }
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}
diff --git a/llvm/test/CodeGen/X86/widened-broadcast.ll b/llvm/test/CodeGen/X86/widened-broadcast.ll
index 564371c7321..3de39f72022 100644
--- a/llvm/test/CodeGen/X86/widened-broadcast.ll
+++ b/llvm/test/CodeGen/X86/widened-broadcast.ll
@@ -526,22 +526,10 @@ define <32 x i8> @load_splat_32i8_32i8_01230123012301230123012301230123(<32 x i8
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
-; AVX1-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vbroadcastss (%rdi), %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vmovaps (%rdi), %ymm0
-; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vmovaps (%rdi), %ymm0
-; AVX512-NEXT: vbroadcastss %xmm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vbroadcastss (%rdi), %ymm0
+; AVX-NEXT: retq
entry:
%ld = load <32 x i8>, <32 x i8>* %ptr
%ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
OpenPOWER on IntegriCloud