summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJustin Bogner <mail@justinbogner.com>2019-03-19 16:52:00 +0000
committerJustin Bogner <mail@justinbogner.com>2019-03-19 16:52:00 +0000
commitb353d6887ea583f9dbdbe84bcc848b1d36314cde (patch)
tree85026188d6b3568f0ca96d707b13145430c7b58d
parent4ce581e5a9f902e5b6db0fd3115f5e914f105207 (diff)
downloadbcm5719-llvm-b353d6887ea583f9dbdbe84bcc848b1d36314cde.tar.gz
bcm5719-llvm-b353d6887ea583f9dbdbe84bcc848b1d36314cde.zip
[DAGCombine] Fix a miscompile when reducing BUILD_VECTORs to a shuffle
In r311255 we added a case where we split vectors whose elements are all derived from the same input vector so that we could shuffle it more efficiently. In doing so, createBuildVecShuffle was taught to adjust for the fact that all indices would be based off of the first vector when this happens, but it's possible for the code that checked that to fire incorrectly if we happen to have a BUILD_VECTOR of extracts from subvectors and don't hit this new optimization. Instead of trying to detect if we've split the vector by checking if we have extracts from the same base vector, we can just pass that information into createBuildVecShuffle, avoiding the miscompile. Differential Revision: https://reviews.llvm.org/D59507 llvm-svn: 356476
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp20
-rw-r--r--llvm/test/CodeGen/X86/shuffle-extract-subvector.ll48
2 files changed, 57 insertions, 11 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cb19c0a4e0f..4d85532a81b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -475,7 +475,8 @@ namespace {
SDValue reduceBuildVecToShuffle(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
- SDValue VecIn2, unsigned LeftIdx);
+ SDValue VecIn2, unsigned LeftIdx,
+ bool DidSplitVec);
SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
/// Walk up chain skipping non-aliasing memory nodes,
@@ -16416,7 +16417,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
- unsigned LeftIdx) {
+ unsigned LeftIdx, bool DidSplitVec) {
MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
@@ -16424,17 +16425,12 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
EVT InVT1 = VecIn1.getValueType();
EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
- unsigned Vec2Offset = 0;
unsigned NumElems = VT.getVectorNumElements();
unsigned ShuffleNumElems = NumElems;
- // In case both the input vectors are extracted from same base
- // vector we do not need extra addend (Vec2Offset) while
- // computing shuffle mask.
- if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
- !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
- !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
- Vec2Offset = InVT1.getVectorNumElements();
+ // If we artificially split a vector in two already, then the offsets in the
+ // operands will all be based off of VecIn1, even those in VecIn2.
+ unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
// We can't generate a shuffle node with mismatched input and output types.
// Try to make the types match the type of the output.
@@ -16693,6 +16689,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
// vector, then split the vector efficiently based on the maximum
// vector access index and adjust the VectorMask and
// VecIn accordingly.
+ bool DidSplitVec = false;
if (VecIn.size() == 2) {
unsigned MaxIndex = 0;
unsigned NearestPow2 = 0;
@@ -16723,6 +16720,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
VecIn.pop_back();
VecIn.push_back(VecIn1);
VecIn.push_back(VecIn2);
+ DidSplitVec = true;
for (unsigned i = 0; i < NumElems; i++) {
if (VectorMask[i] <= 0)
@@ -16760,7 +16758,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
(LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
- VecRight, LeftIdx))
+ VecRight, LeftIdx, DidSplitVec))
Shuffles.push_back(Shuffle);
else
return SDValue();
diff --git a/llvm/test/CodeGen/X86/shuffle-extract-subvector.ll b/llvm/test/CodeGen/X86/shuffle-extract-subvector.ll
new file mode 100644
index 00000000000..02ec475b05d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/shuffle-extract-subvector.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+define void @f(<4 x half>* %a, <4 x half>* %b, <8 x half>* %c) {
+; CHECK-LABEL: f:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movzwl (%rdi), %r8d
+; CHECK-NEXT: movzwl 2(%rdi), %r9d
+; CHECK-NEXT: movzwl 4(%rdi), %r11d
+; CHECK-NEXT: movzwl 6(%rdi), %edi
+; CHECK-NEXT: movzwl (%rsi), %r10d
+; CHECK-NEXT: movzwl 2(%rsi), %ecx
+; CHECK-NEXT: movzwl 4(%rsi), %eax
+; CHECK-NEXT: movzwl 6(%rsi), %esi
+; CHECK-NEXT: movw %si, 14(%rdx)
+; CHECK-NEXT: movw %di, 12(%rdx)
+; CHECK-NEXT: movw %ax, 10(%rdx)
+; CHECK-NEXT: movw %r11w, 8(%rdx)
+; CHECK-NEXT: movw %cx, 6(%rdx)
+; CHECK-NEXT: movw %r9w, 4(%rdx)
+; CHECK-NEXT: movw %r10w, 2(%rdx)
+; CHECK-NEXT: movw %r8w, (%rdx)
+; CHECK-NEXT: retq
+ %tmp4 = load <4 x half>, <4 x half>* %a
+ %tmp5 = load <4 x half>, <4 x half>* %b
+ %tmp7 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 0, i32 4>
+ %tmp8 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 1, i32 5>
+ %tmp9 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 2, i32 6>
+ %tmp10 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 3, i32 7>
+ %tmp11 = extractelement <2 x half> %tmp7, i32 0
+ %tmp12 = insertelement <8 x half> undef, half %tmp11, i32 0
+ %tmp13 = extractelement <2 x half> %tmp7, i32 1
+ %tmp14 = insertelement <8 x half> %tmp12, half %tmp13, i32 1
+ %tmp15 = extractelement <2 x half> %tmp8, i32 0
+ %tmp16 = insertelement <8 x half> %tmp14, half %tmp15, i32 2
+ %tmp17 = extractelement <2 x half> %tmp8, i32 1
+ %tmp18 = insertelement <8 x half> %tmp16, half %tmp17, i32 3
+ %tmp19 = extractelement <2 x half> %tmp9, i32 0
+ %tmp20 = insertelement <8 x half> %tmp18, half %tmp19, i32 4
+ %tmp21 = extractelement <2 x half> %tmp9, i32 1
+ %tmp22 = insertelement <8 x half> %tmp20, half %tmp21, i32 5
+ %tmp23 = extractelement <2 x half> %tmp10, i32 0
+ %tmp24 = insertelement <8 x half> %tmp22, half %tmp23, i32 6
+ %tmp25 = extractelement <2 x half> %tmp10, i32 1
+ %tmp26 = insertelement <8 x half> %tmp24, half %tmp25, i32 7
+ store <8 x half> %tmp26, <8 x half>* %c
+ ret void
+}
OpenPOWER on IntegriCloud