diff options
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 20 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/shuffle-extract-subvector.ll | 48 | 
2 files changed, 57 insertions, 11 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cb19c0a4e0f..4d85532a81b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -475,7 +475,8 @@ namespace {      SDValue reduceBuildVecToShuffle(SDNode *N);      SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,                                    ArrayRef<int> VectorMask, SDValue VecIn1, -                                  SDValue VecIn2, unsigned LeftIdx); +                                  SDValue VecIn2, unsigned LeftIdx, +                                  bool DidSplitVec);      SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);      /// Walk up chain skipping non-aliasing memory nodes, @@ -16416,7 +16417,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {  SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,                                             ArrayRef<int> VectorMask,                                             SDValue VecIn1, SDValue VecIn2, -                                           unsigned LeftIdx) { +                                           unsigned LeftIdx, bool DidSplitVec) {    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());    SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy); @@ -16424,17 +16425,12 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,    EVT InVT1 = VecIn1.getValueType();    EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1; -  unsigned Vec2Offset = 0;    unsigned NumElems = VT.getVectorNumElements();    unsigned ShuffleNumElems = NumElems; -  // In case both the input vectors are extracted from same base -  // vector we do not need extra addend (Vec2Offset) while -  // computing shuffle mask. -  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) || -      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) || -      !(VecIn1.getOperand(0) == VecIn2.getOperand(0))) -    Vec2Offset = InVT1.getVectorNumElements(); +  // If we artificially split a vector in two already, then the offsets in the +  // operands will all be based off of VecIn1, even those in VecIn2. +  unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();    // We can't generate a shuffle node with mismatched input and output types.    // Try to make the types match the type of the output. @@ -16693,6 +16689,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {    // vector, then split the vector efficiently based on the maximum    // vector access index and adjust the VectorMask and    // VecIn accordingly. +  bool DidSplitVec = false;    if (VecIn.size() == 2) {      unsigned MaxIndex = 0;      unsigned NearestPow2 = 0; @@ -16723,6 +16720,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {          VecIn.pop_back();          VecIn.push_back(VecIn1);          VecIn.push_back(VecIn2); +        DidSplitVec = true;          for (unsigned i = 0; i < NumElems; i++) {            if (VectorMask[i] <= 0) @@ -16760,7 +16758,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {          (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();      if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft, -                                                VecRight, LeftIdx)) +                                                VecRight, LeftIdx, DidSplitVec))        Shuffles.push_back(Shuffle);      else        return SDValue(); diff --git a/llvm/test/CodeGen/X86/shuffle-extract-subvector.ll b/llvm/test/CodeGen/X86/shuffle-extract-subvector.ll new file mode 100644 index 00000000000..02ec475b05d --- /dev/null +++ b/llvm/test/CodeGen/X86/shuffle-extract-subvector.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s + +define void @f(<4 x half>* %a, <4 x half>* %b, <8 x half>* %c) { +; CHECK-LABEL: f: +; CHECK:       # %bb.0: +; CHECK-NEXT:    movzwl (%rdi), %r8d +; CHECK-NEXT:    movzwl 2(%rdi), %r9d +; CHECK-NEXT:    movzwl 4(%rdi), %r11d +; CHECK-NEXT:    movzwl 6(%rdi), %edi +; CHECK-NEXT:    movzwl (%rsi), %r10d +; CHECK-NEXT:    movzwl 2(%rsi), %ecx +; CHECK-NEXT:    movzwl 4(%rsi), %eax +; CHECK-NEXT:    movzwl 6(%rsi), %esi +; CHECK-NEXT:    movw %si, 14(%rdx) +; CHECK-NEXT:    movw %di, 12(%rdx) +; CHECK-NEXT:    movw %ax, 10(%rdx) +; CHECK-NEXT:    movw %r11w, 8(%rdx) +; CHECK-NEXT:    movw %cx, 6(%rdx) +; CHECK-NEXT:    movw %r9w, 4(%rdx) +; CHECK-NEXT:    movw %r10w, 2(%rdx) +; CHECK-NEXT:    movw %r8w, (%rdx) +; CHECK-NEXT:    retq +  %tmp4 = load <4 x half>, <4 x half>* %a +  %tmp5 = load <4 x half>, <4 x half>* %b +  %tmp7 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 0, i32 4> +  %tmp8 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 1, i32 5> +  %tmp9 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 2, i32 6> +  %tmp10 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 3, i32 7> +  %tmp11 = extractelement <2 x half> %tmp7, i32 0 +  %tmp12 = insertelement <8 x half> undef, half %tmp11, i32 0 +  %tmp13 = extractelement <2 x half> %tmp7, i32 1 +  %tmp14 = insertelement <8 x half> %tmp12, half %tmp13, i32 1 +  %tmp15 = extractelement <2 x half> %tmp8, i32 0 +  %tmp16 = insertelement <8 x half> %tmp14, half %tmp15, i32 2 +  %tmp17 = extractelement <2 x half> %tmp8, i32 1 +  %tmp18 = insertelement <8 x half> %tmp16, half %tmp17, i32 3 +  %tmp19 = extractelement <2 x half> %tmp9, i32 0 +  %tmp20 = insertelement <8 x half> %tmp18, half %tmp19, i32 4 +  %tmp21 = extractelement <2 x half> %tmp9, i32 1 +  %tmp22 = insertelement <8 x half> %tmp20, half %tmp21, i32 5 +  %tmp23 = extractelement <2 x half> %tmp10, i32 0 +  %tmp24 = insertelement <8 x half> %tmp22, half %tmp23, i32 6 +  %tmp25 = extractelement <2 x half> %tmp10, i32 1 +  %tmp26 = insertelement <8 x half> %tmp24, half %tmp25, i32 7 +  store <8 x half> %tmp26, <8 x half>* %c +  ret void +}  | 

