-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp                               | 20
-rw-r--r-- | llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir | 42
2 files changed, 59 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 873378a97c4..504f0eb7600 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -189,7 +189,10 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
   LLT DstType = MRI.getType(MI.getOperand(0).getReg());
   Register Src1 = MI.getOperand(1).getReg();
   LLT SrcType = MRI.getType(Src1);
-  unsigned DstNumElts = DstType.getNumElements();
+  // As bizarre as it may look, a shuffle vector can actually produce
+  // a scalar! This is because at the IR level a <1 x ty> shuffle
+  // vector is perfectly valid.
+  unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
   unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
 
   // If the resulting vector is smaller than the size of the source
@@ -199,7 +202,15 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
   // Note: We may still be able to produce a concat_vectors fed by
   // extract_vector_elt and so on. It is less clear that would
   // be better though, so don't bother for now.
-  if (DstNumElts < 2 * SrcNumElts)
+  //
+  // If the destination is a scalar, the size of the sources doesn't
+  // matter. We will lower the shuffle to a plain copy. This will
+  // work only if the source and destination have the same size, but
+  // that's covered by the next condition.
+  //
+  // TODO: If the sizes of the source and destination don't match,
+  // we could still emit an extract vector element in that case.
+  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
     return false;
 
   // Check that the shuffle mask can be broken evenly between the
@@ -254,7 +265,10 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
   Builder.setInsertPt(*MI.getParent(), MI);
   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
 
-  Builder.buildMerge(NewDstReg, Ops);
+  if (Ops.size() == 1)
+    Builder.buildCopy(NewDstReg, Ops[0]);
+  else
+    Builder.buildMerge(NewDstReg, Ops);
 
   MI.eraseFromParent();
   replaceRegWith(MRI, DstReg, NewDstReg);
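For context, a minimal IR input of the kind this change is aimed at might look like the following (a hypothetical sketch, not taken from the patch; it relies on the IRTranslator's usual mapping of single-element vectors to scalar and pointer LLTs, which is what makes the G_SHUFFLE_VECTOR destination a plain p0 here):

; Hypothetical example: a <1 x ty> shufflevector is valid IR. Single-element
; vectors are translated to scalar/pointer LLTs, so the combiner sees a
; G_SHUFFLE_VECTOR whose destination is p0 and can rewrite it into a COPY.
define <1 x i8*> @shuffle_v1_to_copy(<1 x i8*> %v) {
  %s = shufflevector <1 x i8*> %v, <1 x i8*> undef, <1 x i32> zeroinitializer
  ret <1 x i8*> %s
}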
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir
index 8694bff4746..5166f894efa 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir
@@ -414,3 +414,45 @@ body: |
     %6:_(<4 x s64>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,-1,-1,1)
     RET_ReallyLR implicit %6
 ...
+
+# Check that shuffle_vector on scalars gets combined into a plain
+# copy when the resulting type is a scalar as well and the sizes
+# are compatible.
+---
+name: shuffle_vector_on_scalars_to_copy_ptr
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: shuffle_vector_on_scalars_to_copy_ptr
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: RET_ReallyLR implicit [[COPY]](p0)
+    %0:_(p0) = COPY $x0
+    %6:_(p0) = G_SHUFFLE_VECTOR %0, %0, shufflemask(0)
+    RET_ReallyLR implicit %6
+...
+
+# Check that shuffle_vector on vectors doesn't get combined
+# when the resulting type is a scalar.
+# We should be able to replace this with an extract vector element,
+# but that's not implemented yet.
+---
+name: shuffle_vector_to_copy_neg
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: shuffle_vector_to_copy_neg
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $x1
+    ; CHECK: [[SHUF:%[0-9]+]]:_(s32) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(1)
+    ; CHECK: RET_ReallyLR implicit [[SHUF]](s32)
+    %0:_(<2 x s32>) = COPY $x0
+    %1:_(<2 x s32>) = COPY $x1
+    %6:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1)
+    RET_ReallyLR implicit %6
+...
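The negative test above corresponds to IR along the following lines (again a hypothetical sketch, not part of the patch): the destination <1 x i32> becomes a bare s32 in GMIR, but the <2 x s32> sources are 64 bits wide, so the combine leaves the shuffle alone until the extract-vector-element lowering mentioned in the TODO is implemented.

; Hypothetical example: the scalar destination (s32) and the vector sources
; (<2 x s32>, 64 bits) have different sizes, so the combine does not fire and
; the G_SHUFFLE_VECTOR survives, as checked by shuffle_vector_to_copy_neg.
define <1 x i32> @shuffle_neg(<2 x i32> %a, <2 x i32> %b) {
  %s = shufflevector <2 x i32> %a, <2 x i32> %b, <1 x i32> <i32 1>
  ret <1 x i32> %s
}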