diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-06-05 12:56:53 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-06-05 12:56:53 +0000 |
| commit | 886a55eaa05ddfecaf41b86d1735c503b3b17bda (patch) | |
| tree | 23a3ebfcdd1eb33495d0ca78dfb45cca643ea93d /llvm/lib/Target | |
| parent | 2121a4f7335a9e4985997d4d880c11c588b48a27 (diff) | |
| download | bcm5719-llvm-886a55eaa05ddfecaf41b86d1735c503b3b17bda.tar.gz bcm5719-llvm-886a55eaa05ddfecaf41b86d1735c503b3b17bda.zip | |
[X86][AVX] combineX86ShuffleChain - combine shuffle(extractsubvector(x),extractsubvector(y))
We already handle the case where we combine shuffle(extractsubvector(x),extractsubvector(x)). This change relaxes the requirement to permit different sources, as long as they have the same value type.
This causes a couple of cases where the VPERMV3 binary shuffles occur at a wider width than before, which I intend to improve in future commits. However, as only the subvector's mask indices are defined, these will be broadcast, so we don't see any increase in constant size.
llvm-svn: 362599
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 13 |
1 files changed, 10 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 64585c8de0a..a6aa2b77990 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -32091,19 +32091,28 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, isa<ConstantSDNode>(V2.getOperand(1))) { SDValue Src1 = V1.getOperand(0); SDValue Src2 = V2.getOperand(0); - if (Src1 == Src2) { + if (Src1.getValueType() == Src2.getValueType()) { unsigned Offset1 = V1.getConstantOperandVal(1); unsigned Offset2 = V2.getConstantOperandVal(1); assert(((Offset1 % VT1.getVectorNumElements()) == 0 || (Offset2 % VT2.getVectorNumElements()) == 0 || (Src1.getValueSizeInBits() % RootSizeInBits) == 0) && "Unexpected subvector extraction"); + unsigned Scale = Src1.getValueSizeInBits() / RootSizeInBits; + // Convert extraction indices to mask size. Offset1 /= VT1.getVectorNumElements(); Offset2 /= VT2.getVectorNumElements(); Offset1 *= NumMaskElts; Offset2 *= NumMaskElts; + SmallVector<SDValue, 2> NewInputs; + NewInputs.push_back(Src1); + if (Src1 != Src2) { + NewInputs.push_back(Src2); + Offset2 += Scale * NumMaskElts; + } + // Create new mask for larger type. SmallVector<int, 64> NewMask(Mask); for (int &M : NewMask) { @@ -32114,10 +32123,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, else M = (M - NumMaskElts) + Offset2; } - unsigned Scale = Src1.getValueSizeInBits() / RootSizeInBits; NewMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef); - SDValue NewInputs[] = {Src1}; if (SDValue Res = combineX86ShuffleChain( NewInputs, Src1, NewMask, Depth, HasVariableMask, AllowVariableMask, DAG, Subtarget)) { |

