summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2019-06-05 12:56:53 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2019-06-05 12:56:53 +0000
commit 886a55eaa05ddfecaf41b86d1735c503b3b17bda (patch)
tree 23a3ebfcdd1eb33495d0ca78dfb45cca643ea93d /llvm/lib/Target
parent 2121a4f7335a9e4985997d4d880c11c588b48a27 (diff)
download bcm5719-llvm-886a55eaa05ddfecaf41b86d1735c503b3b17bda.tar.gz
bcm5719-llvm-886a55eaa05ddfecaf41b86d1735c503b3b17bda.zip
[X86][AVX] combineX86ShuffleChain - combine shuffle(extractsubvector(x),extractsubvector(y))
We already handle the case where we combine shuffle(extractsubvector(x),extractsubvector(x)); this relaxes the requirement to permit different sources as long as they have the same value type. This causes a couple of cases where the VPERMV3 binary shuffles occur at a wider width than before, which I intend to improve in future commits - but as only the subvector's mask indices are defined, these will broadcast so we don't see any increase in constant size. llvm-svn: 362599
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 13
1 files changed, 10 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 64585c8de0a..a6aa2b77990 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32091,19 +32091,28 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
isa<ConstantSDNode>(V2.getOperand(1))) {
SDValue Src1 = V1.getOperand(0);
SDValue Src2 = V2.getOperand(0);
- if (Src1 == Src2) {
+ if (Src1.getValueType() == Src2.getValueType()) {
unsigned Offset1 = V1.getConstantOperandVal(1);
unsigned Offset2 = V2.getConstantOperandVal(1);
assert(((Offset1 % VT1.getVectorNumElements()) == 0 ||
(Offset2 % VT2.getVectorNumElements()) == 0 ||
(Src1.getValueSizeInBits() % RootSizeInBits) == 0) &&
"Unexpected subvector extraction");
+ unsigned Scale = Src1.getValueSizeInBits() / RootSizeInBits;
+
// Convert extraction indices to mask size.
Offset1 /= VT1.getVectorNumElements();
Offset2 /= VT2.getVectorNumElements();
Offset1 *= NumMaskElts;
Offset2 *= NumMaskElts;
+ SmallVector<SDValue, 2> NewInputs;
+ NewInputs.push_back(Src1);
+ if (Src1 != Src2) {
+ NewInputs.push_back(Src2);
+ Offset2 += Scale * NumMaskElts;
+ }
+
// Create new mask for larger type.
SmallVector<int, 64> NewMask(Mask);
for (int &M : NewMask) {
@@ -32114,10 +32123,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
else
M = (M - NumMaskElts) + Offset2;
}
- unsigned Scale = Src1.getValueSizeInBits() / RootSizeInBits;
NewMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef);
- SDValue NewInputs[] = {Src1};
if (SDValue Res = combineX86ShuffleChain(
NewInputs, Src1, NewMask, Depth, HasVariableMask,
AllowVariableMask, DAG, Subtarget)) {
OpenPOWER on IntegriCloud