From cfaf663a356d1c97b42a62f42c34f316bf0edb49 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 24 Feb 2019 19:57:52 +0000 Subject: [X86] Combine zext(packus(x),packus(y)) -> concat(x,y) (PR39637) It's proving tricky to combine shuffles across multiple vector sizes, so for now I'm adding this more specific combine - the pattern is common enough to be worth it as a first step. llvm-svn: 354757 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'llvm/lib') diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ae7fd6c93b3..dc50eca4258 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -40328,6 +40328,20 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG, if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget)) return R; + // TODO: Combine with any target/faux shuffle. + if (N0.getOpcode() == X86ISD::PACKUS && N0.getValueSizeInBits() == 128 && + VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + unsigned NumSrcElts = N00.getValueType().getVectorNumElements(); + unsigned NumSrcEltBits = N00.getScalarValueSizeInBits(); + APInt ZeroMask = APInt::getHighBitsSet(NumSrcEltBits, NumSrcEltBits / 2); + if ((N00.isUndef() || DAG.MaskedValueIsZero(N00, ZeroMask)) && + (N01.isUndef() || DAG.MaskedValueIsZero(N01, ZeroMask))) { + return concatSubVectors(N00, N01, VT, NumSrcElts * 2, DAG, dl, 128); + } + } + return SDValue(); } -- cgit v1.2.3