diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 7 |
2 files changed, 18 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 7c67080def2..72dc5090e6c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2708,11 +2708,21 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: case ISD::ANY_EXTEND: { + SDValue X = LHS->getOperand(0); + + if (VT == MVT::i32 && RHSVal == 16 && X.getValueType() == MVT::i16 && + isTypeLegal(MVT::v2i16)) { + // Prefer build_vector as the canonical form if packed types are legal. + // (shl ([asz]ext i16:x), 16) -> (bitcast (build_vector 0, x)) + SDValue Vec = DAG.getBuildVector(MVT::v2i16, SL, + { DAG.getConstant(0, SL, MVT::i16), LHS->getOperand(0) }); + return DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec); + } + // shl (ext x) => zext (shl x), if shift does not overflow int if (VT != MVT::i64) break; KnownBits Known; - SDValue X = LHS->getOperand(0); DAG.computeKnownBits(X, Known); unsigned LZ = Known.countMinLeadingZeros(); if (LZ < RHSVal) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index bcbdd5f8181..5945f963710 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1340,6 +1340,13 @@ def : Pat < (v2i16 (S_PACK_LL_B32_B16 $src0, $src1)) >; +// COPY_TO_REGCLASS is a workaround for a tablegen bug caused by +// S_LSHL_B32's multiple outputs from its implicit scc def. +def : Pat < + (v2i16 (build_vector (i16 0), i16:$src1)), + (v2i16 (COPY_TO_REGCLASS (S_LSHL_B32 i16:$src1, (i16 16)), SReg_32_XM0)) +>; + // With multiple uses of the shift, this will duplicate the shift and // increase register pressure. def : Pat < |