summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AArch64
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2019-12-21 10:33:58 -0500
committerSanjay Patel <spatel@rotateright.com>2019-12-22 08:37:03 -0500
commit0b38af89e2c0adc13a6efb1dd04485229ef0d1c6 (patch)
tree32866523a69556361ad699c1355bb7271e516133 /llvm/lib/Target/AArch64
parent218601ada1496f920fadddae9a21f7d6ccf48da5 (diff)
downloadbcm5719-llvm-0b38af89e2c0adc13a6efb1dd04485229ef0d1c6.tar.gz
bcm5719-llvm-0b38af89e2c0adc13a6efb1dd04485229ef0d1c6.zip
[AArch64] match splat of bitcasted extract subvector to DUPLANE
This is another potential regression exposed by D63815. Here we peek through a bitcast to find an extract subvector and scale the splat offset based on that: splat (bitcast (extract X, C)), LaneC --> duplane (bitcast X), LaneC' Differential Revision: https://reviews.llvm.org/D71672
Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp50
1 files changed, 43 insertions, 7 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a3dd2e65a12..d01979d8be8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7086,19 +7086,55 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Otherwise, duplicate from the lane of the input vector.
unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
- // SelectionDAGBuilder may have "helpfully" already extracted or conatenated
- // to make a vector of the same size as this SHUFFLE. We can ignore the
- // extract entirely, and canonicalise the concat using WidenVector.
- if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- Lane += cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
+ // Try to eliminate a bitcasted extract subvector before a DUPLANE.
+ auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
+ // Match: dup (bitcast (extract_subv X, C)), LaneC
+ if (BitCast.getOpcode() != ISD::BITCAST ||
+ BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return false;
+
+ // The extract index must align in the destination type. That may not
+ // happen if the bitcast is from narrow to wide type.
+ SDValue Extract = BitCast.getOperand(0);
+ unsigned ExtIdx = Extract.getConstantOperandVal(1);
+ unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
+ unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
+ unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
+ if (ExtIdxInBits % CastedEltBitWidth != 0)
+ return false;
+
+ // Update the lane value by offsetting with the scaled extract index.
+ LaneC += ExtIdxInBits / CastedEltBitWidth;
+
+ // Determine the casted vector type of the wide vector input.
+ // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
+ // Examples:
+ // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
+ // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
+ unsigned SrcVecNumElts =
+ Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
+ CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
+ SrcVecNumElts);
+ return true;
+ };
+ MVT CastVT;
+ if (getScaledOffsetDup(V1, Lane, CastVT)) {
+ V1 = DAG.getBitcast(CastVT, V1.getOperand(0).getOperand(0));
+ } else if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ // The lane is incremented by the index of the extract.
+ // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
+ Lane += V1.getConstantOperandVal(1);
V1 = V1.getOperand(0);
} else if (V1.getOpcode() == ISD::CONCAT_VECTORS) {
+ // The lane is decremented if we are splatting from the 2nd operand.
+ // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
Lane -= Idx * VT.getVectorNumElements() / 2;
V1 = WidenVector(V1.getOperand(Idx), DAG);
- } else if (VT.getSizeInBits() == 64)
+ } else if (VT.getSizeInBits() == 64) {
+ // Widen the operand to 128-bit register with undef.
V1 = WidenVector(V1, DAG);
-
+ }
return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i64));
}
OpenPOWER on IntegriCloud