summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp67
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td54
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp2
4 files changed, 40 insertions, 85 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index b4664f49201..ec0a0ace36f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -330,6 +330,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f3e69d4d551..6149d82d38f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -714,8 +714,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
- setOperationAction(ISD::MULHS, VT, Custom);
- setOperationAction(ISD::MULHU, VT, Custom);
+ setOperationAction(ISD::MULHS, VT, Legal);
+ setOperationAction(ISD::MULHU, VT, Legal);
} else {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
@@ -2670,66 +2670,6 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
-// Lower vector multiply high (ISD::MULHS and ISD::MULHU).
-static SDValue LowerMULH(SDValue Op, SelectionDAG &DAG) {
- // Multiplications are only custom-lowered for 128-bit vectors so that
- // {S,U}MULL{2} can be detected. Otherwise v2i64 multiplications are not
- // legal.
- EVT VT = Op.getValueType();
- assert(VT.is128BitVector() && VT.isInteger() &&
- "unexpected type for custom-lowering ISD::MULH{U,S}");
-
- SDValue V0 = Op.getOperand(0);
- SDValue V1 = Op.getOperand(1);
-
- SDLoc DL(Op);
-
- EVT ExtractVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
-
- // We turn (V0 mulhs/mulhu V1) to:
- //
- // (uzp2 (smull (extract_subvector (ExtractVT V128:V0, (i64 0)),
- // (extract_subvector (ExtractVT V128:V1, (i64 0))))),
- // (smull (extract_subvector (ExtractVT V128:V0, (i64 VMull2Idx)),
- // (extract_subvector (ExtractVT V128:V2, (i64 VMull2Idx))))))
- //
- // Where ExtractVT is a subvector with half number of elements, and
- // VMullIdx2 is the index of the middle element (the high part).
- //
- // The vector hight part extract and multiply will be matched against
- // {S,U}MULL{v16i8_v8i16,v8i16_v4i32,v4i32_v2i64} which in turn will
- // issue a {s}mull2 instruction.
- //
- // This basically multiply the lower subvector with '{s,u}mull', the high
- // subvector with '{s,u}mull2', and shuffle both results high part in
- // resulting vector.
- unsigned Mull2VectorIdx = VT.getVectorNumElements () / 2;
- SDValue VMullIdx = DAG.getConstant(0, DL, MVT::i64);
- SDValue VMull2Idx = DAG.getConstant(Mull2VectorIdx, DL, MVT::i64);
-
- SDValue VMullV0 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V0, VMullIdx);
- SDValue VMullV1 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, VMullIdx);
-
- SDValue VMull2V0 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V0, VMull2Idx);
- SDValue VMull2V1 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, VMull2Idx);
-
- unsigned MullOpc = Op.getOpcode() == ISD::MULHS ? AArch64ISD::SMULL
- : AArch64ISD::UMULL;
-
- EVT MullVT = ExtractVT.widenIntegerVectorElementType(*DAG.getContext());
- SDValue Mull = DAG.getNode(MullOpc, DL, MullVT, VMullV0, VMullV1);
- SDValue Mull2 = DAG.getNode(MullOpc, DL, MullVT, VMull2V0, VMull2V1);
-
- Mull = DAG.getNode(ISD::BITCAST, DL, VT, Mull);
- Mull2 = DAG.getNode(ISD::BITCAST, DL, VT, Mull2);
-
- return DAG.getNode(AArch64ISD::UZP2, DL, VT, Mull, Mull2);
-}
-
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -2932,9 +2872,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
- case ISD::MULHS:
- case ISD::MULHU:
- return LowerMULH(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::STORE:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 84aedaafffe..cfe05822fd2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4108,25 +4108,6 @@ defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
UMULLv4i16_v4i32, UMULLv2i32_v2i64>;
-// Patterns for smull2/umull2.
-multiclass Neon_mul_high_patterns<SDPatternOperator opnode,
- Instruction INST8B, Instruction INST4H, Instruction INST2S> {
- def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm))),
- (INST8B V128:$Rn, V128:$Rm)>;
- def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm))),
- (INST4H V128:$Rn, V128:$Rm)>;
- def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm))),
- (INST2S V128:$Rn, V128:$Rm)>;
-}
-
-defm : Neon_mul_high_patterns<AArch64smull, SMULLv16i8_v8i16,
- SMULLv8i16_v4i32, SMULLv4i32_v2i64>;
-defm : Neon_mul_high_patterns<AArch64umull, UMULLv16i8_v8i16,
- UMULLv8i16_v4i32, UMULLv4i32_v2i64>;
-
// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
Instruction INST8B, Instruction INST4H, Instruction INST2S> {
@@ -5990,6 +5971,41 @@ def : Pat<(i32 (trunc GPR64sp:$src)),
// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;
+// Multiply high patterns which multiply the lower subvector using smull/umull
+// and the upper subvector with smull2/umull2. Then shuffle the high the high
+// part of both results together.
+def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
+ (UZP2v16i8
+ (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
+def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
+ (UZP2v8i16
+ (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
+def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
+ (UZP2v4i32
+ (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
+
+def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
+ (UZP2v16i8
+ (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
+def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
+ (UZP2v8i16
+ (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
+def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
+ (UZP2v4i32
+ (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
+
// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bebbb90ae81..da816c41981 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11529,7 +11529,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
SDValue BC = peekThroughBitcasts(V);
// Also check the simpler case, where we can directly reuse the scalar.
- if (V.getOpcode() == ISD::BUILD_VECTOR ||
+ if ((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
V = V.getOperand(BroadcastIdx);
OpenPOWER on IntegriCloud