| author | Evandro Menezes <e.menezes@samsung.com> | 2018-02-20 20:31:45 +0000 |
|---|---|---|
| committer | Evandro Menezes <e.menezes@samsung.com> | 2018-02-20 20:31:45 +0000 |
| commit | 72f3983633e665dfe9dbee42adedf9601e3dac7a (patch) | |
| tree | f3a84d1a360ceae2a3b68b6bc792ac7f5f7d26f5 /llvm | |
| parent | 2c2ed3cf031e022ce939254034666b9f8a1a2f89 (diff) | |
| download | bcm5719-llvm-72f3983633e665dfe9dbee42adedf9601e3dac7a.tar.gz bcm5719-llvm-72f3983633e665dfe9dbee42adedf9601e3dac7a.zip | |
[AArch64] Refactor instructions using SIMD immediates
Get rid of icky goto loops and make the code easier to maintain. Otherwise,
NFC.
Restore r324903 and fix PR36369.
Differential Revision: https://reviews.llvm.org/D43364
llvm-svn: 325621
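For readers skimming the change, the shape of the cleanup is easy to state outside of LLVM: the old lowering routines used a goto-based loop to re-run one large immediate-matching block a second time over the undef-toggled bits, while the new code moves the matching into small `tryAdvSIMDModImm*` helpers that return a null `SDValue` on failure, so the retry becomes a plain second call. The sketch below is a simplified, standalone analogue of that refactoring, not the committed code: `try32`/`try16` are toy stand-ins for the real matchers, and `std::optional` plays the role of `SDValue`'s null state.

```cpp
#include <cstdint>
#include <optional>

// Toy matchers standing in for tryAdvSIMDModImm32/tryAdvSIMDModImm16; the
// real helpers test the AArch64 modified-immediate encodings (Type1..Type12).
static std::optional<uint64_t> try32(uint64_t Bits) {
  return (Bits >> 32) == (Bits & 0xffffffff) ? std::optional<uint64_t>(Bits)
                                             : std::nullopt;
}
static std::optional<uint64_t> try16(uint64_t Bits) {
  return (Bits >> 48) == (Bits & 0xffff) ? std::optional<uint64_t>(Bits)
                                         : std::nullopt;
}

// Before: one goto loop re-runs the same matching block, with a flag to
// stop after the second pass over the undef-toggled bits.
static std::optional<uint64_t> lowerOld(uint64_t DefBits, uint64_t UndefBits) {
  bool SecondTry = false;
AttemptModImm:
  if (auto V = try32(DefBits)) return V;
  if (auto V = try16(DefBits)) return V;
  if (SecondTry) return std::nullopt;
  SecondTry = true;
  DefBits = UndefBits;
  goto AttemptModImm;
}

// After: the matchers are ordinary functions, so the second pass is just a
// second pair of calls -- no label, no flag, no goto.
static std::optional<uint64_t> lowerNew(uint64_t DefBits, uint64_t UndefBits) {
  if (auto V = try32(DefBits))   return V;
  if (auto V = try16(DefBits))   return V;
  if (auto V = try32(UndefBits)) return V;
  if (auto V = try16(UndefBits)) return V;
  return std::nullopt;
}
```

With the helpers factored out, each caller composes them in priority order, which is exactly how the new LowerVectorAND, LowerVectorOR, and LowerBUILD_VECTOR bodies in the diff below read.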
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 649 |
| -rw-r--r-- | llvm/test/CodeGen/AArch64/build-one-lane.ll | 90 |
2 files changed, 371 insertions, 368 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9330d7cbbe4..8829d1a6d96 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6244,96 +6244,236 @@ static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
   return false;
 }
 
+// Try 64-bit splatted SIMD immediate.
+static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
+                                  const APInt &Bits) {
+  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
+    uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
+    EVT VT = Op.getValueType();
+    MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
+
+    if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
+      Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
+
+      SDLoc dl(Op);
+      SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
+                                DAG.getConstant(Value, dl, MVT::i32));
+      return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
+    }
+  }
+
+  return SDValue();
+}
+
+// Try 32-bit splatted SIMD immediate.
+static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
+                                  const APInt &Bits,
+                                  const SDValue *LHS = nullptr) {
+  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
+    uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
+    EVT VT = Op.getValueType();
+    MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+    bool isAdvSIMDModImm = false;
+    uint64_t Shift;
+
+    if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
+      Value = AArch64_AM::encodeAdvSIMDModImmType1(Value);
+      Shift = 0;
+    }
+    else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
+      Value = AArch64_AM::encodeAdvSIMDModImmType2(Value);
+      Shift = 8;
+    }
+    else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
+      Value = AArch64_AM::encodeAdvSIMDModImmType3(Value);
+      Shift = 16;
+    }
+    else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
+      Value = AArch64_AM::encodeAdvSIMDModImmType4(Value);
+      Shift = 24;
+    }
+
+    if (isAdvSIMDModImm) {
+      SDLoc dl(Op);
+      SDValue Mov;
+
+      if (LHS)
+        Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
+                          DAG.getConstant(Value, dl, MVT::i32),
+                          DAG.getConstant(Shift, dl, MVT::i32));
+      else
+        Mov = DAG.getNode(NewOp, dl, MovTy,
+                          DAG.getConstant(Value, dl, MVT::i32),
+                          DAG.getConstant(Shift, dl, MVT::i32));
+
+      return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
+    }
+  }
+
+  return SDValue();
+}
+
+// Try 16-bit splatted SIMD immediate.
+static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
+                                  const APInt &Bits,
+                                  const SDValue *LHS = nullptr) {
+  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
+    uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
+    EVT VT = Op.getValueType();
+    MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
+    bool isAdvSIMDModImm = false;
+    uint64_t Shift;
+
+    if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
+      Value = AArch64_AM::encodeAdvSIMDModImmType5(Value);
+      Shift = 0;
+    }
+    else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
+      Value = AArch64_AM::encodeAdvSIMDModImmType6(Value);
+      Shift = 8;
+    }
+
+    if (isAdvSIMDModImm) {
+      SDLoc dl(Op);
+      SDValue Mov;
+
+      if (LHS)
+        Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
+                          DAG.getConstant(Value, dl, MVT::i32),
+                          DAG.getConstant(Shift, dl, MVT::i32));
+      else
+        Mov = DAG.getNode(NewOp, dl, MovTy,
+                          DAG.getConstant(Value, dl, MVT::i32),
+                          DAG.getConstant(Shift, dl, MVT::i32));
+
+      return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
+    }
+  }
+
+  return SDValue();
+}
+
+// Try 32-bit splatted SIMD immediate with shifted ones.
+static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
+                                    SelectionDAG &DAG, const APInt &Bits) {
+  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
+    uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
+    EVT VT = Op.getValueType();
+    MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+    bool isAdvSIMDModImm = false;
+    uint64_t Shift;
+
+    if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
+      Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
+      Shift = 264;
+    }
+    else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
+      Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
+      Shift = 272;
+    }
+
+    if (isAdvSIMDModImm) {
+      SDLoc dl(Op);
+      SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
+                                DAG.getConstant(Value, dl, MVT::i32),
+                                DAG.getConstant(Shift, dl, MVT::i32));
+      return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
+    }
+  }
+
+  return SDValue();
+}
+
+// Try 8-bit splatted SIMD immediate.
+static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
+                                 const APInt &Bits) {
+  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
+    uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
+    EVT VT = Op.getValueType();
+    MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
+
+    if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
+      Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
+
+      SDLoc dl(Op);
+      SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
+                                DAG.getConstant(Value, dl, MVT::i32));
+      return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
+    }
+  }
+
+  return SDValue();
+}
+
+// Try FP splatted SIMD immediate.
+static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
+                                  const APInt &Bits) {
+  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
+    uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
+    EVT VT = Op.getValueType();
+    bool isWide = (VT.getSizeInBits() == 128);
+    MVT MovTy;
+    bool isAdvSIMDModImm = false;
+
+    if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
+      Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
+      MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
+    }
+    else if (isWide &&
+             (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
+      Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
+      MovTy = MVT::v2f64;
+    }
+
+    if (isAdvSIMDModImm) {
+      SDLoc dl(Op);
+      SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
+                                DAG.getConstant(Value, dl, MVT::i32));
+      return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
                                               SelectionDAG &DAG) const {
-  BuildVectorSDNode *BVN =
-      dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
   SDValue LHS = Op.getOperand(0);
-  SDLoc dl(Op);
   EVT VT = Op.getValueType();
 
+  BuildVectorSDNode *BVN =
+      dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
+  if (!BVN) {
+    // AND commutes, so try swapping the operands.
+    LHS = Op.getOperand(1);
+    BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
+  }
   if (!BVN)
     return Op;
 
-  APInt CnstBits(VT.getSizeInBits(), 0);
+  APInt DefBits(VT.getSizeInBits(), 0);
   APInt UndefBits(VT.getSizeInBits(), 0);
-  if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
-    // We only have BIC vector immediate instruction, which is and-not.
-    CnstBits = ~CnstBits;
-
-    // We make use of a little bit of goto ickiness in order to avoid having to
-    // duplicate the immediate matching logic for the undef toggled case.
-    bool SecondTry = false;
-  AttemptModImm:
-
-    if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
-      CnstBits = CnstBits.zextOrTrunc(64);
-      uint64_t CnstVal = CnstBits.getZExtValue();
-
-      if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(0, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(8, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(16, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(24, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
-        SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(0, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
-        SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(8, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (SecondTry)
-        goto FailedModImm;
-      SecondTry = true;
-      CnstBits = ~UndefBits;
-      goto AttemptModImm;
-    }
-  }
-
-// We can always fall back to a non-immediate AND.
-FailedModImm:
+  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
+    SDValue NewOp;
+
+    // We only have BIC vector immediate instruction, which is and-not.
+    DefBits = ~DefBits;
+    if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, Op, DAG,
+                                    DefBits, &LHS)) ||
+        (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, Op, DAG,
+                                    DefBits, &LHS)))
+      return NewOp;
+    else {
+      DefBits = ~UndefBits;
+      if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, Op, DAG,
+                                      DefBits, &LHS)) ||
+          (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, Op, DAG,
+                                      DefBits, &LHS)))
+        return NewOp;
+    }
+  }
+
+  // We can always fall back to a non-immediate AND.
   return Op;
 }
@@ -6444,96 +6584,40 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
     return Res;
   }
 
-  BuildVectorSDNode *BVN =
-      dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
-  SDValue LHS = Op.getOperand(1);
-  SDLoc dl(Op);
+  SDValue LHS = Op.getOperand(0);
   EVT VT = Op.getValueType();
 
-  // OR commutes, so try swapping the operands.
+  BuildVectorSDNode *BVN =
+      dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
   if (!BVN) {
-    LHS = Op.getOperand(0);
-    BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
+    // OR commutes, so try swapping the operands.
+    LHS = Op.getOperand(1);
+    BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
   }
   if (!BVN)
     return Op;
 
-  APInt CnstBits(VT.getSizeInBits(), 0);
+  APInt DefBits(VT.getSizeInBits(), 0);
   APInt UndefBits(VT.getSizeInBits(), 0);
-  if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
-    // We make use of a little bit of goto ickiness in order to avoid having to
-    // duplicate the immediate matching logic for the undef toggled case.
-    bool SecondTry = false;
-  AttemptModImm:
-
-    if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
-      CnstBits = CnstBits.zextOrTrunc(64);
-      uint64_t CnstVal = CnstBits.getZExtValue();
-
-      if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(0, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(8, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(16, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(24, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
-        SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(0, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
-        SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(8, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (SecondTry)
-        goto FailedModImm;
-      SecondTry = true;
-      CnstBits = UndefBits;
-      goto AttemptModImm;
-    }
-  }
-
-// We can always fall back to a non-immediate OR.
-FailedModImm:
+  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
+    SDValue NewOp;
+
+    if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
+                                    DefBits, &LHS)) ||
+        (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
+                                    DefBits, &LHS)))
+      return NewOp;
+    else {
+      DefBits = UndefBits;
+      if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
+                                      DefBits, &LHS)) ||
+          (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
+                                      DefBits, &LHS)))
+        return NewOp;
+    }
+  }
+
+  // We can always fall back to a non-immediate OR.
   return Op;
 }
@@ -6565,222 +6649,51 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDLoc dl(Op);
   EVT VT = Op.getValueType();
+  Op = NormalizeBuildVector(Op, DAG);
   BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
-
-  APInt CnstBits(VT.getSizeInBits(), 0);
+  APInt DefBits(VT.getSizeInBits(), 0);
   APInt UndefBits(VT.getSizeInBits(), 0);
-  if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
-    // We make use of a little bit of goto ickiness in order to avoid having to
-    // duplicate the immediate matching logic for the undef toggled case.
-    bool SecondTry = false;
-  AttemptModImm:
-
-    if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
-      CnstBits = CnstBits.zextOrTrunc(64);
-      uint64_t CnstVal = CnstBits.getZExtValue();
-
-      // Certain magic vector constants (used to express things like NOT
-      // and NEG) are passed through unmodified. This allows codegen patterns
-      // for these operations to match. Special-purpose patterns will lower
-      // these immediates to MOVIs if it proves necessary.
-      if (VT.isInteger() && (CnstVal == 0 || CnstVal == ~0ULL))
-        return Op;
-
-      // The many faces of MOVI...
-      if (AArch64_AM::isAdvSIMDModImmType10(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal);
-        if (VT.getSizeInBits() == 128) {
-          SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
-                                    DAG.getConstant(CnstVal, dl, MVT::i32));
-          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-        }
-
-        // Support the V64 version via subregister insertion.
-        SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(0, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(8, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(16, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(24, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
-        SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(0, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
-        SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(8, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(264, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(272, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType9(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
-        SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      // The few faces of FMOV...
-      if (AArch64_AM::isAdvSIMDModImmType11(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
-        SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType12(CnstVal) &&
-          VT.getSizeInBits() == 128) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
-        SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      // The many faces of MVNI...
-      CnstVal = ~CnstVal;
-      if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(0, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(8, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(16, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(24, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
-        SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(0, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
-        SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(8, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(264, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-
-      if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
-        CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
-                                  DAG.getConstant(CnstVal, dl, MVT::i32),
-                                  DAG.getConstant(272, dl, MVT::i32));
-        return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
-      }
-    }
-
-    if (SecondTry)
-      goto FailedModImm;
-    SecondTry = true;
-    CnstBits = UndefBits;
-    goto AttemptModImm;
-  }
+  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
+    // Certain magic vector constants (used to express things like NOT
+    // and NEG) are passed through unmodified. This allows codegen patterns
+    // for these operations to match. Special-purpose patterns will lower
+    // these immediates to MOVI if it proves necessary.
+    uint64_t DefVal = DefBits.zextOrTrunc(64).getZExtValue();
+    if (DefBits.getHiBits(64) == DefBits.getLoBits(64) &&
+        VT.isInteger() && (DefVal == 0 || DefVal == UINT64_MAX))
+      return Op;
+
+    SDValue NewOp;
+    if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
+        (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
+        (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
+        (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
+        (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
+        (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
+      return NewOp;
+
+    DefBits = ~DefBits;
+    if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
+        (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
+        (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
+      return NewOp;
+
+    DefBits = UndefBits;
+    if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
+        (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
+        (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
+        (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
+        (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
+        (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
+      return NewOp;
+
+    DefBits = ~UndefBits;
+    if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
+        (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
+        (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
+      return NewOp;
+  }
 
-FailedModImm:
   // Scan through the operands to find some interesting properties we can
   // exploit:
diff --git a/llvm/test/CodeGen/AArch64/build-one-lane.ll b/llvm/test/CodeGen/AArch64/build-one-lane.ll
index 722d62437a3..a134964e74e 100644
--- a/llvm/test/CodeGen/AArch64/build-one-lane.ll
+++ b/llvm/test/CodeGen/AArch64/build-one-lane.ll
@@ -7,6 +7,7 @@ define <8 x i8> @v8i8(i8 %t, i8 %s) nounwind {
   %v = insertelement <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef>, i8 %s, i32 7
   ret <8 x i8> %v
 
+; CHECK-LABEL: v8i8:
 ; CHECK: movi v[[R:[0-9]+]].8b, #0
 ; CHECK: mov v[[R]].b[7], w{{[0-9]+}}
 }
@@ -15,6 +16,7 @@ define <16 x i8> @v16i8(i8 %t, i8 %s) nounwind {
   %v = insertelement <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef>, i8 %s, i32 15
   ret <16 x i8> %v
 
+; CHECK-LABEL: v16i8:
 ; CHECK: movi v[[R:[0-9]+]].16b, #0
 ; CHECK: mov v[[R]].b[15], w{{[0-9]+}}
 }
@@ -23,6 +25,7 @@ define <4 x i16> @v4i16(i16 %t, i16 %s) nounwind {
   %v = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, i16 %s, i32 3
   ret <4 x i16> %v
 
+; CHECK-LABEL: v4i16:
 ; CHECK: movi v[[R:[0-9]+]].4h, #0
 ; CHECK: mov v[[R]].h[3], w{{[0-9]+}}
 }
@@ -31,6 +34,7 @@ define <8 x i16> @v8i16(i16 %t, i16 %s) nounwind {
   %v = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>, i16 %s, i32 7
   ret <8 x i16> %v
 
+; CHECK-LABEL: v8i16:
 ; CHECK: movi v[[R:[0-9]+]].8h, #0
 ; CHECK: mov v[[R]].h[7], w{{[0-9]+}}
 }
@@ -39,6 +43,7 @@ define <2 x i32> @v2i32(i32 %t, i32 %s) nounwind {
   %v = insertelement <2 x i32> <i32 0, i32 undef>, i32 %s, i32 1
   ret <2 x i32> %v
 
+; CHECK-LABEL: v2i32:
 ; CHECK: movi v[[R:[0-9]+]].2s, #0
 ; CHECK: mov v[[R]].s[1], w{{[0-9]+}}
 }
@@ -47,6 +52,7 @@ define <4 x i32> @v4i32(i32 %t, i32 %s) nounwind {
   %v = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, i32 %s, i32 3
   ret <4 x i32> %v
 
+; CHECK-LABEL: v4i32:
 ; CHECK: movi v[[R:[0-9]+]].4s, #0
 ; CHECK: mov v[[R]].s[3], w{{[0-9]+}}
 }
@@ -55,6 +61,7 @@ define <2 x i64> @v2i64(i64 %t, i64 %s) nounwind {
   %v = insertelement <2 x i64> <i64 0, i64 undef>, i64 %s, i32 1
   ret <2 x i64> %v
 
+; CHECK-LABEL: v2i64:
 ; CHECK: movi v[[R:[0-9]+]].2d, #0
 ; CHECK: mov v[[R]].d[1], x{{[0-9]+}}
 }
@@ -63,6 +70,7 @@ define <2 x float> @v2f32(float %t, float %s) nounwind {
   %v = insertelement <2 x float> <float 0.0, float undef>, float %s, i32 1
   ret <2 x float> %v
 
+; CHECK-LABEL: v2f32:
 ; CHECK: movi v[[R:[0-9]+]].2s, #0
 ; CHECK: mov v[[R]].s[1], v{{[0-9]+}}.s[0]
 }
@@ -71,6 +79,7 @@ define <4 x float> @v4f32(float %t, float %s) nounwind {
   %v = insertelement <4 x float> <float 0.0, float 0.0, float 0.0, float undef>, float %s, i32 3
   ret <4 x float> %v
 
+; CHECK-LABEL: v4f32:
 ; CHECK: movi v[[R:[0-9]+]].4s, #0
 ; CHECK: mov v[[R]].s[3], v{{[0-9]+}}.s[0]
 }
@@ -79,6 +88,87 @@ define <2 x double> @v2f64(double %t, double %s) nounwind {
   %v = insertelement <2 x double> <double 0.0, double undef>, double %s, i32 1
   ret <2 x double> %v
 
+; CHECK-LABEL: v2f64:
 ; CHECK: movi v[[R:[0-9]+]].2d, #0
 ; CHECK: mov v[[R]].d[1], v{{[0-9]+}}.d[0]
 }
+
+define void @v8i8st(<8 x i8>* %p, <8 x i8> %s) nounwind {
+  store <8 x i8> <i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64>, <8 x i8>* %p, align 8
+  ret void
+
+; CHECK-LABEL: v8i8st:
+; CHECK: movi v[[R:[0-9]+]].8b, #64
+}
+
+define void @v16i8st(<16 x i8>* %p, <16 x i8> %s) nounwind {
+  store <16 x i8> <i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64>, <16 x i8>* %p, align 16
+  ret void
+
+; CHECK-LABEL: v16i8st:
+; CHECK: movi v[[R:[0-9]+]].16b, #64
+}
+
+define void @v4i16st(<4 x i16>* %p, <4 x i16> %s) nounwind {
+  store <4 x i16> <i16 16384, i16 16384, i16 16384, i16 16384>, <4 x i16>* %p, align 8
+  ret void
+
+; CHECK-LABEL: v4i16st:
+; CHECK: movi v[[R:[0-9]+]].4h, #64, lsl #8
+}
+
+define void @v8i16st(<8 x i16>* %p, <8 x i16> %s) nounwind {
+  store <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>, <8 x i16>* %p, align 16
+  ret void
+
+; CHECK-LABEL: v8i16st:
+; CHECK: movi v[[R:[0-9]+]].8h, #64, lsl #8
+}
+
+define void @v2i32st(<2 x i32>* %p, <2 x i32> %s) nounwind {
+  store <2 x i32> <i32 1073741824, i32 1073741824>, <2 x i32>* %p, align 8
+  ret void
+
+; CHECK-LABEL: v2i32st:
+; CHECK: movi v[[R:[0-9]+]].2s, #64, lsl #24
+}
+
+define void @v4i32st(<4 x i32>* %p, <4 x i32> %s) nounwind {
+  store <4 x i32> <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824>, <4 x i32>* %p, align 16
+  ret void
+
+; CHECK-LABEL: v4i32st:
+; CHECK: movi v[[R:[0-9]+]].4s, #64, lsl #24
+}
+
+define void @v2i64st(<2 x i64>* %p, <2 x i64> %s) nounwind {
+  store <2 x i64> <i64 4611686018427387904, i64 4611686018427387904>, <2 x i64>* %p, align 16
+  ret void
+
+; CHECK-LABEL: v2i64st
+; CHECK: fmov v[[R:[0-9]+]].2d, #2.0
+}
+
+define void @v2f32st(<2 x float>* %p, <2 x float> %s) nounwind {
+  store <2 x float> <float 2.0, float 2.0>, <2 x float>* %p, align 8
+  ret void
+
+; CHECK-LABEL: v2f32st
+; CHECK: movi v[[R:[0-9]+]].2s, #64, lsl #24
+}
+
+define void @v4f32st(<4 x float>* %p, <4 x float> %s) nounwind {
+  store <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, <4 x float>* %p, align 16
+  ret void
+
+; CHECK-LABEL: v4f32st:
+; CHECK: movi v[[R:[0-9]+]].4s, #64, lsl #24
+}
+
+define void @v2f64st(<2 x double>* %p, <2 x double> %s) nounwind {
+  store <2 x double> <double 2.0, double 2.0>, <2 x double>* %p, align 16
+  ret void
+
+; CHECK-LABEL: v2f64st:
+; CHECK: fmov v[[R:[0-9]+]].2d, #2.0
+}
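A closing editorial note on the idiom the new code leans on: an `SDValue` tests false when default-constructed, so the callers can chain the helpers by assigning inside `||` and stopping at the first match. A minimal self-contained sketch of that pattern with toy types (not the LLVM classes) follows.

```cpp
#include <cstdio>

// Toy stand-in for SDValue: default-constructed means "no match", and the
// boolean test distinguishes the two states.
struct Value {
  int Node = 0;                             // 0 encodes "empty"
  explicit operator bool() const { return Node != 0; }
};

// Toy matchers, analogous to tryAdvSIMDModImm32/tryAdvSIMDModImm16: return
// an empty Value when the immediate cannot be encoded at this width.
Value try32(unsigned Imm) { return Imm % 4 == 0 ? Value{32} : Value{}; }
Value try16(unsigned Imm) { return Imm % 2 == 0 ? Value{16} : Value{}; }

Value lower(unsigned Imm) {
  Value NewOp;
  // Assign-inside-|| runs the matchers in priority order and stops at the
  // first success, exactly like the chains in the new LowerBUILD_VECTOR.
  if ((NewOp = try32(Imm)) ||
      (NewOp = try16(Imm)))
    return NewOp;
  return Value{};                           // fall back
}

int main() {
  std::printf("%d %d %d\n", lower(8).Node, lower(6).Node, lower(7).Node);
  // prints: 32 16 0
}
```

The same shape appears four times in the new LowerBUILD_VECTOR: the MOVI variants are tried on the defined bits, the MVNI variants on their complement, and then both chains run again with the undef bits substituted.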

