diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 97 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp | 13 | 
2 files changed, 99 insertions, 11 deletions
| diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 08759f28036..62bc010902c 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1484,10 +1484,56 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {      auto &MMO = **MI.memoperands_begin();      if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) { -      // In the case of G_LOAD, this was a non-extending load already and we're -      // about to lower to the same instruction. -      if (MI.getOpcode() == TargetOpcode::G_LOAD) +      if (MI.getOpcode() == TargetOpcode::G_LOAD) { +        // This load needs splitting into power of 2 sized loads. +        if (DstTy.isVector())            return UnableToLegalize; +        if (isPowerOf2_32(DstTy.getSizeInBits())) +          return UnableToLegalize; // Don't know what we're being asked to do. + +        // Our strategy here is to generate anyextending loads for the smaller +        // types up to next power-2 result type, and then combine the two larger +        // result values together, before truncating back down to the non-pow-2 +        // type. +        // E.g. v1 = i24 load => +        // v2 = i32 load (2 byte) +        // v3 = i32 load (1 byte) +        // v4 = i32 shl v2, 16 +        // v5 = i32 or v4, v3 +        // v1 = i24 trunc v5 +        // By doing this we generate the correct truncate which should get +        // combined away as an artifact with a matching extend. +        uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); +        uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; + +        MachineFunction &MF = MIRBuilder.getMF(); +        MachineMemOperand *LargeMMO = +            MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); +        MachineMemOperand *SmallMMO = MF.getMachineMemOperand( +            &MMO, LargeSplitSize / 8, SmallSplitSize / 8); + +        LLT PtrTy = MRI.getType(PtrReg); +        unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); +        LLT AnyExtTy = LLT::scalar(AnyExtSize); +        unsigned LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); +        unsigned SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); +        auto LargeLoad = +            MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO); + +        auto OffsetCst = +            MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); +        unsigned GEPReg = MRI.createGenericVirtualRegister(PtrTy); +        auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); +        auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), +                                              *SmallMMO); + +        auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); +        auto Shift = MIRBuilder.buildShl(AnyExtTy, LargeLoad, ShiftAmt); +        auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, SmallLoad); +        MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); +        MI.eraseFromParent(); +        return Legalized; +      }        MIRBuilder.buildLoad(DstReg, PtrReg, MMO);        MI.eraseFromParent();        return Legalized; @@ -1516,6 +1562,51 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {      return UnableToLegalize;    } +  case TargetOpcode::G_STORE: { +    // Lower a non-power of 2 store into multiple pow-2 stores. +    // E.g. split an i24 store into an i16 store + i8 store. +    // We do this by first extending the stored value to the next largest power +    // of 2 type, and then using truncating stores to store the components. +    // By doing this, likewise with G_LOAD, generate an extend that can be +    // artifact-combined away instead of leaving behind extracts. +    unsigned SrcReg = MI.getOperand(0).getReg(); +    unsigned PtrReg = MI.getOperand(1).getReg(); +    LLT SrcTy = MRI.getType(SrcReg); +    MachineMemOperand &MMO = **MI.memoperands_begin(); +    if (SrcTy.getSizeInBits() != MMO.getSize() /* in bytes */ * 8) +      return UnableToLegalize; +    if (SrcTy.isVector()) +      return UnableToLegalize; +    if (isPowerOf2_32(SrcTy.getSizeInBits())) +      return UnableToLegalize; // Don't know what we're being asked to do. + +    // Extend to the next pow-2. +    const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); +    auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); + +    // Obtain the smaller value by shifting away the larger value. +    uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); +    uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; +    auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); +    auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); + +    // Generate the GEP and truncating stores. +    LLT PtrTy = MRI.getType(PtrReg); +    auto OffsetCst = +        MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); +    unsigned GEPReg = MRI.createGenericVirtualRegister(PtrTy); +    auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); + +    MachineFunction &MF = MIRBuilder.getMF(); +    MachineMemOperand *LargeMMO = +        MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); +    MachineMemOperand *SmallMMO = +        MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); +    MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); +    MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); +    MI.eraseFromParent(); +    return Legalized; +  }    case TargetOpcode::G_CTLZ_ZERO_UNDEF:    case TargetOpcode::G_CTTZ_ZERO_UNDEF:    case TargetOpcode::G_CTLZ: diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 85110b2ec76..8f7a521b559 100644 --- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -234,14 +234,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {        .legalForTypesWithMemDesc({{s32, p0, 8, 8},                                   {s32, p0, 16, 8}})        .clampScalar(0, s8, s64) -      .widenScalarToNextPow2(0) -      // TODO: We could support sum-of-pow2's but the lowering code doesn't know -      //       how to do that yet. -      .unsupportedIfMemSizeNotPow2() +      .lowerIfMemSizeNotPow2()        // Lower any any-extending loads left into G_ANYEXT and G_LOAD        .lowerIf([=](const LegalityQuery &Query) {          return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;        }) +      .widenScalarToNextPow2(0)        .clampMaxNumElements(0, s32, 2)        .clampMaxNumElements(0, s64, 1)        .customIf(IsPtrVecPred); @@ -249,6 +247,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {    getActionDefinitionsBuilder(G_STORE)        .legalForTypesWithMemDesc({{s8, p0, 8, 8},                                   {s16, p0, 16, 8}, +                                 {s32, p0, 8, 8}, +                                 {s32, p0, 16, 8},                                   {s32, p0, 32, 8},                                   {s64, p0, 64, 8},                                   {p0, p0, 64, 8}, @@ -259,10 +259,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {                                   {v4s32, p0, 128, 8},                                   {v2s64, p0, 128, 8}})        .clampScalar(0, s8, s64) -      .widenScalarToNextPow2(0) -      // TODO: We could support sum-of-pow2's but the lowering code doesn't know -      //       how to do that yet. -      .unsupportedIfMemSizeNotPow2() +      .lowerIfMemSizeNotPow2()        .lowerIf([=](const LegalityQuery &Query) {          return Query.Types[0].isScalar() &&                 Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; | 

