diff options
5 files changed, 32 insertions, 153 deletions
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index d1eea060143..6e4c9674519 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -639,10 +639,6 @@ public:      return actionIf(LegalizeAction::Unsupported,                      LegalityPredicates::memSizeInBytesNotPow2(0));    } -  LegalizeRuleSet &lowerIfMemSizeNotPow2() { -    return actionIf(LegalizeAction::Lower, -                    LegalityPredicates::memSizeInBytesNotPow2(0)); -  }    LegalizeRuleSet &customIf(LegalityPredicate Predicate) {      // We have no choice but conservatively assume that a custom action with a diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index b491278a9b8..71e7b169ad0 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1483,57 +1483,11 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {      LLT DstTy = MRI.getType(DstReg);      auto &MMO = **MI.memoperands_begin(); -    if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { -      if (MI.getOpcode() == TargetOpcode::G_LOAD) { -        // This load needs splitting into power of 2 sized loads. -        if (DstTy.isVector()) +    if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) { +      // In the case of G_LOAD, this was a non-extending load already and we're +      // about to lower to the same instruction. +      if (MI.getOpcode() == TargetOpcode::G_LOAD)            return UnableToLegalize; -        if (isPowerOf2_32(DstTy.getSizeInBits())) -          return UnableToLegalize; // Don't know what we're being asked to do. - -        // Our strategy here is to generate anyextending loads for the smaller -        // types up to next power-2 result type, and then combine the two larger -        // result values together, before truncating back down to the non-pow-2 -        // type. -        // E.g. v1 = i24 load => -        // v2 = i32 load (2 byte) -        // v3 = i32 load (1 byte) -        // v4 = i32 shl v3, 16 -        // v5 = i32 or v4, v2 -        // v1 = i24 trunc v5 -        // By doing this we generate the correct truncate which should get -        // combined away as an artifact with a matching extend. -        uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); -        uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; - -        MachineFunction &MF = MIRBuilder.getMF(); -        MachineMemOperand *LargeMMO = -            MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); -        MachineMemOperand *SmallMMO = MF.getMachineMemOperand( -            &MMO, LargeSplitSize / 8, SmallSplitSize / 8); - -        LLT PtrTy = MRI.getType(PtrReg); -        unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); -        LLT AnyExtTy = LLT::scalar(AnyExtSize); -        unsigned LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); -        unsigned SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); -        auto LargeLoad = -            MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO); - -        auto OffsetCst = -            MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); -        unsigned GEPReg = MRI.createGenericVirtualRegister(PtrTy); -        auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); -        auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), -                                              *SmallMMO); - -        auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); -        auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); -        auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); -        MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); -        MI.eraseFromParent(); -        return Legalized; -      }        MIRBuilder.buildLoad(DstReg, PtrReg, MMO);        MI.eraseFromParent();        return Legalized; @@ -1562,51 +1516,6 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {      return UnableToLegalize;    } -  case TargetOpcode::G_STORE: { -    // Lower a non-power of 2 store into multiple pow-2 stores. -    // E.g. split an i24 store into an i16 store + i8 store. -    // We do this by first extending the stored value to the next largest power -    // of 2 type, and then using truncating stores to store the components. -    // By doing this, likewise with G_LOAD, generate an extend that can be -    // artifact-combined away instead of leaving behind extracts. -    unsigned SrcReg = MI.getOperand(0).getReg(); -    unsigned PtrReg = MI.getOperand(1).getReg(); -    LLT SrcTy = MRI.getType(SrcReg); -    MachineMemOperand &MMO = **MI.memoperands_begin(); -    if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) -      return UnableToLegalize; -    if (SrcTy.isVector()) -      return UnableToLegalize; -    if (isPowerOf2_32(SrcTy.getSizeInBits())) -      return UnableToLegalize; // Don't know what we're being asked to do. - -    // Extend to the next pow-2. -    const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); -    auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); - -    // Obtain the smaller value by shifting away the larger value. -    uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); -    uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; -    auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); -    auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); - -    // Generate the GEP and truncating stores. -    LLT PtrTy = MRI.getType(PtrReg); -    auto OffsetCst = -        MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); -    unsigned GEPReg = MRI.createGenericVirtualRegister(PtrTy); -    auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); - -    MachineFunction &MF = MIRBuilder.getMF(); -    MachineMemOperand *LargeMMO = -        MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); -    MachineMemOperand *SmallMMO = -        MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); -    MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); -    MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); -    MI.eraseFromParent(); -    return Legalized; -  }    case TargetOpcode::G_CTLZ_ZERO_UNDEF:    case TargetOpcode::G_CTTZ_ZERO_UNDEF:    case TargetOpcode::G_CTLZ: diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 7359ac0383b..46f715ac7dc 100644 --- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -235,12 +235,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {        .legalForTypesWithMemDesc({{s32, p0, 8, 8},                                   {s32, p0, 16, 8}})        .clampScalar(0, s8, s64) -      .lowerIfMemSizeNotPow2() +      .widenScalarToNextPow2(0) +      // TODO: We could support sum-of-pow2's but the lowering code doesn't know +      //       how to do that yet. +      .unsupportedIfMemSizeNotPow2()        // Lower any any-extending loads left into G_ANYEXT and G_LOAD        .lowerIf([=](const LegalityQuery &Query) {          return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;        }) -      .widenScalarToNextPow2(0)        .clampMaxNumElements(0, s32, 2)        .clampMaxNumElements(0, s64, 1)        .customIf(IsPtrVecPred); @@ -248,8 +250,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {    getActionDefinitionsBuilder(G_STORE)        .legalForTypesWithMemDesc({{s8, p0, 8, 8},                                   {s16, p0, 16, 8}, -                                 {s32, p0, 8, 8}, -                                 {s32, p0, 16, 8},                                   {s32, p0, 32, 8},                                   {s64, p0, 64, 8},                                   {p0, p0, 64, 8}, @@ -260,7 +260,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {                                   {v4s32, p0, 128, 8},                                   {v2s64, p0, 128, 8}})        .clampScalar(0, s8, s64) -      .lowerIfMemSizeNotPow2() +      .widenScalarToNextPow2(0) +      // TODO: We could support sum-of-pow2's but the lowering code doesn't know +      //       how to do that yet. +      .unsupportedIfMemSizeNotPow2()        .lowerIf([=](const LegalityQuery &Query) {          return Query.Types[0].isScalar() &&                 Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll index a21c251c3a7..a0c3af5c1b5 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -54,6 +54,26 @@ false:  } +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s32) = G_LOAD %1:_(p0) :: (load 3 from `i24* undef`, align 1) (in function: odd_type_load) +; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_type_load +; FALLBACK-WITH-REPORT-OUT-LABEL: odd_type_load +define i32 @odd_type_load() { +entry: +  %ld = load i24, i24* undef, align 1 +  %cst = zext i24 %ld to i32 +  ret i32 %cst +} + +  ; General legalizer inability to handle types whose size wasn't a power of 2. +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(s42), %0:_(p0) :: (store 6 into %ir.addr, align 8) (in function: odd_type) +; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_type +; FALLBACK-WITH-REPORT-OUT-LABEL: odd_type: +define void @odd_type(i42* %addr) { +  %val42 = load i42, i42* %addr +  store i42 %val42, i42* %addr +  ret void +} +  ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(<7 x s32>), %0:_(p0) :: (store 28 into %ir.addr, align 32) (in function: odd_vector)  ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector  ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir deleted file mode 100644 index 86656dea333..00000000000 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir +++ /dev/null @@ -1,49 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=aarch64 -run-pass=legalizer %s -o - -verify-machineinstrs | FileCheck %s ---- | -  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -  target triple = "aarch64" - -  define i32 @load_store_test(i24* %ptr, i24* %ptr2) { -    %val = load i24, i24* %ptr -    store i24 %val, i24* %ptr2 -    ret i32 0 -  } - -... ---- -name:            load_store_test -alignment:       2 -tracksRegLiveness: true -body:             | -  bb.1 (%ir-block.0): -    liveins: $x0, $x1 - -    ; CHECK-LABEL: name: load_store_test -    ; CHECK: liveins: $x0, $x1 -    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 -    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 -    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 -    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2 from %ir.ptr, align 4) -    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 -    ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) -    ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 1 from %ir.ptr + 2, align 4) -    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 -    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C2]](s32) -    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LOAD]] -    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) -    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) -    ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C1]](s64) -    ; CHECK: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store 2 into %ir.ptr2, align 4) -    ; CHECK: G_STORE [[LSHR]](s32), [[GEP1]](p0) :: (store 1 into %ir.ptr2 + 2, align 4) -    ; CHECK: $w0 = COPY [[C]](s32) -    ; CHECK: RET_ReallyLR implicit $w0 -    %0:_(p0) = COPY $x0 -    %1:_(p0) = COPY $x1 -    %3:_(s32) = G_CONSTANT i32 0 -    %2:_(s24) = G_LOAD %0(p0) :: (load 3 from %ir.ptr, align 4) -    G_STORE %2(s24), %1(p0) :: (store 3 into %ir.ptr2, align 4) -    $w0 = COPY %3(s32) -    RET_ReallyLR implicit $w0 - -...  | 

