| author | Roman Tereshin <rtereshin@apple.com> | 2019-12-12 13:20:41 -0800 |
|---|---|---|
| committer | Roman Tereshin <rtereshin@apple.com> | 2019-12-13 15:45:18 -0800 |
| commit | 8731799fc6cf70bf4de5688ada506a222272a6a2 (patch) | |
| tree | 9ebed0493a2e970c3576447e12875fc93f29739e /llvm | |
| parent | 18bf9670aac901f71bc43ac55c5feb603555a1f6 (diff) | |
[Legalizer] Making artifact combining order-independent
The legalization algorithm is complicated by two facts:
1) While regular instructions should be legalizable in an isolated,
per-instruction, context-free manner, legalization artifacts can only be
eliminated in pairs, which may be deeply and arbitrarily nested:
{ [ () ] }, where each parenthesis kind stands for an artifact kind, like
extend, unmerge, etc. Such a structure can only be fully eliminated by
simple local combines if they are attempted in a particular order (inside
out), or alternatively by repeated scans, each eliminating only one
innermost pair, resulting in O(n^2) complexity (see the sketch after this
list).
2) Some artifacts might in fact be regular instructions that could (and
sometimes should) be legalized by the target-specific rules. This means
that failing to eliminate all artifacts on the first iteration is not a
failure: they need to be retried as regular instructions, which may
produce more artifacts, including ones that are in fact regular
instructions, so a non-constant number of iterations is required to
finish the process.
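To make the ordering constraint in 1) concrete, below is a minimal, self-contained sketch of the bracket analogy. It is purely illustrative and not part of the patch: a scan that never revisits what it has already emitted strips only the innermost pairs per pass (so a fully nested input needs O(n) passes, O(n^2) total work), while a scan that backs up one step after each removal (the analogue of revisiting the users of a just-combined definition) eliminates the whole nest in a single O(n) pass.

```cpp
#include <string>

// Purely illustrative analogy, not code from this patch.
bool matches(char L, char R) {
  return (L == '(' && R == ')') || (L == '[' && R == ']') ||
         (L == '{' && R == '}');
}

// One scan that never backs up: only the currently innermost pairs go away,
// so "{[()]}" becomes "{[]}" and further passes are needed.
std::string stripInnermostOnce(const std::string &S) {
  std::string Out;
  for (size_t I = 0; I < S.size(); ++I) {
    if (I + 1 < S.size() && matches(S[I], S[I + 1]))
      ++I; // drop the pair, but do not reconsider what is already in Out
    else
      Out.push_back(S[I]);
  }
  return Out;
}

// One scan that backs up one step after every removal: "{[()]}" becomes ""
// in a single pass, because each removal may expose the next combinable pair.
std::string stripAllInOnePass(const std::string &S) {
  std::string Out;
  for (char C : S) {
    if (!Out.empty() && matches(Out.back(), C))
      Out.pop_back(); // cascade outward along the chain
    else
      Out.push_back(C);
  }
  return Out;
}
```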
I trust the recently introduced termination condition (no new artifacts
were created during the as-a-regular-instruction retrial of artifacts not
eliminated on the previous iteration) to be effective in guaranteeing
termination, but it only performs the legalization in full if at each
step such chains of artifacts are successfully eliminated in full as
well.
That is currently not guaranteed, as the artifact combines are applied
only once, and in an arbitrary order that depends on the order of
creation or insertion of artifacts into their worklist, which is no
particular order at all.
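The termination check itself lives in Legalizer.cpp; the hunk in the diff below (reflowed here for readability, surrounding code elided) shows how this patch also simplifies the retry decision: since each iteration after the first now starts with an empty artifact list, checking whether any new artifacts exist at all is enough, instead of comparing against the previous count.

```cpp
// Reflowed from the Legalizer.cpp hunk in the diff below. When an
// instruction cannot be legalized and turns out to be an artifact, it is
// parked on RetryList:
if (isArtifact(MI)) {
  LLVM_DEBUG(dbgs() << ".. Not legalized, moving to artifacts retry\n");
  assert(NumArtifacts == 0 &&
         "Artifacts are only expected in instruction list starting the "
         "second iteration, but each iteration starting second must "
         "start with an empty artifacts list");
  (void)NumArtifacts;
  RetryList.push_back(&MI);
  continue;
}

// Later, the retried artifacts are fed back in only if combining produced
// new artifacts; before this patch the condition was
// `ArtifactList.size() > NumArtifacts`:
if (!RetryList.empty()) {
  if (!ArtifactList.empty()) {
    while (!RetryList.empty())
      ArtifactList.insert(RetryList.pop_back_val());
  } else {
    // No new artifacts: no further progress is possible (handling elided).
  }
}
```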
In this patch I make a small change to the artifact combiner, making it
re-insert into the worklist the immediate (modulo look-through copies)
artifact users of each vreg whose definition changes as a result of an
artifact combine.
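Mechanically (see the LegalizationArtifactCombiner.h hunks in the diff below), every tryCombine* helper gains a SmallVectorImpl<Register> &UpdatedDefs out-parameter and records each vreg it redefines. An abridged sketch of the anyext(trunc x) case, reflowed from the hunks with unchanged context elided:

```cpp
// Reflowed and abridged from the tryCombineAnyExt hunks in the diff below.
bool tryCombineAnyExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs) {
  assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
  Builder.setInstr(MI);
  // ... DstReg/SrcReg/TruncSrc setup elided; aext(trunc x) -> aext/copy/trunc x:
  if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
    LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
    Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
    UpdatedDefs.push_back(DstReg); // new: remember that DstReg was redefined
    markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
    return true;
  }
  // ... the remaining cases push DstReg into UpdatedDefs the same way.
  return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
}
```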
With that change, the first scan through the artifact worklist, while not
done in any guaranteed order, only needs to find the innermost pair(s) of
artifacts that can be combined away immediately. After that, the process
follows def-use chains, making them shorter at each step, and thus
combines everything that can be combined in O(n) time.
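The follow-the-def-use-chain part is the new loop at the end of tryCombineInstruction, reflowed here from the corresponding hunk in the diff below: redefined vregs are popped one by one, and every artifact user (looking through COPYs) is pushed back into the ArtifactList via the observer.

```cpp
// Reflowed from the tryCombineInstruction hunk in the diff below.
// If the main loop through the ArtifactList found at least one combinable
// pair of artifacts, not only combine it away (as done above), but also
// follow the def-use chain from there to combine everything that can be
// combined within this def-use chain of artifacts.
while (!UpdatedDefs.empty()) {
  Register NewDef = UpdatedDefs.pop_back_val();
  assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
  for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
    switch (Use.getOpcode()) {
    // Keep this list in sync with the list of all artifact combines.
    case TargetOpcode::G_ANYEXT:
    case TargetOpcode::G_ZEXT:
    case TargetOpcode::G_SEXT:
    case TargetOpcode::G_UNMERGE_VALUES:
    case TargetOpcode::G_EXTRACT:
    case TargetOpcode::G_TRUNC:
      // Adding Use to ArtifactList.
      WrapperObserver.changedInstr(Use);
      break;
    case TargetOpcode::COPY: {
      Register Copy = Use.getOperand(0).getReg();
      if (Copy.isVirtual())
        UpdatedDefs.push_back(Copy); // look through copies
      break;
    }
    default:
      // No artifact combine for this opcode; nothing to re-add.
      break;
    }
  }
}
return Changed;
```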
Reviewers: volkan, aditya_nandakumar, qcolombet, paquette, aemerson, dsanders
Reviewed By: aditya_nandakumar, paquette
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71448
Diffstat (limited to 'llvm')
10 files changed, 348 insertions, 144 deletions
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 376d07dee8b..4c2ab316930 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -46,7 +46,8 @@ public: : Builder(B), MRI(MRI), LI(LI) {} bool tryCombineAnyExt(MachineInstr &MI, - SmallVectorImpl<MachineInstr *> &DeadInsts) { + SmallVectorImpl<MachineInstr *> &DeadInsts, + SmallVectorImpl<Register> &UpdatedDefs) { assert(MI.getOpcode() == TargetOpcode::G_ANYEXT); Builder.setInstr(MI); @@ -58,6 +59,7 @@ public: if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); Builder.buildAnyExtOrTrunc(DstReg, TruncSrc); + UpdatedDefs.push_back(DstReg); markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } @@ -70,6 +72,7 @@ public: m_GSExt(m_Reg(ExtSrc)), m_GZExt(m_Reg(ExtSrc)))))) { Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc}); + UpdatedDefs.push_back(DstReg); markInstAndDefDead(MI, *ExtMI, DeadInsts); return true; } @@ -83,15 +86,17 @@ public: auto &CstVal = SrcMI->getOperand(1); Builder.buildConstant( DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits())); + UpdatedDefs.push_back(DstReg); markInstAndDefDead(MI, *SrcMI, DeadInsts); return true; } } - return tryFoldImplicitDef(MI, DeadInsts); + return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); } bool tryCombineZExt(MachineInstr &MI, - SmallVectorImpl<MachineInstr *> &DeadInsts) { + SmallVectorImpl<MachineInstr *> &DeadInsts, + SmallVectorImpl<Register> &UpdatedDefs) { assert(MI.getOpcode() == TargetOpcode::G_ZEXT); Builder.setInstr(MI); @@ -124,15 +129,17 @@ public: auto &CstVal = SrcMI->getOperand(1); Builder.buildConstant( DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits())); + UpdatedDefs.push_back(DstReg); markInstAndDefDead(MI, *SrcMI, DeadInsts); return true; } } - return tryFoldImplicitDef(MI, DeadInsts); + return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); } bool tryCombineSExt(MachineInstr &MI, - SmallVectorImpl<MachineInstr *> &DeadInsts) { + SmallVectorImpl<MachineInstr *> &DeadInsts, + SmallVectorImpl<Register> &UpdatedDefs) { assert(MI.getOpcode() == TargetOpcode::G_SEXT); Builder.setInstr(MI); @@ -154,11 +161,12 @@ public: markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } - return tryFoldImplicitDef(MI, DeadInsts); + return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); } bool tryCombineTrunc(MachineInstr &MI, - SmallVectorImpl<MachineInstr *> &DeadInsts) { + SmallVectorImpl<MachineInstr *> &DeadInsts, + SmallVectorImpl<Register> &UpdatedDefs) { assert(MI.getOpcode() == TargetOpcode::G_TRUNC); Builder.setInstr(MI); @@ -174,6 +182,7 @@ public: auto &CstVal = SrcMI->getOperand(1); Builder.buildConstant( DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits())); + UpdatedDefs.push_back(DstReg); markInstAndDefDead(MI, *SrcMI, DeadInsts); return true; } @@ -182,10 +191,10 @@ public: return false; } - /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF). bool tryFoldImplicitDef(MachineInstr &MI, - SmallVectorImpl<MachineInstr *> &DeadInsts) { + SmallVectorImpl<MachineInstr *> &DeadInsts, + SmallVectorImpl<Register> &UpdatedDefs) { unsigned Opcode = MI.getOpcode(); assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT || Opcode == TargetOpcode::G_SEXT); @@ -202,6 +211,7 @@ public: return false; LLVM_DEBUG(dbgs() << ".. 
Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;); Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {}); + UpdatedDefs.push_back(DstReg); } else { // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT. @@ -209,6 +219,7 @@ public: return false; LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;); Builder.buildConstant(DstReg, 0); + UpdatedDefs.push_back(DstReg); } markInstAndDefDead(MI, *DefMI, DeadInsts); @@ -269,7 +280,8 @@ public: } bool tryCombineMerges(MachineInstr &MI, - SmallVectorImpl<MachineInstr *> &DeadInsts) { + SmallVectorImpl<MachineInstr *> &DeadInsts, + SmallVectorImpl<Register> &UpdatedDefs) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES); unsigned NumDefs = MI.getNumOperands() - 1; @@ -319,8 +331,8 @@ public: SmallVector<Register, 2> TmpRegs; // This is a vector that is being scalarized and casted. Extract to // the element type, and do the conversion on the scalars. - LLT MergeEltTy - = MRI.getType(MergeI->getOperand(0).getReg()).getElementType(); + LLT MergeEltTy = + MRI.getType(MergeI->getOperand(0).getReg()).getElementType(); for (unsigned j = 0; j < NumMergeRegs; ++j) TmpRegs.push_back(MRI.createGenericVirtualRegister(MergeEltTy)); @@ -331,6 +343,7 @@ public: } else { Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg()); } + UpdatedDefs.append(DstRegs.begin(), DstRegs.end()); } } else if (NumMergeRegs > NumDefs) { @@ -352,7 +365,9 @@ public: ++j, ++Idx) Regs.push_back(MergeI->getOperand(Idx).getReg()); - Builder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs); + Register DefReg = MI.getOperand(DefIdx).getReg(); + Builder.buildMerge(DefReg, Regs); + UpdatedDefs.push_back(DefReg); } } else { @@ -366,8 +381,9 @@ public: for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { Register MergeSrc = MergeI->getOperand(Idx + 1).getReg(); - Builder.buildInstr(ConvertOp, {MI.getOperand(Idx).getReg()}, - {MergeSrc}); + Register DefReg = MI.getOperand(Idx).getReg(); + Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc}); + UpdatedDefs.push_back(DefReg); } markInstAndDefDead(MI, *MergeI, DeadInsts); @@ -378,9 +394,11 @@ public: "Bitcast and the other kinds of conversions should " "have happened earlier"); - for (unsigned Idx = 0; Idx < NumDefs; ++Idx) - MRI.replaceRegWith(MI.getOperand(Idx).getReg(), - MergeI->getOperand(Idx + 1).getReg()); + for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { + Register NewDef = MergeI->getOperand(Idx + 1).getReg(); + MRI.replaceRegWith(MI.getOperand(Idx).getReg(), NewDef); + UpdatedDefs.push_back(NewDef); + } } markInstAndDefDead(MI, *MergeI, DeadInsts); @@ -399,7 +417,8 @@ public: } bool tryCombineExtract(MachineInstr &MI, - SmallVectorImpl<MachineInstr *> &DeadInsts) { + SmallVectorImpl<MachineInstr *> &DeadInsts, + SmallVectorImpl<Register> &UpdatedDefs) { assert(MI.getOpcode() == TargetOpcode::G_EXTRACT); // Try to use the source registers from a G_MERGE_VALUES @@ -414,13 +433,14 @@ public: // for N >= %2.getSizeInBits() / 2 // %3 = G_EXTRACT %1, (N - %0.getSizeInBits() - unsigned Src = lookThroughCopyInstrs(MI.getOperand(1).getReg()); - MachineInstr *MergeI = MRI.getVRegDef(Src); + Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); + MachineInstr *MergeI = MRI.getVRegDef(SrcReg); if (!MergeI || !isMergeLikeOpcode(MergeI->getOpcode())) return false; - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - LLT SrcTy = MRI.getType(Src); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = 
MRI.getType(SrcReg); // TODO: Do we need to check if the resulting extract is supported? unsigned ExtractDstSize = DstTy.getSizeInBits(); @@ -438,10 +458,9 @@ public: // TODO: We could modify MI in place in most cases. Builder.setInstr(MI); - Builder.buildExtract( - MI.getOperand(0).getReg(), - MergeI->getOperand(MergeSrcIdx + 1).getReg(), - Offset - MergeSrcIdx * MergeSrcSize); + Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(), + Offset - MergeSrcIdx * MergeSrcSize); + UpdatedDefs.push_back(DstReg); markInstAndDefDead(MI, *MergeI, DeadInsts); return true; } @@ -458,33 +477,79 @@ public: // etc, process the dead instructions now if any. if (!DeadInsts.empty()) deleteMarkedDeadInsts(DeadInsts, WrapperObserver); + + // Put here every vreg that was redefined in such a way that it's at least + // possible that one (or more) of its users (immediate or COPY-separated) + // could become artifact combinable with the new definition (or the + // instruction reachable from it through a chain of copies if any). + SmallVector<Register, 4> UpdatedDefs; + bool Changed = false; switch (MI.getOpcode()) { default: return false; case TargetOpcode::G_ANYEXT: - return tryCombineAnyExt(MI, DeadInsts); + Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs); + break; case TargetOpcode::G_ZEXT: - return tryCombineZExt(MI, DeadInsts); + Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs); + break; case TargetOpcode::G_SEXT: - return tryCombineSExt(MI, DeadInsts); + Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs); + break; case TargetOpcode::G_UNMERGE_VALUES: - return tryCombineMerges(MI, DeadInsts); + Changed = tryCombineMerges(MI, DeadInsts, UpdatedDefs); + break; case TargetOpcode::G_EXTRACT: - return tryCombineExtract(MI, DeadInsts); - case TargetOpcode::G_TRUNC: { - if (tryCombineTrunc(MI, DeadInsts)) - return true; - - bool Changed = false; - for (auto &Use : MRI.use_instructions(MI.getOperand(0).getReg())) - Changed |= tryCombineInstruction(Use, DeadInsts, WrapperObserver); - return Changed; + Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs); + break; + case TargetOpcode::G_TRUNC: + Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs); + if (!Changed) { + // Try to combine truncates away even if they are legal. As all artifact + // combines at the moment look only "up" the def-use chains, we achieve + // that by throwing truncates' users (with look through copies) into the + // ArtifactList again. + UpdatedDefs.push_back(MI.getOperand(0).getReg()); + } + break; } + // If the main loop through the ArtifactList found at least one combinable + // pair of artifacts, not only combine it away (as done above), but also + // follow the def-use chain from there to combine everything that can be + // combined within this def-use chain of artifacts. + while (!UpdatedDefs.empty()) { + Register NewDef = UpdatedDefs.pop_back_val(); + assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg"); + for (MachineInstr &Use : MRI.use_instructions(NewDef)) { + switch (Use.getOpcode()) { + // Keep this list in sync with the list of all artifact combines. + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_SEXT: + case TargetOpcode::G_UNMERGE_VALUES: + case TargetOpcode::G_EXTRACT: + case TargetOpcode::G_TRUNC: + // Adding Use to ArtifactList. 
+ WrapperObserver.changedInstr(Use); + break; + case TargetOpcode::COPY: { + Register Copy = Use.getOperand(0).getReg(); + if (Copy.isVirtual()) + UpdatedDefs.push_back(Copy); + break; + } + default: + // If we do not have an artifact combine for the opcode, there is no + // point in adding it to the ArtifactList as nothing interesting will + // be done to it anyway. + break; + } + } } + return Changed; } private: - static unsigned getArtifactSrcReg(const MachineInstr &MI) { switch (MI.getOpcode()) { case TargetOpcode::COPY: diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 966eedd100b..e789e4a333d 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -212,6 +212,11 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, // ArtifactCombiner to combine away them. if (isArtifact(MI)) { LLVM_DEBUG(dbgs() << ".. Not legalized, moving to artifacts retry\n"); + assert(NumArtifacts == 0 && + "Artifacts are only expected in instruction list starting the " + "second iteration, but each iteration starting second must " + "start with an empty artifacts list"); + (void)NumArtifacts; RetryList.push_back(&MI); continue; } @@ -224,7 +229,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, // Try to combine the instructions in RetryList again if there // are new artifacts. If not, stop legalizing. if (!RetryList.empty()) { - if (ArtifactList.size() > NumArtifacts) { + if (!ArtifactList.empty()) { while (!RetryList.empty()) ArtifactList.insert(RetryList.pop_back_val()); } else { diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp index 27419b28d27..4abbc23e4cb 100644 --- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -651,8 +651,9 @@ void MipsRegisterBankInfo::setRegBank(MachineInstr &MI, static void combineAwayG_UNMERGE_VALUES(LegalizationArtifactCombiner &ArtCombiner, MachineInstr &MI) { + SmallVector<Register, 4> UpdatedDefs; SmallVector<MachineInstr *, 2> DeadInstrs; - ArtCombiner.tryCombineMerges(MI, DeadInstrs); + ArtCombiner.tryCombineMerges(MI, DeadInstrs, UpdatedDefs); for (MachineInstr *DeadMI : DeadInstrs) DeadMI->eraseFromParent(); } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll index dab44b84355..78ae99d9844 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -167,7 +167,7 @@ define void @nonpow2_load_narrowing() { ret void } -; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %5:fpr32(s32) = G_EXTRACT %{{[0-9]+}}:fpr(s128), 64 (in function: nonpow2_store_narrowing) +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %14:gpr64(s64), %15:gpr(s1) = G_UADDE %17:gpr, %17:gpr, %13:gpr (in function: nonpow2_store_narrowing) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_store_narrowing ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_store_narrowing: define void @nonpow2_store_narrowing(i96* %c) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir index 6d10c09aea6..09ae228b4f1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir +++ 
b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir @@ -15,9 +15,7 @@ body: | ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[COPY1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY2]](s8) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) ; CHECK: $x0 = COPY [[ANYEXT]](s64) %0:_(s64) = G_CONSTANT i64 0 %1:_(s4) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir index 6ccddd99a47..b4a37ccbff3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir @@ -22,18 +22,19 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[DEF1]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -33 - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[C3]] - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[C3]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 ; CHECK: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND2]](s32), [[C4]] - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[COPY4]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[COPY5]] ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[OR1]](s32) ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.2 ; CHECK: bb.2: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir b/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir index fc6c1d552da..656806a448e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir @@ -8,14 +8,12 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ogt), [[COPY]](s32), [[COPY1]] - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[FCMP]](s32) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; CHECK: $w0 = COPY [[COPY5]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[FCMP]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] 
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) + ; CHECK: $w0 = COPY [[COPY3]](s32) %0:_(s32) = COPY $w0 %1:_(s32) = COPY $w1 %2:_(s1) = G_FCMP floatpred(ogt), %0(s32), %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir index 79964509f83..43bc6b00929 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir @@ -36,15 +36,14 @@ body: | ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[TRUNC]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[TRUNC1]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC2]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC3]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[COPY1]](s32) + ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[C]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[COPY2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[COPY3]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64) ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -64,27 +63,28 @@ body: | ; CHECK-LABEL: name: test_sext_trunc_v2s32_to_v2s8_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[ZEXT]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[ZEXT1]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16) - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT2]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC2]](s16) - ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT1]], [[ZEXT3]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC3]](s16), [[TRUNC4]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[COPY1]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) + ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[COPY4]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C]](s32) + ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) + ; CHECK: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[COPY6]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir index 98504bafef8..5f2c21158b0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir @@ -608,16 +608,15 @@ body: | ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C1]] ; SI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]] - ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) ; SI: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64) ; SI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) - ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[TRUNC]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; SI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[TRUNC1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[COPY2]](s32) + ; SI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[C2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; SI: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]] ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]] @@ -632,16 +631,15 @@ body: | ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C1]] ; VI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]] - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) ; VI: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) ; VI: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64) ; VI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) - ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[TRUNC]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; VI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[TRUNC1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[COPY2]](s32) + ; VI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[C2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; VI: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]] ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]] @@ -656,16 +654,15 @@ body: | ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C1]] ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]] - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) ; GFX9: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64) ; GFX9: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[TRUNC]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[TRUNC1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[COPY2]](s32) + ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[C2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]] ; GFX9: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]] ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]] @@ -693,15 +690,14 @@ body: | ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) ; SI: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] - ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[TRUNC]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; SI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[TRUNC1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32) + ; SI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32) ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64) - ; SI: [[TRUNC2:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>) - ; SI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC2]], [[BUILD_VECTOR]] + ; SI: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>) + ; SI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC]], [[BUILD_VECTOR]] ; SI: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]] ; SI: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) ; VI-LABEL: name: test_copysign_v2s32_v2s64 @@ -712,15 
+708,14 @@ body: | ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) ; VI: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[TRUNC]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; VI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[TRUNC1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32) ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64) - ; VI: [[TRUNC2:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>) - ; VI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC2]], [[BUILD_VECTOR]] + ; VI: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>) + ; VI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC]], [[BUILD_VECTOR]] ; VI: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]] ; VI: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) ; GFX9-LABEL: name: test_copysign_v2s32_v2s64 @@ -731,15 +726,14 @@ body: | ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32) ; GFX9: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]] - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[TRUNC]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[TRUNC1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32) ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>) - ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC2]], [[BUILD_VECTOR]] + ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>) + ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC]], [[BUILD_VECTOR]] ; GFX9: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]] ; GFX9: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp index 7bb348d3046..507a21582ef 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp @@ -26,6 +26,29 @@ namespace { << "unable to legalize instruction: " << MISStream.str(); } +DefineLegalizerInfo(ALegalizer, { + auto p0 = LLT::pointer(0, 64); + auto v2s8 = LLT::vector(2, 8); + auto v2s16 = LLT::vector(2, 16); + getActionDefinitionsBuilder(G_LOAD) + .legalForTypesWithMemDesc({{s16, p0, 8, 8}}) + .scalarize(0) + .clampScalar(0, s16, s16); + getActionDefinitionsBuilder(G_PTR_ADD).legalFor({{p0, s64}}); + 
getActionDefinitionsBuilder(G_CONSTANT).legalFor({s32, s64}); + getActionDefinitionsBuilder(G_BUILD_VECTOR) + .legalFor({{v2s16, s16}}) + .clampScalar(1, s16, s16); + getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).legalFor({{v2s8, s16}}); + getActionDefinitionsBuilder(G_ANYEXT).legalFor({{s32, s16}}); + getActionDefinitionsBuilder(G_ZEXT).legalFor({{s32, s16}}); + getActionDefinitionsBuilder(G_SEXT).legalFor({{s32, s16}}); + getActionDefinitionsBuilder(G_AND).legalFor({s32}); + getActionDefinitionsBuilder(G_SEXT_INREG).lower(); + getActionDefinitionsBuilder(G_ASHR).legalFor({{s32, s32}}); + getActionDefinitionsBuilder(G_SHL).legalFor({{s32, s32}}); +}); + TEST_F(GISelMITest, BasicLegalizerTest) { StringRef MIRString = R"( %vptr:_(p0) = COPY $x4 @@ -36,23 +59,6 @@ TEST_F(GISelMITest, BasicLegalizerTest) { if (!TM) return; - DefineLegalizerInfo(ALegalizer, { - auto p0 = LLT::pointer(0, 64); - auto v2s8 = LLT::vector(2, 8); - auto v2s16 = LLT::vector(2, 16); - getActionDefinitionsBuilder(G_LOAD) - .legalForTypesWithMemDesc({{s16, p0, 8, 8}}) - .scalarize(0) - .clampScalar(0, s16, s16); - getActionDefinitionsBuilder(G_PTR_ADD).legalFor({{p0, s64}}); - getActionDefinitionsBuilder(G_CONSTANT).legalFor({s64}); - getActionDefinitionsBuilder(G_BUILD_VECTOR) - .legalFor({{v2s16, s16}}) - .clampScalar(1, s16, s16); - getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).legalFor({{v2s8, s16}}); - getActionDefinitionsBuilder(G_ANYEXT).legalFor({{s32, s16}}); - }); - ALegalizerInfo LI(MF->getSubtarget()); Legalizer::MFResult Result = @@ -76,4 +82,140 @@ TEST_F(GISelMITest, BasicLegalizerTest) { EXPECT_TRUE(CheckMachineFunction(*MF, CheckString)) << *MF; } +// Making sure the legalization finishes successfully w/o failure to combine +// away all the legalization artifacts regardless of the order of their +// creation. +TEST_F(GISelMITest, UnorderedArtifactCombiningTest) { + StringRef MIRString = R"( + %vptr:_(p0) = COPY $x4 + %v:_(<2 x s8>) = G_LOAD %vptr:_(p0) :: (load 2, align 1) + %v0:_(s8), %v1:_(s8) = G_UNMERGE_VALUES %v:_(<2 x s8>) + %v0_ext:_(s16) = G_ANYEXT %v0:_(s8) + $h4 = COPY %v0_ext:_(s16) + )"; + setUp(MIRString.rtrim(' ')); + if (!TM) + return; + + ALegalizerInfo LI(MF->getSubtarget()); + + // The events here unfold as follows: + // 1. First, the function is scanned pre-forming the worklist of artifacts: + // + // UNMERGE (1): pushed into the worklist first, will be processed last. + // | + // ANYEXT (2) + // + // 2. Second, the load is scalarized, and then its destination is widened, + // forming the following chain of legalization artifacts: + // + // TRUNC (4): created last, will be processed first. + // | + // BUILD_VECTOR (3) + // | + // UNMERGE (1): pushed into the worklist first, will be processed last. + // | + // ANYEXT (2) + // + // 3. Third, the artifacts are attempted to be combined in pairs, looking + // through the def-use chain from the roots towards the leafs, visiting the + // roots in order they happen to be in the worklist: + // (4) - (trunc): can not be combined; + // (3) - (build_vector (trunc)): can not be combined; + // (2) - (anyext (unmerge)): can not be combined; + // (1) - (unmerge (build_vector)): combined and eliminated; + // + // leaving the function in the following state: + // + // TRUNC (1): moved to non-artifact instructions worklist first. + // | + // ANYEXT (2): also moved to non-artifact instructions worklist. + // + // Every other instruction is successfully legalized in full. 
+ // If combining (unmerge (build_vector)) does not re-insert every artifact + // that had its def-use chain modified (shortened) into the artifact + // worklist (here it's just ANYEXT), the process moves on onto the next + // outer loop iteration of the top-level legalization algorithm here, w/o + // performing all the artifact combines possible. Let's consider this + // scenario first: + // 4.A. Neither TRUNC, nor ANYEXT can be legalized in isolation, both of them + // get moved to the retry worklist, but no additional artifacts were + // created in the process, thus algorithm concludes no progress could be + // made, and fails. + // 4.B. If, however, combining (unmerge (build_vector)) had re-inserted + // ANYEXT into the worklist (as ANYEXT's source changes, not by value, + // but by implementation), (anyext (trunc)) combine happens next, which + // fully eliminates all the artifacts and legalization succeeds. + // + // We're looking into making sure that (4.B) happens here, not (4.A). Note + // that in that case the first scan through the artifacts worklist, while not + // being done in any guaranteed order, only needs to find the innermost + // pair(s) of artifacts that could be immediately combined out. After that + // the process follows def-use chains, making them shorter at each step, thus + // combining everything that can be combined in O(n) time. + Legalizer::MFResult Result = + Legalizer::legalizeMachineFunction(*MF, LI, {}, B); + + EXPECT_TRUE(isNullMIPtr(Result.FailedOn)); + EXPECT_TRUE(Result.Changed); + + StringRef CheckString = R"( + CHECK: %vptr:_(p0) = COPY $x4 + CHECK-NEXT: [[LOAD_0:%[0-9]+]]:_(s16) = G_LOAD %vptr:_(p0) :: (load 1) + CHECK: %v0_ext:_(s16) = COPY [[LOAD_0]]:_(s16) + CHECK-NEXT: $h4 = COPY %v0_ext:_(s16) + )"; + + EXPECT_TRUE(CheckMachineFunction(*MF, CheckString)) << *MF; +} + +TEST_F(GISelMITest, UnorderedArtifactCombiningManyCopiesTest) { + StringRef MIRString = R"( + %vptr:_(p0) = COPY $x4 + %v:_(<2 x s8>) = G_LOAD %vptr:_(p0) :: (load 2, align 1) + %vc0:_(<2 x s8>) = COPY %v:_(<2 x s8>) + %vc1:_(<2 x s8>) = COPY %v:_(<2 x s8>) + %vc00:_(s8), %vc01:_(s8) = G_UNMERGE_VALUES %vc0:_(<2 x s8>) + %vc10:_(s8), %vc11:_(s8) = G_UNMERGE_VALUES %vc1:_(<2 x s8>) + %v0t:_(s8) = COPY %vc00:_(s8) + %v0:_(s8) = COPY %v0t:_(s8) + %v1t:_(s8) = COPY %vc11:_(s8) + %v1:_(s8) = COPY %v1t:_(s8) + %v0_zext:_(s32) = G_ZEXT %v0:_(s8) + %v1_sext:_(s32) = G_SEXT %v1:_(s8) + $w4 = COPY %v0_zext:_(s32) + $w5 = COPY %v1_sext:_(s32) + )"; + setUp(MIRString.rtrim(' ')); + if (!TM) + return; + + ALegalizerInfo LI(MF->getSubtarget()); + + Legalizer::MFResult Result = + Legalizer::legalizeMachineFunction(*MF, LI, {}, B); + + EXPECT_TRUE(isNullMIPtr(Result.FailedOn)); + EXPECT_TRUE(Result.Changed); + + StringRef CheckString = R"( + CHECK: %vptr:_(p0) = COPY $x4 + CHECK-NEXT: [[LOAD_0:%[0-9]+]]:_(s16) = G_LOAD %vptr:_(p0) :: (load 1) + CHECK-NEXT: [[OFFSET_1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + CHECK-NEXT: [[VPTR_1:%[0-9]+]]:_(p0) = G_PTR_ADD %vptr:_, [[OFFSET_1]]:_(s64) + CHECK-NEXT: [[LOAD_1:%[0-9]+]]:_(s16) = G_LOAD [[VPTR_1]]:_(p0) :: (load 1) + CHECK-NEXT: [[FF_MASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + CHECK-NEXT: [[V0_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD_0]]:_(s16) + CHECK-NEXT: %v0_zext:_(s32) = G_AND [[V0_EXT]]:_, [[FF_MASK]]:_ + CHECK-NEXT: [[V1_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD_1]]:_(s16) + CHECK-NEXT: [[SHAMNT:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + CHECK-NEXT: [[V1_SHL:%[0-9]+]]:_(s32) = G_SHL [[V1_EXT]]:_, [[SHAMNT]]:_(s32) + CHECK-NEXT: %v1_sext:_(s32) = G_ASHR 
[[V1_SHL]]:_, [[SHAMNT]]:_(s32) + CHECK-NEXT: $w4 = COPY %v0_zext:_(s32) + CHECK-NEXT: $w5 = COPY %v1_sext:_(s32) + )"; + + EXPECT_TRUE(CheckMachineFunction(*MF, CheckString)) << *MF; +} + } // namespace |

