summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/TailDuplicator.cpp18
-rw-r--r--llvm/test/CodeGen/AArch64/taildup-cfi.ll96
-rw-r--r--llvm/test/CodeGen/X86/avx512-mask-op.ll6
3 files changed, 116 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index f51c884839b..b4ee8777d1f 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -371,6 +372,13 @@ void TailDuplicator::duplicateInstruction(
MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
const DenseSet<unsigned> &UsedByPhi) {
+ // Allow duplication of CFI instructions.
+ if (MI->isCFIInstruction()) {
+ BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()),
+ TII->get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex(
+ MI->getOperand(0).getCFIIndex());
+ return;
+ }
MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI);
if (PreRegAlloc) {
for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) {
@@ -585,7 +593,13 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
unsigned InstrCount = 0;
for (MachineInstr &MI : TailBB) {
// Non-duplicable things shouldn't be tail-duplicated.
- if (MI.isNotDuplicable())
+ // CFI instructions are marked as non-duplicable, because Darwin compact
+ // unwind info emission can't handle multiple prologue setups. With DWARF,
+ // allow them to be duplicated, so that their existence doesn't prevent
+ // tail duplication of some basic blocks that would be duplicated otherwise.
+ if (MI.isNotDuplicable() &&
+ (TailBB.getParent()->getTarget().getTargetTriple().isOSDarwin() ||
+ !MI.isCFIInstruction()))
return false;
// Convergent instructions can be duplicated only if doing so doesn't add
@@ -605,7 +619,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (PreRegAlloc && MI.isCall())
return false;
- if (!MI.isPHI() && !MI.isDebugValue())
+ if (!MI.isPHI() && !MI.isMetaInstruction())
InstrCount += 1;
if (InstrCount > MaxDuplicateCount)
diff --git a/llvm/test/CodeGen/AArch64/taildup-cfi.ll b/llvm/test/CodeGen/AArch64/taildup-cfi.ll
new file mode 100644
index 00000000000..46fe0ec1e83
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/taildup-cfi.ll
@@ -0,0 +1,96 @@
+; REQUIRES: asserts
+; RUN: llc -mtriple=arm64-unknown-linux-gnu -debug-only=tailduplication %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=LINUX
+; RUN: llc -mtriple=arm64-apple-darwin -debug-only=tailduplication %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=DARWIN
+
+; ModuleID = 'taildup-cfi.c'
+source_filename = "taildup-cfi.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@g = common local_unnamed_addr global i32 0, align 4
+@f = common local_unnamed_addr global i32 0, align 4
+@a = common local_unnamed_addr global i32 0, align 4
+@m = common local_unnamed_addr global i32 0, align 4
+@l = common local_unnamed_addr global i32 0, align 4
+@j = common local_unnamed_addr global i32 0, align 4
+@k = common local_unnamed_addr global i32 0, align 4
+@i = common local_unnamed_addr global i32 0, align 4
+@d = common local_unnamed_addr global i32 0, align 4
+@c = common local_unnamed_addr global i32 0, align 4
+@e = common local_unnamed_addr global i32 0, align 4
+@h = common local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: norecurse nounwind uwtable
+define void @n(i32 %o, i32* nocapture readonly %b) local_unnamed_addr #0 {
+entry:
+ %0 = load i32, i32* @g, align 4, !tbaa !2
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %entry.if.end_crit_edge, label %if.then
+
+entry.if.end_crit_edge: ; preds = %entry
+ %.pre = load i32, i32* @f, align 4, !tbaa !2
+ br label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* @f, align 4, !tbaa !2
+ br label %if.end
+
+; DARWIN-NOT: Merging into block
+; LINUX: Merging into block
+
+if.end: ; preds = %entry.if.end_crit_edge, %if.then
+ %1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ 0, %if.then ]
+ %cmp6 = icmp slt i32 %1, %o
+ br i1 %cmp6, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %if.end
+ %.pre7 = load i32, i32* @a, align 4, !tbaa !2
+ %.pre8 = load i32, i32* @l, align 4, !tbaa !2
+ %.pre9 = load i32, i32* @j, align 4, !tbaa !2
+ %.pre10 = load i32, i32* @k, align 4, !tbaa !2
+ %.pre11 = load i32, i32* @i, align 4, !tbaa !2
+ br label %for.body
+
+for.body: ; preds = %if.end5, %for.body.lr.ph
+ %2 = phi i32 [ %.pre11, %for.body.lr.ph ], [ %7, %if.end5 ]
+ %3 = phi i32 [ %.pre10, %for.body.lr.ph ], [ %8, %if.end5 ]
+ %4 = phi i32 [ %.pre9, %for.body.lr.ph ], [ %9, %if.end5 ]
+ %5 = phi i32 [ %1, %for.body.lr.ph ], [ %inc, %if.end5 ]
+ store i32 %.pre7, i32* @m, align 4, !tbaa !2
+ %mul = mul nsw i32 %3, %4
+ %cmp1 = icmp sgt i32 %.pre8, %mul
+ %conv = zext i1 %cmp1 to i32
+ %cmp2 = icmp slt i32 %2, %conv
+ br i1 %cmp2, label %if.then4, label %if.end5
+
+if.then4: ; preds = %for.body
+ %6 = load i32, i32* @d, align 4, !tbaa !2
+ store i32 %6, i32* @k, align 4, !tbaa !2
+ store i32 %6, i32* @i, align 4, !tbaa !2
+ store i32 %6, i32* @j, align 4, !tbaa !2
+ br label %if.end5
+
+if.end5: ; preds = %if.then4, %for.body
+ %7 = phi i32 [ %6, %if.then4 ], [ %2, %for.body ]
+ %8 = phi i32 [ %6, %if.then4 ], [ %3, %for.body ]
+ %9 = phi i32 [ %6, %if.then4 ], [ %4, %for.body ]
+ %10 = load i32, i32* @c, align 4, !tbaa !2
+ %idxprom = sext i32 %10 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
+ %11 = load i32, i32* %arrayidx, align 4, !tbaa !2
+ %12 = load i32, i32* @e, align 4, !tbaa !2
+ %sub = sub nsw i32 %11, %12
+ store i32 %sub, i32* @h, align 4, !tbaa !2
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* @f, align 4, !tbaa !2
+ %exitcond = icmp eq i32 %inc, %o
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %if.end5, %if.end
+ ret void
+}
+
+attributes #0 = { norecurse nounwind uwtable }
+
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{}
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index c5623116c85..b5ae7a2ff46 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -712,10 +712,12 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; AVX512BW-NEXT: jg LBB17_1
; AVX512BW-NEXT: ## %bb.2:
; AVX512BW-NEXT: vpcmpltud %zmm2, %zmm1, %k0
-; AVX512BW-NEXT: jmp LBB17_3
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
+; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB17_1:
; AVX512BW-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
-; AVX512BW-NEXT: LBB17_3:
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512BW-NEXT: vzeroupper
OpenPOWER on IntegriCloud