summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Craig Topper <craig.topper@gmail.com> 2017-02-25 18:12:25 +0000
committer Craig Topper <craig.topper@gmail.com> 2017-02-25 18:12:25 +0000
commit 3b8aca2ecfe75fc7b2b345dc40e020ada1271c18 (patch)
tree 33d86f1dd90293953e276f05c87f3b24daf73525
parent ac56719231872f01617c5290855a8470aebaae71 (diff)
download bcm5719-llvm-3b8aca2ecfe75fc7b2b345dc40e020ada1271c18.tar.gz
download bcm5719-llvm-3b8aca2ecfe75fc7b2b345dc40e020ada1271c18.zip
[ExecutionDepsFix] Don't make copies of LiveReg objects when collecting operands for soft instructions
Summary:
While collecting operands we make copies of the LiveReg objects which are stored in the LiveRegs array. If the instruction uses the same register multiple times we end up with multiple copies. Later we iterate through the collected list of LiveReg objects and merge DomainValues. In the process of doing this the merge function can change the contents of the original LiveReg object in the LiveRegs array, but not the copies that have been made. So when we get to the second usage of the register we end up seeing a stale copy of the LiveReg object.

To fix this I've stopped copying and now just store a pointer to the original LiveReg object. Another option might be to avoid adding the same register to the Regs array twice, but this approach seemed simpler.

The included test case exposes this bug due to an AVX-512 masked OR instruction using the same register for the passthru operand and one of the inputs to the OR operation.

Fixes PR30284.

Reviewers: RKSimon, stoklund, MatzeB, spatel, myatsina
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D30242
llvm-svn: 296260
-rw-r--r-- llvm/lib/CodeGen/ExecutionDepsFix.cpp 21
-rw-r--r-- llvm/test/CodeGen/X86/pr30284.ll 21
2 files changed, 29 insertions, 13 deletions
diff --git a/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/llvm/lib/CodeGen/ExecutionDepsFix.cpp
index 4a310f70769..5a6ab6b6261 100644
--- a/llvm/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -721,7 +721,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// Kill off any remaining uses that don't match available, and build a list of
// incoming DomainValues that we want to merge.
- SmallVector<LiveReg, 4> Regs;
+ SmallVector<const LiveReg *, 4> Regs;
for (int rx : used) {
assert(LiveRegs && "no space allocated for live registers");
const LiveReg &LR = LiveRegs[rx];
@@ -731,16 +731,11 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
continue;
}
// Sorted insertion.
- bool Inserted = false;
- for (SmallVectorImpl<LiveReg>::iterator i = Regs.begin(), e = Regs.end();
- i != e && !Inserted; ++i) {
- if (LR.Def < i->Def) {
- Inserted = true;
- Regs.insert(i, LR);
- }
- }
- if (!Inserted)
- Regs.push_back(LR);
+ auto I = std::upper_bound(Regs.begin(), Regs.end(), &LR,
+ [](const LiveReg *LHS, const LiveReg *RHS) {
+ return LHS->Def < RHS->Def;
+ });
+ Regs.insert(I, &LR);
}
// doms are now sorted in order of appearance. Try to merge them all, giving
@@ -748,14 +743,14 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
DomainValue *dv = nullptr;
while (!Regs.empty()) {
if (!dv) {
- dv = Regs.pop_back_val().Value;
+ dv = Regs.pop_back_val()->Value;
// Force the first dv to match the current instruction.
dv->AvailableDomains = dv->getCommonDomains(available);
assert(dv->AvailableDomains && "Domain should have been filtered");
continue;
}
- DomainValue *Latest = Regs.pop_back_val().Value;
+ DomainValue *Latest = Regs.pop_back_val()->Value;
// Skip already merged values.
if (Latest == dv || Latest->Next)
continue;
diff --git a/llvm/test/CodeGen/X86/pr30284.ll b/llvm/test/CodeGen/X86/pr30284.ll
new file mode 100644
index 00000000000..99fa9973a61
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr30284.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=avx512dq | FileCheck %s
+
+define void @f_f___un_3C_unf_3E_un_3C_unf_3E_() {
+; CHECK-LABEL: f_f___un_3C_unf_3E_un_3C_unf_3E_:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd 0, %zmm0
+; CHECK-NEXT: vmovapd 64, %zmm1
+; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [0,16,0,16,0,16,0,16,0,16,0,16,0,16,0,16]
+; CHECK-NEXT: vorpd %zmm2, %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: vorpd %zmm2, %zmm1, %zmm1 {%k1}
+; CHECK-NEXT: vmovapd %zmm1, 64
+; CHECK-NEXT: vmovapd %zmm0, 0
+; CHECK-NEXT: retl
+ %a_load22 = load <16 x i64>, <16 x i64>* null, align 1
+ %bitop = or <16 x i64> %a_load22, <i64 68719476736, i64 68719476736, i64 68719476736, i64 68719476736, i64 68719476736, i64 68719476736, i64 68719476736, i64 68719476736, i64 68719476736, i64 68719476736, i64 68719476736, i64 68719476736, i64 68719476736, i64 68719476736, i64 68719476736, i64 68719476736>
+ %v.i = load <16 x i64>, <16 x i64>* null
+ %v1.i41 = select <16 x i1> undef, <16 x i64> %bitop, <16 x i64> %v.i
+ store <16 x i64> %v1.i41, <16 x i64>* null
+ ret void
+}
OpenPOWER on IntegriCloud