author     Yonghong Song <yhs@fb.com>   2018-03-13 06:47:00 +0000
committer  Yonghong Song <yhs@fb.com>   2018-03-13 06:47:00 +0000
commit     89e47ac67150ec2287db0d4541196d0a4d3f96cc (patch)
tree       457b82e0e2f841fac57a66c5838b9c62d83d9061
parent     fddb9f4e2887a28ac0205adb7bb6934f7396c1e9 (diff)
bpf: Tighten subregister definition check
The current subregister definition check stops after the MOV_32_64
instruction. This means we treat the following instruction sequence as
safe to eliminate:

  MOV_32_64 rB, wA
  SLL_ri    rB, rB, 32
  SRL_ri    rB, rB, 32

However, this is *not* always true. The source subregister wA of
MOV_32_64 could come from an implicit truncation of a 64-bit register,
in which case the high bits of that 64-bit register are not zeroed, so
we can't eliminate the sequence above. For example, for i32_val below,
we shouldn't do the elimination:

  long long bar ();

  int foo (int b, int c)
  {
    unsigned int i32_val = (unsigned int) bar();

    if (i32_val < 10)
      return b;
    else
      return c;
  }

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Yonghong Song <yhs@fb.com>

llvm-svn: 327365
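For contrast, a brief illustrative sketch (not part of this commit; the
function baz below is hypothetical): when the 32-bit value is instead
produced by a 32-bit ALU instruction, the eBPF ISA already zeroes the
upper 32 bits of the destination register, so the tightened check can
accept it and the SLL_ri/SRL_ri pair after MOV_32_64 may still be
removed (assuming the alu32 subregister mode this peephole operates on):

  /* Hypothetical example, not from the commit: i32_val is defined by a
     32-bit add rather than by an implicit truncation of a 64-bit call
     result, so its upper 32 bits are already zero and the shift pair
     inserted for the zero extension is redundant. */
  unsigned long long baz (unsigned int a, unsigned int b,
                          unsigned long long c, unsigned long long d)
  {
    unsigned int i32_val = a + b;   /* defined by a 32-bit ALU op */

    if (i32_val < 10)
      return c;
    else
      return d;
  }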
-rw-r--r--  llvm/lib/Target/BPF/BPFMIPeephole.cpp            | 18
-rw-r--r--  llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll  | 33
2 files changed, 50 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index 80f4437a31c..3df14b4ad5b 100644
--- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -96,6 +96,24 @@ MachineInstr *BPFMIPeephole::getInsnDefZExtSubReg(unsigned Reg) const {
       Insn->getOpcode() != BPF::MOV_32_64)
     return nullptr;
+  Insn = MRI->getVRegDef(Insn->getOperand(1).getReg());
+  if (!Insn || Insn->isPHI())
+    return nullptr;
+
+  if (Insn->getOpcode() == BPF::COPY) {
+    MachineOperand &opnd = Insn->getOperand(1);
+
+    if (!opnd.isReg())
+      return nullptr;
+
+    unsigned Reg = opnd.getReg();
+    if ((TargetRegisterInfo::isVirtualRegister(Reg) &&
+         MRI->getRegClass(Reg) == &BPF::GPRRegClass) ||
+        (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+         BPF::GPRRegClass.contains(Reg)))
+      return nullptr;
+  }
+
   return Insn;
 }
diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll
index 826ab2b8362..6b3edaf9cf9 100644
--- a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll
+++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll
@@ -14,7 +14,20 @@
 ;     return c;
 ;   else
 ;     return d;
-;}
+; }
+;
+; long long bar ();
+;
+; int foo (int b, int c)
+; {
+;   unsigned int i32_val = (unsigned int) bar();
+;
+;   if (i32_val < 10)
+;     return b;
+;   else
+;     return c;
+; }
+
 ; Function Attrs: norecurse nounwind readnone
 define dso_local i64 @select_u(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_addr #0 {
 ; CHECK-LABEL: select_u:
@@ -38,3 +51,21 @@ entry:
 ; CHECK: if r{{[0-9]+}} s{{<|>}} r{{[0-9]+}} goto
   ret i64 %c.d
 }
+
+; Function Attrs: nounwind
+define dso_local i32 @foo(i32 %b, i32 %c) local_unnamed_addr #0 {
+; CHECK-LABEL: foo:
+entry:
+  %call = tail call i64 bitcast (i64 (...)* @bar to i64 ()*)() #2
+  %conv = trunc i64 %call to i32
+  %cmp = icmp ult i32 %conv, 10
+; The shifts can't be optimized out because %call comes from function call
+; returning i64 so the high bits might be valid.
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
+  %b.c = select i1 %cmp, i32 %b, i32 %c
+; CHECK: if r{{[0-9]+}} {{<|>}} {{[0-9]+}} goto
+  ret i32 %b.c
+}
+
+declare dso_local i64 @bar(...) local_unnamed_addr #1