[PPC] Better codegen for AND, ANY_EXT, SRL sequence

https://reviews.llvm.org/D24924. This improves the code generated for a sequence of AND, ANY_EXT, SRL instructions. This is a targeted fix for this special pattern. The pattern is generated by the target-independent DAG combiner, so a more general fix may not be necessary. If we come across other similar cases, some ideas for handling them are discussed in the code review. llvm-svn: 284983
author: Ehsan Amiri <amehsan@ca.ibm.com> 2016-10-24 15:46:58 +0000
committer: Ehsan Amiri <amehsan@ca.ibm.com> 2016-10-24 15:46:58 +0000
commit: 1f31e9157de78e07e39a62bddefe9a137fee0216 (patch)
tree: f3e59fa74f8ef42ccd60f8d889725d8ac6c52229
parent: 7b12e367406347ac22716b97198581a86dee7152 (diff)
download: bcm5719-llvm-1f31e9157de78e07e39a62bddefe9a137fee0216.tar.gz
bcm5719-llvm-1f31e9157de78e07e39a62bddefe9a137fee0216.zip
3 files changed, 52 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 150b0c81398..1395a4b4099 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -2657,6 +2657,23 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
       MB = 64 - countTrailingOnes(Imm64);
       SH = 0;
 
+      if (Val.getOpcode() == ISD::ANY_EXTEND) {
+        auto Op0 = Val.getOperand(0);
+        if ( Op0.getOpcode() == ISD::SRL &&
+           isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
+
+           auto ResultType = Val.getNode()->getValueType(0);
+           auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
+                                               ResultType);
+           SDValue IDVal (ImDef, 0);
+
+           Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
+                         ResultType, IDVal, Op0.getOperand(0),
+                         getI32Imm(1, dl)), 0);
+           SH = 64 - Imm;
+        }
+      }
+
       // If the operand is a logical right shift, we can fold it into this
       // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
       // for n <= mb. The right shift is really a left rotate followed by a
diff --git a/llvm/lib/Target/PowerPC/README.txt b/llvm/lib/Target/PowerPC/README.txt
index f1d4ca7b7fa..82c7239fa7b 100644
--- a/llvm/lib/Target/PowerPC/README.txt
+++ b/llvm/lib/Target/PowerPC/README.txt
@@ -658,3 +658,9 @@ Instruction fusion was introduced in ISA 2.06 and more opportunities added in
 ISA 2.07.  LLVM needs to add infrastructure to recognize fusion opportunities
 and force instruction pairs to be scheduled together.
 
+-----------------------------------------------------------------------------
+
+More general handling of any_extend and zero_extend:
+
+See https://reviews.llvm.org/D24924#555306
+
diff --git a/llvm/test/CodeGen/PowerPC/anyext_srl.ll b/llvm/test/CodeGen/PowerPC/anyext_srl.ll
new file mode 100644
index 00000000000..1d729d2c359
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/anyext_srl.ll
@@ -0,0 +1,29 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr8 < %s | FileCheck %s
+
+%class.PB2 = type { [1 x i32], %class.PB1* }
+%class.PB1 = type { [1 x i32], i64, i64, i32 }
+
+; Function Attrs: norecurse nounwind readonly
+define zeroext i1 @foo(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr {
+entry:
+  %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+  %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+  %and.i = and i32 %0, 8
+  %cmp.i = icmp ne i32 %and.i, 0
+  %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+  %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+  %and.i4 = and i32 %1, 8
+  %cmp.i5 = icmp ne i32 %and.i4, 0
+  %cmp = xor i1 %cmp.i, %cmp.i5
+  ret i1 %cmp
+; CHECK-LABEL: @foo
+; CHECK: rldicl  {{[0-9]+}}, {{[0-9]+}}, 61, 63
+
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}
+
author	Ehsan Amiri <amehsan@ca.ibm.com>	2016-10-24 15:46:58 +0000
committer	Ehsan Amiri <amehsan@ca.ibm.com>	2016-10-24 15:46:58 +0000
commit	1f31e9157de78e07e39a62bddefe9a137fee0216 (patch)
tree	f3e59fa74f8ef42ccd60f8d889725d8ac6c52229
parent	7b12e367406347ac22716b97198581a86dee7152 (diff)
download	bcm5719-llvm-1f31e9157de78e07e39a62bddefe9a137fee0216.tar.gz bcm5719-llvm-1f31e9157de78e07e39a62bddefe9a137fee0216.zip