diff options
author | Ehsan Amiri <amehsan@ca.ibm.com> | 2016-10-24 15:46:58 +0000 |
---|---|---|
committer | Ehsan Amiri <amehsan@ca.ibm.com> | 2016-10-24 15:46:58 +0000 |
commit | 1f31e9157de78e07e39a62bddefe9a137fee0216 (patch) | |
tree | f3e59fa74f8ef42ccd60f8d889725d8ac6c52229 | |
parent | 7b12e367406347ac22716b97198581a86dee7152 (diff) | |
download | bcm5719-llvm-1f31e9157de78e07e39a62bddefe9a137fee0216.tar.gz bcm5719-llvm-1f31e9157de78e07e39a62bddefe9a137fee0216.zip |
[PPC] Better codegen for AND, ANY_EXT, SRL sequence
https://reviews.llvm.org/D24924
This improves the code generated for a sequence of AND, ANY_EXT, SRL instructions. This is a targeted fix for this special pattern. The pattern is generated by the target-independent DAG combiner, so a more general fix may not be necessary. If we come across other similar cases, some ideas for handling them are discussed in the code review.
llvm-svn: 284983
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 17 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/README.txt | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/anyext_srl.ll | 29 |
3 files changed, 52 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 150b0c81398..1395a4b4099 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2657,6 +2657,23 @@ void PPCDAGToDAGISel::Select(SDNode *N) { MB = 64 - countTrailingOnes(Imm64); SH = 0; + if (Val.getOpcode() == ISD::ANY_EXTEND) { + auto Op0 = Val.getOperand(0); + if ( Op0.getOpcode() == ISD::SRL && + isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { + + auto ResultType = Val.getNode()->getValueType(0); + auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, + ResultType); + SDValue IDVal (ImDef, 0); + + Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, + ResultType, IDVal, Op0.getOperand(0), + getI32Imm(1, dl)), 0); + SH = 64 - Imm; + } + } + // If the operand is a logical right shift, we can fold it into this // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) // for n <= mb. The right shift is really a left rotate followed by a diff --git a/llvm/lib/Target/PowerPC/README.txt b/llvm/lib/Target/PowerPC/README.txt index f1d4ca7b7fa..82c7239fa7b 100644 --- a/llvm/lib/Target/PowerPC/README.txt +++ b/llvm/lib/Target/PowerPC/README.txt @@ -658,3 +658,9 @@ Instruction fusion was introduced in ISA 2.06 and more opportunities added in ISA 2.07. LLVM needs to add infrastructure to recognize fusion opportunities and force instruction pairs to be scheduled together. 
+----------------------------------------------------------------------------- + +More general handling of any_extend and zero_extend: + +See https://reviews.llvm.org/D24924#555306 + diff --git a/llvm/test/CodeGen/PowerPC/anyext_srl.ll b/llvm/test/CodeGen/PowerPC/anyext_srl.ll new file mode 100644 index 00000000000..1d729d2c359 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/anyext_srl.ll @@ -0,0 +1,29 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s + +%class.PB2 = type { [1 x i32], %class.PB1* } +%class.PB1 = type { [1 x i32], i64, i64, i32 } + +; Function Attrs: norecurse nounwind readonly +define zeroext i1 @foo(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr { +entry: + %arrayidx.i6 = bitcast %class.PB2* %s_a to i32* + %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1 + %and.i = and i32 %0, 8 + %cmp.i = icmp ne i32 %and.i, 0 + %arrayidx.i37 = bitcast %class.PB2* %s_b to i32* + %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1 + %and.i4 = and i32 %1, 8 + %cmp.i5 = icmp ne i32 %and.i4, 0 + %cmp = xor i1 %cmp.i, %cmp.i5 + ret i1 %cmp +; CHECK-LABEL: @foo +; CHECK: rldicl {{[0-9]+}}, {{[0-9]+}}, 61, 63 + +} + +!1 = !{!2, !2, i64 0} +!2 = !{!"int", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C++ TBAA"} + |