summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Richard Sandiford <rsandifo@linux.vnet.ibm.com> 2013-10-16 10:26:19 +0000
committer: Richard Sandiford <rsandifo@linux.vnet.ibm.com> 2013-10-16 10:26:19 +0000
commit: 374a0e50c4ea267bc98c10aafab3661356231701 (patch)
tree: c3e7d37b3adf30a882f7fb7815cce550d997a0aa
parent: 1fe87e031ce8f4b75efb933c4a389ec578d1ff85 (diff)
downloadbcm5719-llvm-374a0e50c4ea267bc98c10aafab3661356231701.tar.gz
bcm5719-llvm-374a0e50c4ea267bc98c10aafab3661356231701.zip
Handle (shl (anyext (shr ...))) in SimplifyDemandedBits
This is really an extension of the current (shl (shr ...)) -> shl optimization. The main difference is that certain upper bits must also not be demanded. The motivating examples are the first two in the testcase, which occur in llvmpipe output. llvm-svn: 192783
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 25
-rw-r--r-- llvm/test/CodeGen/SystemZ/shift-10.ll 67
2 files changed, 92 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 17631c0c9d9..9d6a3b40b91 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -635,6 +635,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
NarrowShl));
}
+ // Repeat the SHL optimization above in cases where an extension
+ // intervenes: (shl (anyext (shr x, c1)), c2) to
+ // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
+ // aren't demanded (as above) and that the shifted upper c1 bits of
+ // x aren't demanded.
+ if (InOp.hasOneUse() &&
+ InnerOp.getOpcode() == ISD::SRL &&
+ InnerOp.hasOneUse() &&
+ isa<ConstantSDNode>(InnerOp.getOperand(1))) {
+ uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
+ ->getZExtValue();
+ if (InnerShAmt < ShAmt &&
+ InnerShAmt < InnerBits &&
+ NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 &&
+ NewMask.trunc(ShAmt) == 0) {
+ SDValue NewSA =
+ TLO.DAG.getConstant(ShAmt - InnerShAmt,
+ Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
+ InnerOp.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
+ NewExt, NewSA));
+ }
+ }
}
KnownZero <<= SA->getZExtValue();
diff --git a/llvm/test/CodeGen/SystemZ/shift-10.ll b/llvm/test/CodeGen/SystemZ/shift-10.ll
new file mode 100644
index 00000000000..47cd0027f14
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/shift-10.ll
@@ -0,0 +1,67 @@
+; Test compound shifts.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test a shift right followed by a sign extension. This can use two shifts.
+define i64 @f1(i32 %a) {
+; CHECK-LABEL: f1:
+; CHECK: sllg [[REG:%r[0-5]]], %r2, 62
+; CHECK: srag %r2, [[REG]], 63
+; CHECK: br %r14
+ %shr = lshr i32 %a, 1
+ %trunc = trunc i32 %shr to i1
+ %ext = sext i1 %trunc to i64
+ ret i64 %ext
+}
+
+; ...and again with the highest shift count.
+define i64 @f2(i32 %a) {
+; CHECK-LABEL: f2:
+; CHECK: sllg [[REG:%r[0-5]]], %r2, 32
+; CHECK: srag %r2, [[REG]], 63
+; CHECK: br %r14
+ %shr = lshr i32 %a, 31
+ %trunc = trunc i32 %shr to i1
+ %ext = sext i1 %trunc to i64
+ ret i64 %ext
+}
+
+; Test a left shift of an extended right shift in a case where folding
+; is possible.
+define i64 @f3(i32 %a) {
+; CHECK-LABEL: f3:
+; CHECK: risbg %r2, %r2, 27, 181, 9
+; CHECK: br %r14
+ %shr = lshr i32 %a, 1
+ %ext = zext i32 %shr to i64
+ %shl = shl i64 %ext, 10
+ %and = and i64 %shl, 137438952960
+ ret i64 %and
+}
+
+; ...and again with a larger right shift.
+define i64 @f4(i32 %a) {
+; CHECK-LABEL: f4:
+; CHECK: risbg %r2, %r2, 30, 158, 3
+; CHECK: br %r14
+ %shr = lshr i32 %a, 30
+ %ext = sext i32 %shr to i64
+ %shl = shl i64 %ext, 33
+ %and = and i64 %shl, 8589934592
+ ret i64 %and
+}
+
+; Repeat the previous test in a case where all bits outside the
+; bottom 3 matter. FIXME: can still use RISBG here.
+define i64 @f5(i32 %a) {
+; CHECK-LABEL: f5:
+; CHECK: srl %r2, 30
+; CHECK: sllg %r2, %r2, 33
+; CHECK: lhi %r2, 7
+; CHECK: br %r14
+ %shr = lshr i32 %a, 30
+ %ext = sext i32 %shr to i64
+ %shl = shl i64 %ext, 33
+ %or = or i64 %shl, 7
+ ret i64 %or
+}
OpenPOWER on IntegriCloud