diff options
author | Sanjay Patel <spatel@rotateright.com> | 2019-12-12 15:20:42 -0500 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2019-12-12 15:44:13 -0500 |
commit | 8963332c3327daa652ba3e26d35f9109b6991985 (patch) | |
tree | fec4e5f2d7f648aac684e5683b02f67e769b9b01 | |
parent | 27ec4abeac4dae65a307c4eb206110ba8c2ca2ce (diff) | |
download | bcm5719-llvm-8963332c3327daa652ba3e26d35f9109b6991985.tar.gz bcm5719-llvm-8963332c3327daa652ba3e26d35f9109b6991985.zip |
[DAGCombiner] fold shift-trunc-shift to shift-mask-trunc
This fold is done in IR by instcombine, and we have a special
form of it already here in DAGCombiner, but we want the more
general transform too:
https://rise4fun.com/Alive/3jZm
Name: general
Pre: (C1 + zext(C2) < 64)
%s = lshr i64 %x, C1
%t = trunc i64 %s to i16
%r = lshr i16 %t, C2
=>
%s2 = lshr i64 %x, C1 + zext(C2)
%a = and i64 %s2, zext((1 << (16 - C2)) - 1)
%r = trunc %a to i16
Name: special
Pre: C1 == 48
%s = lshr i64 %x, C1
%t = trunc i64 %s to i16
%r = lshr i16 %t, C2
=>
%s2 = lshr i64 %x, C1 + zext(C2)
%r = trunc %s2 to i16
...because D58017 exposes a regression without this fold.
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 21 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/shift-amount-mod.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/trunc-srl-load.ll | 3 |
3 files changed, 23 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c462a6bcb9f..fdc9f48fbcc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7943,6 +7943,27 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
                                        InnerShift.getOperand(0), NewShiftAmt);
         return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
       }
+      // In the more general case, we can clear the high bits after the shift:
+      // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
+      // The combined shift happens in the wide (pre-truncate) type, so c1 + c2
+      // only has to be a valid shift amount for that type.
+      if (N0.hasOneUse() && InnerShift.hasOneUse() &&
+          c1 + c2 < InnerShiftSize) {
+        SDLoc DL(N);
+        SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
+        SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
+                                       InnerShift.getOperand(0), NewShiftAmt);
+        // The narrow srl leaves the top c2 bits of the VT-sized result zero, so
+        // keep only the low (OpSizeInBits - c2) bits. The mask is built as an
+        // APInt of the wide width; '1 << n' on a plain int cannot represent it.
+        // NOTE(review): assumes OpSizeInBits is the bit width of VT and
+        // InnerShiftSize that of InnerShiftVT (both defined outside this hunk).
+        SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
+                                                            OpSizeInBits - c2),
+                                       DL, InnerShiftVT);
+        SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
+        return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
+      }
     }
   }
 
diff --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
index 4f6051e2a6c..403839044cd 100644
--- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -670,8 +670,7 @@ define i64 @reg64_lshr_by_masked_negated_unfolded_add_b(i64 %val, i64 %a, i64 %b
 define i32 @t(i64 %x) {
 ; CHECK-LABEL: t:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr x8, x0, #13
-; CHECK-NEXT:    ubfx x0, x8, #4, #28
+; CHECK-NEXT:    ubfx x0, x0, #17, #28
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %s = lshr i64 %x, 13
diff --git a/llvm/test/CodeGen/PowerPC/trunc-srl-load.ll b/llvm/test/CodeGen/PowerPC/trunc-srl-load.ll
index a1af256eccb..5dc0534ef44 100644
--- a/llvm/test/CodeGen/PowerPC/trunc-srl-load.ll
+++ b/llvm/test/CodeGen/PowerPC/trunc-srl-load.ll
@@ -25,8 +25,7 @@ cond.false:                                       ; preds = %entry
 define i32 @sh_trunc_sh(i64 %x) {
 ; CHECK-LABEL: sh_trunc_sh:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    rldicl 3, 3, 51, 13
-; CHECK-NEXT:    srwi 3, 3, 4
+; CHECK-NEXT:    rldicl 3, 3, 47, 36
 ; CHECK-NEXT:    blr
   %s = lshr i64 %x, 13
   %t = trunc i64 %s to i32