author     Sanjay Patel <spatel@rotateright.com>    2019-08-02 19:33:46 +0000
committer  Sanjay Patel <spatel@rotateright.com>    2019-08-02 19:33:46 +0000
commit     68264558f9998c5d72dcb7b4bc7d784d77a923ea (patch)
tree       962102d962a5ff55349ad7756c199391b0478d08
parent     d5765ee5645948782bab583f6f8bb8a248f6c263 (diff)
[DAGCombiner] try to convert opposing shifts to casts
This reverses a questionable IR canonicalization when a truncate
is free:
sra (add (shl X, N1C), AddC), N1C -->
sext (add (trunc X to (width - N1C)), AddC')
https://rise4fun.com/Alive/slRC
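As a concrete instance (the i64, shift-by-32 case exercised in the updated AArch64 and X86 tests), here is a sketch of the pattern before and after the combine, written as IR for readability even though the transform itself runs on SelectionDAG nodes, and assuming a target where truncating i64 to i32 is free:

  ; before: sign-extension-in-register via opposing shift pair
  %conv = shl i64 %r, 32
  %sext = add i64 %conv, 4294967296   ; AddC = 1 << 32
  %shr  = ashr i64 %sext, 32

  ; after: AddC' = trunc(AddC lshr 32) = 1
  %t    = trunc i64 %r to i32
  %a    = add i32 %t, 1
  %ext  = sext i32 %a to i64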
More details in PR42644:
https://bugs.llvm.org/show_bug.cgi?id=42644
I limited this to pre-legalization for code simplicity because that
should be enough to reverse the IR patterns. I don't have any
evidence (no regression test diffs) that we need to try this later.
Differential Revision: https://reviews.llvm.org/D65607
llvm-svn: 367710
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  26
-rw-r--r--  llvm/test/CodeGen/AArch64/shift-mod.ll        |  10
-rw-r--r--  llvm/test/CodeGen/X86/shift-combine.ll        |  56
3 files changed, 54 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9d8850b59f5..656334b3dba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7616,6 +7616,32 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
     }
   }
 
+  // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
+  //   sra (add (shl X, N1C), AddC), N1C -->
+  //   sext (add (trunc X to (width - N1C)), AddC')
+  if (!LegalOperations && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
+      N0.getOperand(0).getOpcode() == ISD::SHL &&
+      N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
+    if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
+      SDValue Shl = N0.getOperand(0);
+      // Determine what the truncate's type would be and ask the target if that
+      // is a free operation.
+      LLVMContext &Ctx = *DAG.getContext();
+      unsigned ShiftAmt = N1C->getZExtValue();
+      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+      if (VT.isVector())
+        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+      if (TLI.isTruncateFree(VT, TruncVT)) {
+        SDLoc DL(N);
+        SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+        SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
+                             trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
+        SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+        return DAG.getSExtOrTrunc(Add, DL, VT);
+      }
+    }
+  }
+
   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
   if (N1.getOpcode() == ISD::TRUNCATE &&
       N1.getOperand(0).getOpcode() == ISD::AND) {
diff --git a/llvm/test/CodeGen/AArch64/shift-mod.ll b/llvm/test/CodeGen/AArch64/shift-mod.ll
index 9bfd1d835a0..6c3521d2600 100644
--- a/llvm/test/CodeGen/AArch64/shift-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-mod.ll
@@ -78,9 +78,8 @@ entry:
 define i64 @ashr_add_shl_i32(i64 %r) {
 ; CHECK-LABEL: ashr_add_shl_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #4294967296
-; CHECK-NEXT:    add x8, x8, x0, lsl #32
-; CHECK-NEXT:    asr x0, x8, #32
+; CHECK-NEXT:    add w8, w0, #1 // =1
+; CHECK-NEXT:    sxtw x0, w8
 ; CHECK-NEXT:    ret
   %conv = shl i64 %r, 32
   %sext = add i64 %conv, 4294967296
@@ -91,9 +90,8 @@ define i64 @ashr_add_shl_i32(i64 %r) {
 define i64 @ashr_add_shl_i8(i64 %r) {
 ; CHECK-LABEL: ashr_add_shl_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #72057594037927936
-; CHECK-NEXT:    add x8, x8, x0, lsl #56
-; CHECK-NEXT:    asr x0, x8, #56
+; CHECK-NEXT:    add w8, w0, #1 // =1
+; CHECK-NEXT:    sxtb x0, w8
 ; CHECK-NEXT:    ret
   %conv = shl i64 %r, 56
   %sext = add i64 %conv, 72057594037927936
diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll
index a470d095be0..d61838f2f1c 100644
--- a/llvm/test/CodeGen/X86/shift-combine.ll
+++ b/llvm/test/CodeGen/X86/shift-combine.ll
@@ -168,10 +168,8 @@ define i64 @ashr_add_shl_i32(i64 %r) nounwind {
 ;
 ; X64-LABEL: ashr_add_shl_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    shlq $32, %rdi
-; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    sarq $32, %rax
+; X64-NEXT:    incl %edi
+; X64-NEXT:    movslq %edi, %rax
 ; X64-NEXT:    retq
   %conv = shl i64 %r, 32
   %sext = add i64 %conv, 4294967296
@@ -182,20 +180,17 @@ define i64 @ashr_add_shl_i32(i64 %r) nounwind {
 define i64 @ashr_add_shl_i8(i64 %r) nounwind {
 ; X32-LABEL: ashr_add_shl_i8:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    shll $24, %edx
-; X32-NEXT:    addl $33554432, %edx # imm = 0x2000000
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    sarl $24, %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    addb $2, %al
+; X32-NEXT:    movsbl %al, %eax
+; X32-NEXT:    movl %eax, %edx
 ; X32-NEXT:    sarl $31, %edx
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: ashr_add_shl_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    shlq $56, %rdi
-; X64-NEXT:    movabsq $144115188075855872, %rax # imm = 0x200000000000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    sarq $56, %rax
+; X64-NEXT:    addb $2, %dil
+; X64-NEXT:    movsbq %dil, %rax
 ; X64-NEXT:    retq
   %conv = shl i64 %r, 56
   %sext = add i64 %conv, 144115188075855872
@@ -209,34 +204,31 @@ define <4 x i32> @ashr_add_shl_v4i8(<4 x i32> %r) nounwind {
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT:    shll $24, %edi
-; X32-NEXT:    shll $24, %esi
-; X32-NEXT:    shll $24, %edx
-; X32-NEXT:    shll $24, %ecx
-; X32-NEXT:    addl $16777216, %ecx # imm = 0x1000000
-; X32-NEXT:    addl $16777216, %edx # imm = 0x1000000
-; X32-NEXT:    addl $16777216, %esi # imm = 0x1000000
-; X32-NEXT:    addl $16777216, %edi # imm = 0x1000000
-; X32-NEXT:    sarl $24, %edi
-; X32-NEXT:    sarl $24, %esi
-; X32-NEXT:    sarl $24, %edx
-; X32-NEXT:    sarl $24, %ecx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X32-NEXT:    incb %dh
+; X32-NEXT:    movsbl %dh, %esi
+; X32-NEXT:    incb %ch
+; X32-NEXT:    movsbl %ch, %edi
+; X32-NEXT:    incb %dl
+; X32-NEXT:    movsbl %dl, %edx
+; X32-NEXT:    incb %cl
+; X32-NEXT:    movsbl %cl, %ecx
 ; X32-NEXT:    movl %ecx, 12(%eax)
 ; X32-NEXT:    movl %edx, 8(%eax)
-; X32-NEXT:    movl %esi, 4(%eax)
-; X32-NEXT:    movl %edi, (%eax)
+; X32-NEXT:    movl %edi, 4(%eax)
+; X32-NEXT:    movl %esi, (%eax)
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %edi
 ; X32-NEXT:    retl $4
 ;
 ; X64-LABEL: ashr_add_shl_v4i8:
 ; X64:       # %bb.0:
+; X64-NEXT:    pcmpeqd %xmm1, %xmm1
+; X64-NEXT:    psubd %xmm1, %xmm0
 ; X64-NEXT:    pslld $24, %xmm0
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    psrad $24, %xmm0
 ; X64-NEXT:    retq
   %conv = shl <4 x i32> %r, <i32 24, i32 24, i32 24, i32 24>

