diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-31 19:50:45 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-31 19:50:45 +0000 |
commit | 6be48e4aa70c50d58b9759187e8f601a2da7b23c (patch) | |
tree | f4c648ece065a63d761bf8c761c599cc36f7df2d | |
parent | 600495266100380fd7152feb7333b0353ce839b7 (diff) | |
download | bcm5719-llvm-6be48e4aa70c50d58b9759187e8f601a2da7b23c.tar.gz bcm5719-llvm-6be48e4aa70c50d58b9759187e8f601a2da7b23c.zip |
[X86] Improve 64-bit shifts on 32-bit targets (PR14593)
As discussed on PR14593, this patch adds support for lowering to SHLD/SHRD from the patterns generated by DAGTypeLegalizer::ExpandShiftWithKnownAmountBit.
Differential Revision: https://reviews.llvm.org/D23000
llvm-svn: 277299
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 34 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/legalize-shift-64.ll | 48 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/shift-double-x86_64.ll | 26 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/shift-double.ll | 91 |
4 files changed, 72 insertions, 127 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 32382da57ce..311455aea0c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28631,18 +28631,23 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, unsigned Opc = X86ISD::SHLD; SDValue Op0 = N0.getOperand(0); SDValue Op1 = N1.getOperand(0); - if (ShAmt0.getOpcode() == ISD::SUB) { + if (ShAmt0.getOpcode() == ISD::SUB || + ShAmt0.getOpcode() == ISD::XOR) { Opc = X86ISD::SHRD; std::swap(Op0, Op1); std::swap(ShAmt0, ShAmt1); } + // OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> SHLD( X, Y, C ) + // OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> SHRD( X, Y, C ) + // OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> SHLD( X, Y, C ) + // OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> SHRD( X, Y, C ) unsigned Bits = VT.getSizeInBits(); if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) { SDValue ShAmt1Op1 = ShAmt1.getOperand(1); - if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE) + if (ShAmt1Op1.getOpcode() == ISD::TRUNCATE) ShAmt1Op1 = ShAmt1Op1.getOperand(0); if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0) return DAG.getNode(Opc, DL, VT, @@ -28652,12 +28657,33 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, } } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) { ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0); - if (ShAmt0C && - ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits) + if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), N1.getOperand(0), DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); + } else if (ShAmt1.getOpcode() == ISD::XOR) { + SDValue Mask = ShAmt1.getOperand(1); + if (ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask)) { + unsigned InnerShift = (X86ISD::SHLD == Opc ? ISD::SRL : ISD::SHL); + SDValue ShAmt1Op0 = ShAmt1.getOperand(0); + if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE) + ShAmt1Op0 = ShAmt1Op0.getOperand(0); + if (MaskC->getSExtValue() == (Bits - 1) && ShAmt1Op0 == ShAmt0) { + if (Op1.getOpcode() == InnerShift && + isa<ConstantSDNode>(Op1.getOperand(1)) && + Op1.getConstantOperandVal(1) == 1) { + return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0), + DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); + } + // Test for ADD( Y, Y ) as an equivalent to SHL( Y, 1 ). + if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD && + Op1.getOperand(0) == Op1.getOperand(1)) { + return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0), + DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0)); + } + } + } } return SDValue(); diff --git a/llvm/test/CodeGen/X86/legalize-shift-64.ll b/llvm/test/CodeGen/X86/legalize-shift-64.ll index fff917f1bfa..4aabc69b0ff 100644 --- a/llvm/test/CodeGen/X86/legalize-shift-64.ll +++ b/llvm/test/CodeGen/X86/legalize-shift-64.ll @@ -24,19 +24,13 @@ define i64 @test2(i64 %xx, i32 %test) nounwind { ; CHECK-LABEL: test2: ; CHECK: # BB#0: ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch -; CHECK-NEXT: andb $7, %ch -; CHECK-NEXT: movb %ch, %cl -; CHECK-NEXT: shll %cl, %esi -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: shrl %edx -; CHECK-NEXT: xorb $31, %cl -; CHECK-NEXT: shrl %cl, %edx -; CHECK-NEXT: orl %esi, %edx -; CHECK-NEXT: movb %ch, %cl +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: andb $7, %cl +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: shll %cl, %eax +; CHECK-NEXT: shldl %cl, %esi, %edx ; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl %and = and i32 %test, 7 @@ -48,20 +42,12 @@ define i64 @test2(i64 %xx, i32 %test) nounwind { define i64 @test3(i64 %xx, i32 %test) nounwind { ; CHECK-LABEL: test3: ; CHECK: # BB#0: -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch -; CHECK-NEXT: andb $7, %ch -; CHECK-NEXT: movb %ch, %cl -; CHECK-NEXT: shrl %cl, %esi -; CHECK-NEXT: leal (%edx,%edx), %eax -; CHECK-NEXT: xorb $31, %cl -; CHECK-NEXT: shll %cl, %eax -; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: movb %ch, %cl +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: andb $7, %cl +; CHECK-NEXT: shrdl %cl, %edx, %eax ; CHECK-NEXT: shrl %cl, %edx -; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl %and = and i32 %test, 7 %sh_prom = zext i32 %and to i64 @@ -72,20 +58,12 @@ define i64 @test3(i64 %xx, i32 %test) nounwind { define i64 @test4(i64 %xx, i32 %test) nounwind { ; CHECK-LABEL: test4: ; CHECK: # BB#0: -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch -; CHECK-NEXT: andb $7, %ch -; CHECK-NEXT: movb %ch, %cl -; CHECK-NEXT: shrl %cl, %esi -; CHECK-NEXT: leal (%edx,%edx), %eax -; CHECK-NEXT: xorb $31, %cl -; CHECK-NEXT: shll %cl, %eax -; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: movb %ch, %cl +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: andb $7, %cl +; CHECK-NEXT: shrdl %cl, %edx, %eax ; CHECK-NEXT: sarl %cl, %edx -; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl %and = and i32 %test, 7 %sh_prom = zext i32 %and to i64 diff --git a/llvm/test/CodeGen/X86/shift-double-x86_64.ll b/llvm/test/CodeGen/X86/shift-double-x86_64.ll index 706f066f21b..28f6731e25e 100644 --- a/llvm/test/CodeGen/X86/shift-double-x86_64.ll +++ b/llvm/test/CodeGen/X86/shift-double-x86_64.ll @@ -67,13 +67,7 @@ define i64 @test5(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test5: ; CHECK: # BB#0: ; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: xorl $63, %ecx -; CHECK-NEXT: shrq %rsi -; CHECK-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill> -; CHECK-NEXT: shrq %cl, %rsi -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shlq %cl, %rdi -; CHECK-NEXT: orq %rsi, %rdi +; CHECK-NEXT: shldq %cl, %rsi, %rdi ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 @@ -88,13 +82,8 @@ define i64 @test6(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test6: ; CHECK: # BB#0: ; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: xorl $63, %ecx -; CHECK-NEXT: leaq (%rsi,%rsi), %rax -; CHECK-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill> -; CHECK-NEXT: shlq %cl, %rax -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrq %cl, %rdi -; CHECK-NEXT: orq %rdi, %rax +; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 %lo2 = shl i64 %lo, 1 @@ -108,13 +97,8 @@ define i64 @test7(i64 %hi, i64 %lo, i64 %bits) nounwind { ; CHECK-LABEL: test7: ; CHECK: # BB#0: ; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: xorl $63, %ecx -; CHECK-NEXT: leaq (%rsi,%rsi), %rax -; CHECK-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill> -; CHECK-NEXT: shlq %cl, %rax -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrq %cl, %rdi -; CHECK-NEXT: orq %rdi, %rax +; CHECK-NEXT: shrdq %cl, %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq %bits64 = xor i64 %bits, 63 %lo2 = add i64 %lo, %lo diff --git a/llvm/test/CodeGen/X86/shift-double.ll b/llvm/test/CodeGen/X86/shift-double.ll index 0cb6a575255..ba9baea9862 100644 --- a/llvm/test/CodeGen/X86/shift-double.ll +++ b/llvm/test/CodeGen/X86/shift-double.ll @@ -151,19 +151,13 @@ define i64 @test8(i64 %val, i32 %bits) nounwind { ; CHECK-LABEL: test8: ; CHECK: # BB#0: ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-NEXT: movb %ch, %cl -; CHECK-NEXT: shll %cl, %esi -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: shrl %edx -; CHECK-NEXT: andb $31, %cl -; CHECK-NEXT: xorb $31, %cl -; CHECK-NEXT: shrl %cl, %edx -; CHECK-NEXT: orl %esi, %edx -; CHECK-NEXT: movb %ch, %cl +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: shll %cl, %eax +; CHECK-NEXT: andb $31, %cl +; CHECK-NEXT: shldl %cl, %esi, %edx ; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl %and = and i32 %bits, 31 @@ -176,18 +170,13 @@ define i64 @test9(i64 %val, i32 %bits) nounwind { ; CHECK-LABEL: test9: ; CHECK: # BB#0: ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movb %ch, %cl -; CHECK-NEXT: shrl %cl, %esi -; CHECK-NEXT: leal (%edx,%edx), %eax -; CHECK-NEXT: andb $31, %cl -; CHECK-NEXT: xorb $31, %cl -; CHECK-NEXT: shll %cl, %eax -; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: movb %ch, %cl +; CHECK-NEXT: movl %esi, %edx ; CHECK-NEXT: sarl %cl, %edx +; CHECK-NEXT: andb $31, %cl +; CHECK-NEXT: shrdl %cl, %esi, %eax ; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl %and = and i32 %bits, 31 @@ -200,18 +189,13 @@ define i64 @test10(i64 %val, i32 %bits) nounwind { ; CHECK-LABEL: test10: ; CHECK: # BB#0: ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %ch +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movb %ch, %cl -; CHECK-NEXT: shrl %cl, %esi -; CHECK-NEXT: leal (%edx,%edx), %eax -; CHECK-NEXT: andb $31, %cl -; CHECK-NEXT: xorb $31, %cl -; CHECK-NEXT: shll %cl, %eax -; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: movb %ch, %cl +; CHECK-NEXT: movl %esi, %edx ; CHECK-NEXT: shrl %cl, %edx +; CHECK-NEXT: andb $31, %cl +; CHECK-NEXT: shrdl %cl, %esi, %eax ; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl %and = and i32 %bits, 31 @@ -291,19 +275,10 @@ define i32 @test14(i32 %hi, i32 %lo, i32 %bits) nounwind { define i32 @test15(i32 %hi, i32 %lo, i32 %bits) nounwind { ; CHECK-LABEL: test15: ; CHECK: # BB#0: -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: xorl $31, %ecx -; CHECK-NEXT: shrl %esi -; CHECK-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill> -; CHECK-NEXT: shrl %cl, %esi -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shll %cl, %eax -; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: popl %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: shldl %cl, %edx, %eax ; CHECK-NEXT: retl %bits32 = xor i32 %bits, 31 %lo2 = lshr i32 %lo, 1 @@ -316,19 +291,10 @@ define i32 @test15(i32 %hi, i32 %lo, i32 %bits) nounwind { define i32 @test16(i32 %hi, i32 %lo, i32 %bits) nounwind { ; CHECK-LABEL: test16: ; CHECK: # BB#0: -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: xorl $31, %ecx -; CHECK-NEXT: addl %esi, %esi -; CHECK-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill> -; CHECK-NEXT: shll %cl, %esi -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrl %cl, %eax -; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: popl %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: shrdl %cl, %edx, %eax ; CHECK-NEXT: retl %bits32 = xor i32 %bits, 31 %lo2 = shl i32 %lo, 1 @@ -341,19 +307,10 @@ define i32 @test16(i32 %hi, i32 %lo, i32 %bits) nounwind { define i32 @test17(i32 %hi, i32 %lo, i32 %bits) nounwind { ; CHECK-LABEL: test17: ; CHECK: # BB#0: -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: xorl $31, %ecx -; CHECK-NEXT: addl %esi, %esi -; CHECK-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill> -; CHECK-NEXT: shll %cl, %esi -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrl %cl, %eax -; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: popl %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: shrdl %cl, %edx, %eax ; CHECK-NEXT: retl %bits32 = xor i32 %bits, 31 %lo2 = add i32 %lo, %lo |