-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  25
-rw-r--r--  llvm/test/CodeGen/X86/combine-rotates.ll       42
-rw-r--r--  llvm/test/CodeGen/X86/rotate4.ll               20
3 files changed, 30 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index eb02c6502ba..fa4c1dd4aae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4823,7 +4823,8 @@ bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
-static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
+static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
+ SelectionDAG &DAG) {
// If EltSize is a power of 2 then:
//
// (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
@@ -4858,9 +4859,13 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
unsigned MaskLoBits = 0;
if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
- if (NegC->getAPIntValue() == EltSize - 1) {
+ KnownBits Known;
+ DAG.computeKnownBits(Neg.getOperand(0), Known);
+ unsigned Bits = Log2_64(EltSize);
+ if (NegC->getAPIntValue().getActiveBits() <= Bits &&
+ ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
Neg = Neg.getOperand(0);
- MaskLoBits = Log2_64(EltSize);
+ MaskLoBits = Bits;
}
}
}
@@ -4875,10 +4880,16 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
// On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
// Pos'. The truncation is redundant for the purpose of the equality.
- if (MaskLoBits && Pos.getOpcode() == ISD::AND)
- if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
- if (PosC->getAPIntValue() == EltSize - 1)
+ if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
+ KnownBits Known;
+ DAG.computeKnownBits(Pos.getOperand(0), Known);
+ if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
+ ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
+ MaskLoBits))
Pos = Pos.getOperand(0);
+ }
+ }

// The condition we need is now:
//
@@ -4934,7 +4945,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
// (srl x, (*ext y))) ->
// (rotr x, y) or (rotl x, (sub 32, y))
EVT VT = Shifted.getValueType();
- if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
+ if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg).getNode();
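
The change above relaxes matchRotateSub's mask test: instead of requiring the
AND mask to be exactly EltSize - 1, any narrower mask is accepted when the low
bits it fails to cover are already known zero in the masked operand. Below is a
minimal standalone sketch of that acceptance test, using plain 64-bit
arithmetic in place of LLVM's APInt/KnownBits types (the helper name and
scalar types are illustrative, not LLVM API):

    #include <cassert>
    #include <cstdint>

    // Relaxed mask test: MaskC may be narrower than the full low-bit mask,
    // as long as every low bit it misses is known zero in the operand being
    // masked. Mirrors getActiveBits() <= Bits and
    // (MaskC | Known.Zero).countTrailingOnes() >= Bits from the patch.
    static bool maskCoversLowBits(uint64_t MaskC, uint64_t KnownZero,
                                  unsigned Bits) {
      if (MaskC >> Bits)               // reject set bits above the low Bits
        return false;
      uint64_t Low = (1ULL << Bits) - 1;
      return ((MaskC | KnownZero) & Low) == Low;
    }

    int main() {
      // Mask 30 on a value whose bit 0 is known zero (e.g. 'n << 1')
      // behaves like the full mask 31 for a 32-bit rotate amount.
      assert(maskCoversLowBits(30, /*KnownZero=*/1, /*Bits=*/5));
      assert(!maskCoversLowBits(30, 0, 5)); // bit 0 uncovered: reject
      assert(maskCoversLowBits(31, 0, 5));  // old exact-mask case still ok
      return 0;
    }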
diff --git a/llvm/test/CodeGen/X86/combine-rotates.ll b/llvm/test/CodeGen/X86/combine-rotates.ll
index 1477864ae0a..67dab647a0a 100644
--- a/llvm/test/CodeGen/X86/combine-rotates.ll
+++ b/llvm/test/CodeGen/X86/combine-rotates.ll
@@ -61,27 +61,14 @@ define <4 x i32> @combine_vec_rot_rot_splat_zero(<4 x i32> %x) {
define <4 x i32> @rotate_demanded_bits(<4 x i32>, <4 x i32>) {
; XOP-LABEL: rotate_demanded_bits:
; XOP: # %bb.0:
-; XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [30,30,30,30]
-; XOP-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOP-NEXT: vpshld %xmm1, %xmm0, %xmm3
-; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; XOP-NEXT: vpsubd %xmm1, %xmm4, %xmm1
-; XOP-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOP-NEXT: vpsubd %xmm1, %xmm4, %xmm1
-; XOP-NEXT: vpshld %xmm1, %xmm0, %xmm0
-; XOP-NEXT: vpor %xmm3, %xmm0, %xmm0
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
; AVX512-LABEL: rotate_demanded_bits:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [30,30,30,30]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm3
-; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512-NEXT: vpsubd %xmm1, %xmm4, %xmm1
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpor %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512-NEXT: vprolvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%3 = and <4 x i32> %1, <i32 30, i32 30, i32 30, i32 30>
%4 = shl <4 x i32> %0, %3
@@ -117,28 +104,15 @@ define <4 x i32> @rotate_demanded_bits_3(<4 x i32>, <4 x i32>) {
; XOP-LABEL: rotate_demanded_bits_3:
; XOP: # %bb.0:
; XOP-NEXT: vpaddd %xmm1, %xmm1, %xmm1
-; XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [30,30,30,30]
-; XOP-NEXT: vpand %xmm2, %xmm1, %xmm3
-; XOP-NEXT: vpshld %xmm3, %xmm0, %xmm3
-; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; XOP-NEXT: vpsubd %xmm1, %xmm4, %xmm1
-; XOP-NEXT: vpand %xmm2, %xmm1, %xmm1
-; XOP-NEXT: vpsubd %xmm1, %xmm4, %xmm1
-; XOP-NEXT: vpshld %xmm1, %xmm0, %xmm0
-; XOP-NEXT: vpor %xmm0, %xmm3, %xmm0
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
; AVX512-LABEL: rotate_demanded_bits_3:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddd %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [30,30,30,30]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512-NEXT: vpsllvd %xmm3, %xmm0, %xmm3
-; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX512-NEXT: vpsubd %xmm1, %xmm4, %xmm1
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpor %xmm0, %xmm3, %xmm0
+; AVX512-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512-NEXT: vprolvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%3 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%4 = and <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
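
For reference, a scalar per-lane model of what the rotate_demanded_bits tests
above compute: the shift amount amt = y & 30 is always even, so the masked
negation (0 - amt) & 30 selects exactly the complementary rotate distance, and
the shl/lshr/or chain is a variable rotate left in every lane. That is why a
single vprotd (XOP) or vprolvd (AVX512) is a valid lowering. The C++ below is
an illustrative model of the test function, not generated code:

    #include <array>
    #include <cstdint>

    // Per-lane model of rotate_demanded_bits from combine-rotates.ll.
    // With amt even, (0 - amt) & 30 == (32 - amt) & 31, so each lane is
    // rotl(x, amt).
    static std::array<uint32_t, 4>
    rotateDemandedBits(std::array<uint32_t, 4> x, std::array<uint32_t, 4> y) {
      std::array<uint32_t, 4> r{};
      for (int i = 0; i < 4; ++i) {
        uint32_t amt = y[i] & 30;
        r[i] = (x[i] << amt) | (x[i] >> ((0u - amt) & 30));
      }
      return r;
    }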
diff --git a/llvm/test/CodeGen/X86/rotate4.ll b/llvm/test/CodeGen/X86/rotate4.ll
index 7982b454d1b..d2a76c4e6d6 100644
--- a/llvm/test/CodeGen/X86/rotate4.ll
+++ b/llvm/test/CodeGen/X86/rotate4.ll
@@ -284,15 +284,9 @@ define void @rotate_right_m16(i16* %p, i32 %amount) {
define i32 @rotate_demanded_bits(i32, i32) {
; CHECK-LABEL: rotate_demanded_bits:
; CHECK: # %bb.0:
+; CHECK-NEXT: andb $30, %sil
; CHECK-NEXT: movl %esi, %ecx
-; CHECK-NEXT: andl $30, %ecx
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: shll %cl, %eax
-; CHECK-NEXT: negl %ecx
-; CHECK-NEXT: andb $30, %cl
-; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: shrl %cl, %edi
-; CHECK-NEXT: orl %eax, %edi
+; CHECK-NEXT: roll %cl, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%3 = and i32 %1, 30
@@ -324,16 +318,10 @@ define i32 @rotate_demanded_bits_2(i32, i32) {
define i32 @rotate_demanded_bits_3(i32, i32) {
; CHECK-LABEL: rotate_demanded_bits_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: addl %esi, %esi
-; CHECK-NEXT: movl %esi, %ecx
-; CHECK-NEXT: andb $30, %cl
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: shll %cl, %eax
-; CHECK-NEXT: negl %esi
+; CHECK-NEXT: addb %sil, %sil
; CHECK-NEXT: andb $30, %sil
; CHECK-NEXT: movl %esi, %ecx
-; CHECK-NEXT: shrl %cl, %edi
-; CHECK-NEXT: orl %eax, %edi
+; CHECK-NEXT: roll %cl, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%3 = shl i32 %1, 1
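
The same reasoning applies to the scalar rotate_demanded_bits_3 test: the
amount (n << 1) & 30 has bit 0 known zero, so the relaxed mask test fires and
the whole shl/neg/and/lshr/or sequence collapses to the single roll seen in
the checks above. A small illustrative C++ model of the function:

    #include <cstdint>

    // Scalar model of rotate_demanded_bits_3 from rotate4.ll: amt is even,
    // so (0 - amt) & 30 is the complementary rotate distance and the whole
    // expression equals rotl(x, amt), i.e. one 'roll %cl, %edi' suffices.
    static uint32_t rotateDemandedBits3(uint32_t x, uint32_t n) {
      uint32_t amt = (n << 1) & 30;
      return (x << amt) | (x >> ((0u - amt) & 30));
    }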