summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h43
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp79
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp13
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.h5
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLowering.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLowering.h2
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp27
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h7
-rw-r--r--llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll83
-rw-r--r--llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll80
-rw-r--r--llvm/test/CodeGen/AMDGPU/commute-shifts.ll12
-rw-r--r--llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll811
-rw-r--r--llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll966
-rw-r--r--llvm/test/CodeGen/PowerPC/shift-cmp.ll16
-rw-r--r--llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll436
-rw-r--r--llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll246
16 files changed, 1330 insertions, 1500 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index cafea6da663..ebe8872e5de 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -539,6 +539,12 @@ public:
return hasAndNotCompare(X);
}
+ /// Return true if the target has a bit-test instruction, i.e. one that
+ /// evaluates: (X & (1 << Y)) ==/!= 0
+ /// Combines can use this knowledge to avoid breaking such a pattern, or to
+ /// create it when it could be recognized. The default answer is 'false'.
+ virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
+
/// There are two ways to clear extreme bits (either low or high):
/// Mask: x & (-1 << y) (the instcombine canonical form)
/// Shifts: x >> y << y
@@ -571,6 +577,38 @@ public:
return false;
}
+ /// Given the pattern (X & (C l>>/<< Y)) ==/!= 0, whose logical shift has
+ /// opcode OldShiftOpcode, return true if it should be transformed into
+ /// ((X <</l>> Y) & C) ==/!= 0, whose shift has opcode NewShiftOpcode.
+ /// CC is the constant 'C'; XC is 'X' as a constant, or null if 'X' is not one.
+ /// WARNING: if 'X' is a constant, the fold may loop endlessly (see below)!
+ /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
+ /// here because it can end up being not linked in.
+ virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+ unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+ SelectionDAG &DAG) const {
+ if (hasBitTest(X, Y)) {
+ // One interesting pattern that we'd want to form is 'bit test':
+ // ((1 << Y) & C) ==/!= 0
+ // But we also need to be careful not to try to reverse that fold.
+
+ // Is the existing shift already '1 << Y'?
+ if (OldShiftOpcode == ISD::SHL && CC->isOne())
+ return false; // Keep the 'bit test' pattern.
+
+ // Will the new shift be '1 << Y' after the transform?
+ if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
+ return true; // Do form the 'bit test' pattern.
+ }
+
+ // If 'X' is a constant, and we transform, then we will immediately
+ // try to undo the fold, thus causing an endless combine loop.
+ // So by default, let's assume everyone prefers the fold
+ // iff 'X' is not a constant.
+ return !XC;
+ }
+
/// These two forms are equivalent:
/// sub %y, (xor %x, -1)
/// add (add %x, 1), %y
@@ -4108,6 +4146,11 @@ private:
DAGCombinerInfo &DCI,
const SDLoc &DL) const;
+ // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+ SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL) const;
+
SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue CompTargetNode, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL,
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d6ac17e8e5e..8f2d45a1f40 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2760,6 +2760,77 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
return T2;
}
+// Try to fold: (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL) const {
+ assert(isConstOrConstSplat(N1C) &&
+ isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
+ "Should be a comparison with 0.");
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Valid only for [in]equality comparisons.");
+
+ unsigned NewShiftOpcode;
+ SDValue X, C, Y;
+
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Look for '(C l>>/<< Y)'; on a match, C, Y and NewShiftOpcode describe it.
+ auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
+ // The shift must have a single use.
+ if (!V.hasOneUse())
+ return false;
+ unsigned OldShiftOpcode = V.getOpcode();
+ switch (OldShiftOpcode) {
+ case ISD::SHL:
+ NewShiftOpcode = ISD::SRL;
+ break;
+ case ISD::SRL:
+ NewShiftOpcode = ISD::SHL;
+ break;
+ default:
+ return false; // Must be a logical shift.
+ }
+ // The shifted value must be a constant (or a splat of one).
+ // FIXME: best to use isConstantOrConstantVector().
+ C = V.getOperand(0);
+ ConstantSDNode *CC =
+ isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
+ if (!CC)
+ return false;
+ Y = V.getOperand(1);
+
+ ConstantSDNode *XC =
+ isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
+ return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
+ };
+
+ // LHS of the comparison should be a one-use 'and'.
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
+ return SDValue();
+
+ X = N0.getOperand(0);
+ SDValue Mask = N0.getOperand(1);
+
+ // 'and' is commutative, so retry with the operands swapped.
+ if (!Match(Mask)) {
+ std::swap(X, Mask);
+ if (!Match(Mask))
+ return SDValue();
+ }
+
+ EVT VT = X.getValueType();
+
+ // Produce:
+ // ((X 'NewShiftOpcode' Y) & C) Cond 0
+ SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
+ SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
+ SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
+ return T2;
+}
+
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
@@ -3328,6 +3399,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
+ // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+ if (C1.isNullValue())
+ if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ VT, N0, N1, Cond, DCI, dl))
+ return CC;
+ }
+
// If we have "setcc X, C0", check to see if we can shrink the immediate
// by changing cc.
// TODO: Support this for vectors after legalize ops.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b78837aae83..381d0ae41c7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12042,6 +12042,19 @@ bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
return Mask->getValue().isPowerOf2();
}
+bool AArch64TargetLowering::
+ shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+ unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+ SelectionDAG &DAG) const {
+ // Does the baseline TargetLowering heuristic already reject the fold?
+ if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
+ return false;
+ // Otherwise fold scalars always; fold non-scalars only if we end up with 'shl'.
+ return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
+}
+
void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
// Update IsSplitCSR in AArch64unctionInfo.
AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 34e1fdf441e..21a0b16e66b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -488,6 +488,11 @@ public:
return VT.getSizeInBits() >= 64; // vector 'bic'
}
+ bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+ unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+ SelectionDAG &DAG) const override;
+
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
if (DAG.getMachineFunction().getFunction().hasMinSize())
return false;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index fef5a98cdb0..269de1513e7 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1817,6 +1817,10 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return false;
}
+bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
+ return X.getValueType().isScalarInteger(); // Scalar integers only: 'tstbit'
+}
+
bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 4e467cb2272..e9222406925 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -127,6 +127,8 @@ namespace HexagonISD {
bool isCheapToSpeculateCtlz() const override { return true; }
bool isCtlzFast() const override { return true; }
+ bool hasBitTest(SDValue X, SDValue Y) const override;
+
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
/// Return true if an FMA operation is faster than a pair of mul and add
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 14fa8e1ecf5..f0fd6ce7ae5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5022,6 +5022,33 @@ bool X86TargetLowering::hasAndNot(SDValue Y) const {
return Subtarget.hasSSE2();
}
+bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
+ return X.getValueType().isScalarInteger(); // Scalar integers only: 'bt'
+}
+
+bool X86TargetLowering::
+ shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+ unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+ SelectionDAG &DAG) const {
+ // Does the baseline TargetLowering heuristic already reject the fold?
+ if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
+ return false;
+ // For scalars this transform is always beneficial.
+ if (X.getValueType().isScalarInteger())
+ return true;
+ // If all the shift amounts are identical, then the transform is beneficial
+ // even with rudimentary SSE2 shifts.
+ if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
+ return true;
+ // If we have AVX2 with its powerful shift operations, then it's also good.
+ if (Subtarget.hasAVX2())
+ return true;
+ // Pre-AVX2 vector codegen for this pattern is best for the 'shl' variant.
+ return NewShiftOpcode == ISD::SHL;
+}
+
bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
const SDNode *N, CombineLevel Level) const {
assert(((N->getOpcode() == ISD::SHL &&
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index e0be03bc3f9..dd11cc4497a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -840,6 +840,13 @@ namespace llvm {
bool hasAndNot(SDValue Y) const override;
+ bool hasBitTest(SDValue X, SDValue Y) const override;
+
+ bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+ unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+ SelectionDAG &DAG) const override;
+
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index 433904a6fed..fcbe5a615c3 100644
--- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -15,11 +15,9 @@
define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_signbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: tst w8, #0xff
+; CHECK-NEXT: lsl w8, w0, w1
+; CHECK-NEXT: tst w8, #0x80
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i8 128, %y
@@ -31,11 +29,9 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_lowestbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: tst w8, #0xff
+; CHECK-NEXT: lsl w8, w0, w1
+; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i8 1, %y
@@ -47,11 +43,9 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_bitsinmiddle_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #24
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: tst w8, #0xff
+; CHECK-NEXT: lsl w8, w0, w1
+; CHECK-NEXT: tst w8, #0x18
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i8 24, %y
@@ -65,11 +59,9 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: scalar_i16_signbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32768
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: tst w8, #0xffff
+; CHECK-NEXT: lsl w8, w0, w1
+; CHECK-NEXT: tst w8, #0x8000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i16 32768, %y
@@ -81,11 +73,9 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: scalar_i16_lowestbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: tst w8, #0xffff
+; CHECK-NEXT: lsl w8, w0, w1
+; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i16 1, %y
@@ -97,11 +87,9 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: scalar_i16_bitsinmiddle_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #4080
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: tst w8, #0xffff
+; CHECK-NEXT: lsl w8, w0, w1
+; CHECK-NEXT: tst w8, #0xff0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i16 4080, %y
@@ -115,9 +103,8 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scalar_i32_signbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-2147483648
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: tst w8, w0
+; CHECK-NEXT: lsl w8, w0, w1
+; CHECK-NEXT: tst w8, #0x80000000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i32 2147483648, %y
@@ -129,9 +116,8 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scalar_i32_lowestbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: tst w8, w0
+; CHECK-NEXT: lsl w8, w0, w1
+; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i32 1, %y
@@ -143,9 +129,8 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scalar_i32_bitsinmiddle_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #16776960
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: tst w8, w0
+; CHECK-NEXT: lsl w8, w0, w1
+; CHECK-NEXT: tst w8, #0xffff00
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i32 16776960, %y
@@ -159,9 +144,8 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scalar_i64_signbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-9223372036854775808
-; CHECK-NEXT: lsr x8, x8, x1
-; CHECK-NEXT: tst x8, x0
+; CHECK-NEXT: lsl x8, x0, x1
+; CHECK-NEXT: tst x8, #0x8000000000000000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i64 9223372036854775808, %y
@@ -173,9 +157,8 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scalar_i64_lowestbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
-; CHECK-NEXT: lsr x8, x8, x1
-; CHECK-NEXT: tst x8, x0
+; CHECK-NEXT: lsl x8, x0, x1
+; CHECK-NEXT: tst x8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i64 1, %y
@@ -187,9 +170,8 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scalar_i64_bitsinmiddle_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #281474976645120
-; CHECK-NEXT: lsr x8, x8, x1
-; CHECK-NEXT: tst x8, x0
+; CHECK-NEXT: lsl x8, x0, x1
+; CHECK-NEXT: tst x8, #0xffffffff0000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = lshr i64 281474976645120, %y
@@ -205,10 +187,9 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: vec_4xi32_splat_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg v1.4s, v1.4s
; CHECK-NEXT: movi v2.4s, #1
-; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
@@ -238,10 +219,9 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg v1.4s, v1.4s
; CHECK-NEXT: movi v2.4s, #1
-; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
@@ -288,12 +268,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_signbit_ne:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: tst w8, #0xff
-; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: lsl w8, w0, w1
+; CHECK-NEXT: ubfx w0, w8, #7, #1
; CHECK-NEXT: ret
%t0 = lshr i8 128, %y
%t1 = and i8 %t0, %x
diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index 856f294675e..90e007cbd77 100644
--- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -15,11 +15,10 @@
define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_signbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-128
+; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsl w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: tst w8, #0xff
+; CHECK-NEXT: lsr w8, w8, w1
+; CHECK-NEXT: tst w8, #0x80
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i8 128, %y
@@ -31,11 +30,10 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_lowestbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsl w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: tst w8, #0xff
+; CHECK-NEXT: lsr w8, w8, w1
+; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i8 1, %y
@@ -47,11 +45,10 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_bitsinmiddle_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #24
+; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsl w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: tst w8, #0xff
+; CHECK-NEXT: lsr w8, w8, w1
+; CHECK-NEXT: tst w8, #0x18
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i8 24, %y
@@ -65,11 +62,10 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: scalar_i16_signbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-32768
+; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsl w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: tst w8, #0xffff
+; CHECK-NEXT: lsr w8, w8, w1
+; CHECK-NEXT: tst w8, #0x8000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i16 32768, %y
@@ -81,11 +77,10 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: scalar_i16_lowestbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsl w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: tst w8, #0xffff
+; CHECK-NEXT: lsr w8, w8, w1
+; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i16 1, %y
@@ -97,11 +92,10 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: scalar_i16_bitsinmiddle_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #4080
+; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsl w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: tst w8, #0xffff
+; CHECK-NEXT: lsr w8, w8, w1
+; CHECK-NEXT: tst w8, #0xff0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i16 4080, %y
@@ -115,9 +109,8 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scalar_i32_signbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-2147483648
-; CHECK-NEXT: lsl w8, w8, w1
-; CHECK-NEXT: tst w8, w0
+; CHECK-NEXT: lsr w8, w0, w1
+; CHECK-NEXT: tst w8, #0x80000000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i32 2147483648, %y
@@ -129,9 +122,8 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scalar_i32_lowestbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
-; CHECK-NEXT: lsl w8, w8, w1
-; CHECK-NEXT: tst w8, w0
+; CHECK-NEXT: lsr w8, w0, w1
+; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i32 1, %y
@@ -143,9 +135,8 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scalar_i32_bitsinmiddle_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #16776960
-; CHECK-NEXT: lsl w8, w8, w1
-; CHECK-NEXT: tst w8, w0
+; CHECK-NEXT: lsr w8, w0, w1
+; CHECK-NEXT: tst w8, #0xffff00
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i32 16776960, %y
@@ -159,9 +150,8 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scalar_i64_signbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-9223372036854775808
-; CHECK-NEXT: lsl x8, x8, x1
-; CHECK-NEXT: tst x8, x0
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: tst x8, #0x8000000000000000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i64 9223372036854775808, %y
@@ -173,9 +163,8 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scalar_i64_lowestbit_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
-; CHECK-NEXT: lsl x8, x8, x1
-; CHECK-NEXT: tst x8, x0
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: tst x8, #0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i64 1, %y
@@ -187,9 +176,8 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scalar_i64_bitsinmiddle_eq:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #281474976645120
-; CHECK-NEXT: lsl x8, x8, x1
-; CHECK-NEXT: tst x8, x0
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: tst x8, #0xffffffff0000
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t0 = shl i64 281474976645120, %y
@@ -283,12 +271,10 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_signbit_ne:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-128
+; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsl w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: tst w8, #0xff
-; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: lsr w8, w8, w1
+; CHECK-NEXT: ubfx w0, w8, #7, #1
; CHECK-NEXT: ret
%t0 = shl i8 128, %y
%t1 = and i8 %t0, %x
diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
index db85233268f..7e5e3dfbc52 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -16,10 +16,10 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
; SI-NEXT: s_mov_b32 s7, s0
; SI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm
; SI-NEXT: v_and_b32_e32 v0, 7, v0
-; SI-NEXT: v_lshl_b32_e32 v0, 1, v0
; SI-NEXT: s_waitcnt vmcnt(0)
-; SI-NEXT: v_and_b32_e32 v0, v2, v0
-; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; SI-NEXT: v_lshrrev_b32_e32 v0, v0, v2
+; SI-NEXT: v_and_b32_e32 v0, 1, v0
+; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; SI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0
; SI-NEXT: ; return to shader part epilog
@@ -37,10 +37,10 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
; VI-NEXT: s_mov_b32 s7, s0
; VI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm
; VI-NEXT: v_and_b32_e32 v0, 7, v0
-; VI-NEXT: v_lshlrev_b32_e64 v0, v0, 1
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: v_and_b32_e32 v0, v2, v0
-; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; VI-NEXT: v_lshrrev_b32_e32 v0, v0, v2
+; VI-NEXT: v_and_b32_e32 v0, 1, v0
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; VI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, v0
; VI-NEXT: ; return to shader part epilog
diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index 77414929970..cc97f2f0155 100644
--- a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -21,20 +21,18 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; ARM-LABEL: scalar_i8_signbit_eq:
; ARM: @ %bb.0:
; ARM-NEXT: uxtb r1, r1
-; ARM-NEXT: mov r2, #128
-; ARM-NEXT: and r0, r0, r2, lsr r1
+; ARM-NEXT: lsl r0, r0, r1
+; ARM-NEXT: mov r1, #1
; ARM-NEXT: uxtb r0, r0
-; ARM-NEXT: clz r0, r0
-; ARM-NEXT: lsr r0, r0, #5
+; ARM-NEXT: eor r0, r1, r0, lsr #7
; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i8_signbit_eq:
; THUMB6: @ %bb.0:
; THUMB6-NEXT: uxtb r1, r1
-; THUMB6-NEXT: movs r2, #128
-; THUMB6-NEXT: lsrs r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxtb r1, r2
+; THUMB6-NEXT: lsls r0, r1
+; THUMB6-NEXT: movs r1, #128
+; THUMB6-NEXT: ands r1, r0
; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
@@ -42,12 +40,10 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; THUMB78-LABEL: scalar_i8_signbit_eq:
; THUMB78: @ %bb.0:
; THUMB78-NEXT: uxtb r1, r1
-; THUMB78-NEXT: movs r2, #128
-; THUMB78-NEXT: lsr.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
+; THUMB78-NEXT: lsls r0, r1
+; THUMB78-NEXT: movs r1, #1
; THUMB78-NEXT: uxtb r0, r0
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
+; THUMB78-NEXT: eor.w r0, r1, r0, lsr #7
; THUMB78-NEXT: bx lr
%t0 = lshr i8 128, %y
%t1 = and i8 %t0, %x
@@ -60,19 +56,15 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
; ARM: @ %bb.0:
; ARM-NEXT: uxtb r1, r1
; ARM-NEXT: mov r2, #1
-; ARM-NEXT: and r0, r0, r2, lsr r1
-; ARM-NEXT: uxtb r0, r0
-; ARM-NEXT: clz r0, r0
-; ARM-NEXT: lsr r0, r0, #5
+; ARM-NEXT: bic r0, r2, r0, lsl r1
; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i8_lowestbit_eq:
; THUMB6: @ %bb.0:
; THUMB6-NEXT: uxtb r1, r1
-; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: lsrs r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxtb r1, r2
+; THUMB6-NEXT: lsls r0, r1
+; THUMB6-NEXT: movs r1, #1
+; THUMB6-NEXT: ands r1, r0
; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
@@ -80,12 +72,9 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
; THUMB78-LABEL: scalar_i8_lowestbit_eq:
; THUMB78: @ %bb.0:
; THUMB78-NEXT: uxtb r1, r1
-; THUMB78-NEXT: movs r2, #1
-; THUMB78-NEXT: lsr.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: uxtb r0, r0
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
+; THUMB78-NEXT: lsls r0, r1
+; THUMB78-NEXT: movs r1, #1
+; THUMB78-NEXT: bic.w r0, r1, r0
; THUMB78-NEXT: bx lr
%t0 = lshr i8 1, %y
%t1 = and i8 %t0, %x
@@ -98,8 +87,7 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; ARM: @ %bb.0:
; ARM-NEXT: uxtb r1, r1
; ARM-NEXT: mov r2, #24
-; ARM-NEXT: and r0, r0, r2, lsr r1
-; ARM-NEXT: uxtb r0, r0
+; ARM-NEXT: and r0, r2, r0, lsl r1
; ARM-NEXT: clz r0, r0
; ARM-NEXT: lsr r0, r0, #5
; ARM-NEXT: bx lr
@@ -107,10 +95,9 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; THUMB6-LABEL: scalar_i8_bitsinmiddle_eq:
; THUMB6: @ %bb.0:
; THUMB6-NEXT: uxtb r1, r1
-; THUMB6-NEXT: movs r2, #24
-; THUMB6-NEXT: lsrs r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxtb r1, r2
+; THUMB6-NEXT: lsls r0, r1
+; THUMB6-NEXT: movs r1, #24
+; THUMB6-NEXT: ands r1, r0
; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
@@ -118,10 +105,8 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; THUMB78-LABEL: scalar_i8_bitsinmiddle_eq:
; THUMB78: @ %bb.0:
; THUMB78-NEXT: uxtb r1, r1
-; THUMB78-NEXT: movs r2, #24
-; THUMB78-NEXT: lsr.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: uxtb r0, r0
+; THUMB78-NEXT: lsls r0, r1
+; THUMB78-NEXT: and r0, r0, #24
; THUMB78-NEXT: clz r0, r0
; THUMB78-NEXT: lsrs r0, r0, #5
; THUMB78-NEXT: bx lr
@@ -137,21 +122,19 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
; ARM-LABEL: scalar_i16_signbit_eq:
; ARM: @ %bb.0:
; ARM-NEXT: uxth r1, r1
-; ARM-NEXT: mov r2, #32768
-; ARM-NEXT: and r0, r0, r2, lsr r1
+; ARM-NEXT: lsl r0, r0, r1
+; ARM-NEXT: mov r1, #1
; ARM-NEXT: uxth r0, r0
-; ARM-NEXT: clz r0, r0
-; ARM-NEXT: lsr r0, r0, #5
+; ARM-NEXT: eor r0, r1, r0, lsr #15
; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i16_signbit_eq:
; THUMB6: @ %bb.0:
; THUMB6-NEXT: uxth r1, r1
-; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: lsls r2, r2, #15
-; THUMB6-NEXT: lsrs r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxth r1, r2
+; THUMB6-NEXT: lsls r0, r1
+; THUMB6-NEXT: movs r1, #1
+; THUMB6-NEXT: lsls r1, r1, #15
+; THUMB6-NEXT: ands r1, r0
; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
@@ -159,12 +142,10 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
; THUMB78-LABEL: scalar_i16_signbit_eq:
; THUMB78: @ %bb.0:
; THUMB78-NEXT: uxth r1, r1
-; THUMB78-NEXT: mov.w r2, #32768
-; THUMB78-NEXT: lsr.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
+; THUMB78-NEXT: lsls r0, r1
+; THUMB78-NEXT: movs r1, #1
; THUMB78-NEXT: uxth r0, r0
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
+; THUMB78-NEXT: eor.w r0, r1, r0, lsr #15
; THUMB78-NEXT: bx lr
%t0 = lshr i16 32768, %y
%t1 = and i16 %t0, %x
@@ -177,19 +158,15 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
; ARM: @ %bb.0:
; ARM-NEXT: uxth r1, r1
; ARM-NEXT: mov r2, #1
-; ARM-NEXT: and r0, r0, r2, lsr r1
-; ARM-NEXT: uxth r0, r0
-; ARM-NEXT: clz r0, r0
-; ARM-NEXT: lsr r0, r0, #5
+; ARM-NEXT: bic r0, r2, r0, lsl r1
; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i16_lowestbit_eq:
; THUMB6: @ %bb.0:
; THUMB6-NEXT: uxth r1, r1
-; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: lsrs r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxth r1, r2
+; THUMB6-NEXT: lsls r0, r1
+; THUMB6-NEXT: movs r1, #1
+; THUMB6-NEXT: ands r1, r0
; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
@@ -197,12 +174,9 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
; THUMB78-LABEL: scalar_i16_lowestbit_eq:
; THUMB78: @ %bb.0:
; THUMB78-NEXT: uxth r1, r1
-; THUMB78-NEXT: movs r2, #1
-; THUMB78-NEXT: lsr.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: uxth r0, r0
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
+; THUMB78-NEXT: lsls r0, r1
+; THUMB78-NEXT: movs r1, #1
+; THUMB78-NEXT: bic.w r0, r1, r0
; THUMB78-NEXT: bx lr
%t0 = lshr i16 1, %y
%t1 = and i16 %t0, %x
@@ -215,8 +189,7 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; ARM: @ %bb.0:
; ARM-NEXT: uxth r1, r1
; ARM-NEXT: mov r2, #4080
-; ARM-NEXT: and r0, r0, r2, lsr r1
-; ARM-NEXT: uxth r0, r0
+; ARM-NEXT: and r0, r2, r0, lsl r1
; ARM-NEXT: clz r0, r0
; ARM-NEXT: lsr r0, r0, #5
; ARM-NEXT: bx lr
@@ -224,11 +197,10 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; THUMB6-LABEL: scalar_i16_bitsinmiddle_eq:
; THUMB6: @ %bb.0:
; THUMB6-NEXT: uxth r1, r1
-; THUMB6-NEXT: movs r2, #255
-; THUMB6-NEXT: lsls r2, r2, #4
-; THUMB6-NEXT: lsrs r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxth r1, r2
+; THUMB6-NEXT: lsls r0, r1
+; THUMB6-NEXT: movs r1, #255
+; THUMB6-NEXT: lsls r1, r1, #4
+; THUMB6-NEXT: ands r1, r0
; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
@@ -236,10 +208,8 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; THUMB78-LABEL: scalar_i16_bitsinmiddle_eq:
; THUMB78: @ %bb.0:
; THUMB78-NEXT: uxth r1, r1
-; THUMB78-NEXT: mov.w r2, #4080
-; THUMB78-NEXT: lsr.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: uxth r0, r0
+; THUMB78-NEXT: lsls r0, r1
+; THUMB78-NEXT: and r0, r0, #4080
; THUMB78-NEXT: clz r0, r0
; THUMB78-NEXT: lsrs r0, r0, #5
; THUMB78-NEXT: bx lr
@@ -254,29 +224,25 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
; ARM-LABEL: scalar_i32_signbit_eq:
; ARM: @ %bb.0:
-; ARM-NEXT: mov r2, #-2147483648
-; ARM-NEXT: and r0, r0, r2, lsr r1
-; ARM-NEXT: clz r0, r0
-; ARM-NEXT: lsr r0, r0, #5
+; ARM-NEXT: mvn r0, r0, lsl r1
+; ARM-NEXT: lsr r0, r0, #31
; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i32_signbit_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: lsls r2, r2, #31
-; THUMB6-NEXT: lsrs r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: rsbs r0, r2, #0
-; THUMB6-NEXT: adcs r0, r2
+; THUMB6-NEXT: lsls r0, r1
+; THUMB6-NEXT: movs r1, #1
+; THUMB6-NEXT: lsls r1, r1, #31
+; THUMB6-NEXT: ands r1, r0
+; THUMB6-NEXT: rsbs r0, r1, #0
+; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
;
; THUMB78-LABEL: scalar_i32_signbit_eq:
; THUMB78: @ %bb.0:
-; THUMB78-NEXT: mov.w r2, #-2147483648
-; THUMB78-NEXT: lsr.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
+; THUMB78-NEXT: lsls r0, r1
+; THUMB78-NEXT: mvns r0, r0
+; THUMB78-NEXT: lsrs r0, r0, #31
; THUMB78-NEXT: bx lr
%t0 = lshr i32 2147483648, %y
%t1 = and i32 %t0, %x
@@ -288,27 +254,23 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
; ARM-LABEL: scalar_i32_lowestbit_eq:
; ARM: @ %bb.0:
; ARM-NEXT: mov r2, #1
-; ARM-NEXT: and r0, r0, r2, lsr r1
-; ARM-NEXT: clz r0, r0
-; ARM-NEXT: lsr r0, r0, #5
+; ARM-NEXT: bic r0, r2, r0, lsl r1
; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i32_lowestbit_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: lsrs r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: rsbs r0, r2, #0
-; THUMB6-NEXT: adcs r0, r2
+; THUMB6-NEXT: lsls r0, r1
+; THUMB6-NEXT: movs r1, #1
+; THUMB6-NEXT: ands r1, r0
+; THUMB6-NEXT: rsbs r0, r1, #0
+; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
;
; THUMB78-LABEL: scalar_i32_lowestbit_eq:
; THUMB78: @ %bb.0:
-; THUMB78-NEXT: movs r2, #1
-; THUMB78-NEXT: lsr.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
+; THUMB78-NEXT: lsls r0, r1
+; THUMB78-NEXT: movs r1, #1
+; THUMB78-NEXT: bic.w r0, r1, r0
; THUMB78-NEXT: bx lr
%t0 = lshr i32 1, %y
%t1 = and i32 %t0, %x
@@ -321,7 +283,7 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; ARM6: @ %bb.0:
; ARM6-NEXT: mov r2, #65280
; ARM6-NEXT: orr r2, r2, #16711680
-; ARM6-NEXT: and r0, r0, r2, lsr r1
+; ARM6-NEXT: and r0, r2, r0, lsl r1
; ARM6-NEXT: clz r0, r0
; ARM6-NEXT: lsr r0, r0, #5
; ARM6-NEXT: bx lr
@@ -330,18 +292,18 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; ARM78: @ %bb.0:
; ARM78-NEXT: movw r2, #65280
; ARM78-NEXT: movt r2, #255
-; ARM78-NEXT: and r0, r0, r2, lsr r1
+; ARM78-NEXT: and r0, r2, r0, lsl r1
; ARM78-NEXT: clz r0, r0
; ARM78-NEXT: lsr r0, r0, #5
; ARM78-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i32_bitsinmiddle_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: ldr r2, .LCPI8_0
-; THUMB6-NEXT: lsrs r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: rsbs r0, r2, #0
-; THUMB6-NEXT: adcs r0, r2
+; THUMB6-NEXT: lsls r0, r1
+; THUMB6-NEXT: ldr r1, .LCPI8_0
+; THUMB6-NEXT: ands r1, r0
+; THUMB6-NEXT: rsbs r0, r1, #0
+; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
; THUMB6-NEXT: .p2align 2
; THUMB6-NEXT: @ %bb.1:
@@ -350,9 +312,9 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
;
; THUMB78-LABEL: scalar_i32_bitsinmiddle_eq:
; THUMB78: @ %bb.0:
-; THUMB78-NEXT: movw r2, #65280
-; THUMB78-NEXT: movt r2, #255
-; THUMB78-NEXT: lsr.w r1, r2, r1
+; THUMB78-NEXT: lsls r0, r1
+; THUMB78-NEXT: movw r1, #65280
+; THUMB78-NEXT: movt r1, #255
; THUMB78-NEXT: ands r0, r1
; THUMB78-NEXT: clz r0, r0
; THUMB78-NEXT: lsrs r0, r0, #5
@@ -366,95 +328,54 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; i64 scalar
define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
-; ARM6-LABEL: scalar_i64_signbit_eq:
-; ARM6: @ %bb.0:
-; ARM6-NEXT: push {r11, lr}
-; ARM6-NEXT: mov r12, #-2147483648
-; ARM6-NEXT: subs lr, r2, #32
-; ARM6-NEXT: lsr r3, r12, r2
-; ARM6-NEXT: rsb r2, r2, #32
-; ARM6-NEXT: movpl r3, #0
-; ARM6-NEXT: and r1, r3, r1
-; ARM6-NEXT: lsl r2, r12, r2
-; ARM6-NEXT: lsrpl r2, r12, lr
-; ARM6-NEXT: and r0, r2, r0
-; ARM6-NEXT: orr r0, r0, r1
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: lsr r0, r0, #5
-; ARM6-NEXT: pop {r11, pc}
-;
-; ARM78-LABEL: scalar_i64_signbit_eq:
-; ARM78: @ %bb.0:
-; ARM78-NEXT: push {r11, lr}
-; ARM78-NEXT: mov r12, #-2147483648
-; ARM78-NEXT: subs lr, r2, #32
-; ARM78-NEXT: lsr r3, r12, r2
-; ARM78-NEXT: rsb r2, r2, #32
-; ARM78-NEXT: movwpl r3, #0
-; ARM78-NEXT: and r1, r3, r1
-; ARM78-NEXT: lsl r2, r12, r2
-; ARM78-NEXT: lsrpl r2, r12, lr
-; ARM78-NEXT: and r0, r2, r0
-; ARM78-NEXT: orr r0, r0, r1
-; ARM78-NEXT: clz r0, r0
-; ARM78-NEXT: lsr r0, r0, #5
-; ARM78-NEXT: pop {r11, pc}
+; ARM-LABEL: scalar_i64_signbit_eq:
+; ARM: @ %bb.0:
+; ARM-NEXT: rsb r3, r2, #32
+; ARM-NEXT: lsr r3, r0, r3
+; ARM-NEXT: orr r1, r3, r1, lsl r2
+; ARM-NEXT: subs r2, r2, #32
+; ARM-NEXT: lslpl r1, r0, r2
+; ARM-NEXT: mvn r0, r1
+; ARM-NEXT: lsr r0, r0, #31
+; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i64_signbit_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r7, lr}
-; THUMB6-NEXT: mov r4, r1
-; THUMB6-NEXT: mov r5, r0
+; THUMB6-NEXT: push {r7, lr}
+; THUMB6-NEXT: bl __ashldi3
; THUMB6-NEXT: movs r0, #1
-; THUMB6-NEXT: lsls r1, r0, #31
-; THUMB6-NEXT: movs r0, #0
-; THUMB6-NEXT: bl __lshrdi3
-; THUMB6-NEXT: ands r1, r4
-; THUMB6-NEXT: ands r0, r5
-; THUMB6-NEXT: orrs r0, r1
-; THUMB6-NEXT: rsbs r1, r0, #0
-; THUMB6-NEXT: adcs r0, r1
-; THUMB6-NEXT: pop {r4, r5, r7, pc}
+; THUMB6-NEXT: lsls r2, r0, #31
+; THUMB6-NEXT: ands r2, r1
+; THUMB6-NEXT: rsbs r0, r2, #0
+; THUMB6-NEXT: adcs r0, r2
+; THUMB6-NEXT: pop {r7, pc}
;
; THUMB7-LABEL: scalar_i64_signbit_eq:
; THUMB7: @ %bb.0:
-; THUMB7-NEXT: push {r7, lr}
; THUMB7-NEXT: rsb.w r3, r2, #32
-; THUMB7-NEXT: mov.w r12, #-2147483648
-; THUMB7-NEXT: subs.w lr, r2, #32
-; THUMB7-NEXT: lsr.w r2, r12, r2
-; THUMB7-NEXT: lsl.w r3, r12, r3
+; THUMB7-NEXT: lsls r1, r2
+; THUMB7-NEXT: subs r2, #32
+; THUMB7-NEXT: lsr.w r3, r0, r3
+; THUMB7-NEXT: orr.w r1, r1, r3
; THUMB7-NEXT: it pl
-; THUMB7-NEXT: lsrpl.w r3, r12, lr
-; THUMB7-NEXT: it pl
-; THUMB7-NEXT: movpl r2, #0
-; THUMB7-NEXT: ands r0, r3
-; THUMB7-NEXT: ands r1, r2
-; THUMB7-NEXT: orrs r0, r1
-; THUMB7-NEXT: clz r0, r0
-; THUMB7-NEXT: lsrs r0, r0, #5
-; THUMB7-NEXT: pop {r7, pc}
+; THUMB7-NEXT: lslpl.w r1, r0, r2
+; THUMB7-NEXT: mvns r0, r1
+; THUMB7-NEXT: lsrs r0, r0, #31
+; THUMB7-NEXT: bx lr
;
; THUMB8-LABEL: scalar_i64_signbit_eq:
; THUMB8: @ %bb.0:
-; THUMB8-NEXT: .save {r7, lr}
-; THUMB8-NEXT: push {r7, lr}
-; THUMB8-NEXT: subs.w r3, r2, #32
-; THUMB8-NEXT: mov.w r12, #-2147483648
-; THUMB8-NEXT: lsr.w lr, r12, r3
; THUMB8-NEXT: rsb.w r3, r2, #32
-; THUMB8-NEXT: lsr.w r2, r12, r2
-; THUMB8-NEXT: lsl.w r3, r12, r3
-; THUMB8-NEXT: it pl
-; THUMB8-NEXT: movpl r3, lr
-; THUMB8-NEXT: it pl
-; THUMB8-NEXT: movpl r2, #0
-; THUMB8-NEXT: ands r0, r3
-; THUMB8-NEXT: ands r1, r2
-; THUMB8-NEXT: orrs r0, r1
-; THUMB8-NEXT: clz r0, r0
-; THUMB8-NEXT: lsrs r0, r0, #5
-; THUMB8-NEXT: pop {r7, pc}
+; THUMB8-NEXT: lsls r1, r2
+; THUMB8-NEXT: lsr.w r3, r0, r3
+; THUMB8-NEXT: orrs r1, r3
+; THUMB8-NEXT: subs r2, #32
+; THUMB8-NEXT: lsl.w r0, r0, r2
+; THUMB8-NEXT: it mi
+; THUMB8-NEXT: movmi r0, r1
+; THUMB8-NEXT: mvns r0, r0
+; THUMB8-NEXT: lsrs r0, r0, #31
+; THUMB8-NEXT: bx lr
%t0 = lshr i64 9223372036854775808, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0
@@ -464,51 +385,40 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
; ARM6-LABEL: scalar_i64_lowestbit_eq:
; ARM6: @ %bb.0:
+; ARM6-NEXT: subs r1, r2, #32
+; ARM6-NEXT: lsl r0, r0, r2
+; ARM6-NEXT: movpl r0, #0
; ARM6-NEXT: mov r1, #1
-; ARM6-NEXT: lsr r1, r1, r2
-; ARM6-NEXT: subs r2, r2, #32
-; ARM6-NEXT: movpl r1, #0
-; ARM6-NEXT: and r0, r1, r0
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: lsr r0, r0, #5
+; ARM6-NEXT: bic r0, r1, r0
; ARM6-NEXT: bx lr
;
; ARM78-LABEL: scalar_i64_lowestbit_eq:
; ARM78: @ %bb.0:
+; ARM78-NEXT: subs r1, r2, #32
+; ARM78-NEXT: lsl r0, r0, r2
+; ARM78-NEXT: movwpl r0, #0
; ARM78-NEXT: mov r1, #1
-; ARM78-NEXT: lsr r1, r1, r2
-; ARM78-NEXT: subs r2, r2, #32
-; ARM78-NEXT: movwpl r1, #0
-; ARM78-NEXT: and r0, r1, r0
-; ARM78-NEXT: clz r0, r0
-; ARM78-NEXT: lsr r0, r0, #5
+; ARM78-NEXT: bic r0, r1, r0
; ARM78-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i64_lowestbit_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r7, lr}
-; THUMB6-NEXT: mov r4, r1
-; THUMB6-NEXT: mov r5, r0
-; THUMB6-NEXT: movs r0, #1
-; THUMB6-NEXT: movs r1, #0
-; THUMB6-NEXT: bl __lshrdi3
-; THUMB6-NEXT: ands r1, r4
-; THUMB6-NEXT: ands r0, r5
-; THUMB6-NEXT: orrs r0, r1
-; THUMB6-NEXT: rsbs r1, r0, #0
+; THUMB6-NEXT: push {r7, lr}
+; THUMB6-NEXT: bl __ashldi3
+; THUMB6-NEXT: movs r1, #1
+; THUMB6-NEXT: ands r1, r0
+; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
-; THUMB6-NEXT: pop {r4, r5, r7, pc}
+; THUMB6-NEXT: pop {r7, pc}
;
; THUMB78-LABEL: scalar_i64_lowestbit_eq:
; THUMB78: @ %bb.0:
-; THUMB78-NEXT: movs r1, #1
-; THUMB78-NEXT: lsrs r1, r2
-; THUMB78-NEXT: subs r2, #32
+; THUMB78-NEXT: lsls r0, r2
+; THUMB78-NEXT: subs.w r1, r2, #32
; THUMB78-NEXT: it pl
-; THUMB78-NEXT: movpl r1, #0
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
+; THUMB78-NEXT: movpl r0, #0
+; THUMB78-NEXT: movs r1, #1
+; THUMB78-NEXT: bic.w r0, r1, r0
; THUMB78-NEXT: bx lr
%t0 = lshr i64 1, %y
%t1 = and i64 %t0, %x
@@ -519,115 +429,82 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; ARM6-LABEL: scalar_i64_bitsinmiddle_eq:
; ARM6: @ %bb.0:
-; ARM6-NEXT: push {r11, lr}
-; ARM6-NEXT: mov r12, #255
-; ARM6-NEXT: subs lr, r2, #32
-; ARM6-NEXT: orr r12, r12, #65280
-; ARM6-NEXT: lsr r3, r12, r2
-; ARM6-NEXT: movpl r3, #0
-; ARM6-NEXT: and r1, r3, r1
-; ARM6-NEXT: mov r3, #16711680
-; ARM6-NEXT: cmp lr, #0
-; ARM6-NEXT: orr r3, r3, #-16777216
-; ARM6-NEXT: lsr r3, r3, r2
-; ARM6-NEXT: rsb r2, r2, #32
-; ARM6-NEXT: orr r2, r3, r12, lsl r2
-; ARM6-NEXT: lsrpl r2, r12, lr
-; ARM6-NEXT: and r0, r2, r0
-; ARM6-NEXT: orr r0, r0, r1
+; ARM6-NEXT: rsb r3, r2, #32
+; ARM6-NEXT: lsr r3, r0, r3
+; ARM6-NEXT: orr r1, r3, r1, lsl r2
+; ARM6-NEXT: subs r3, r2, #32
+; ARM6-NEXT: lslpl r1, r0, r3
+; ARM6-NEXT: lsl r0, r0, r2
+; ARM6-NEXT: movpl r0, #0
+; ARM6-NEXT: pkhbt r0, r1, r0
; ARM6-NEXT: clz r0, r0
; ARM6-NEXT: lsr r0, r0, #5
-; ARM6-NEXT: pop {r11, pc}
+; ARM6-NEXT: bx lr
;
; ARM78-LABEL: scalar_i64_bitsinmiddle_eq:
; ARM78: @ %bb.0:
-; ARM78-NEXT: push {r11, lr}
-; ARM78-NEXT: movw r12, #65535
-; ARM78-NEXT: subs lr, r2, #32
-; ARM78-NEXT: lsr r3, r12, r2
-; ARM78-NEXT: movwpl r3, #0
-; ARM78-NEXT: and r1, r3, r1
-; ARM78-NEXT: movw r3, #0
-; ARM78-NEXT: cmp lr, #0
-; ARM78-NEXT: movt r3, #65535
-; ARM78-NEXT: lsr r3, r3, r2
-; ARM78-NEXT: rsb r2, r2, #32
-; ARM78-NEXT: orr r2, r3, r12, lsl r2
-; ARM78-NEXT: lsrpl r2, r12, lr
-; ARM78-NEXT: and r0, r2, r0
-; ARM78-NEXT: orr r0, r0, r1
+; ARM78-NEXT: rsb r3, r2, #32
+; ARM78-NEXT: lsr r3, r0, r3
+; ARM78-NEXT: orr r1, r3, r1, lsl r2
+; ARM78-NEXT: subs r3, r2, #32
+; ARM78-NEXT: lslpl r1, r0, r3
+; ARM78-NEXT: lsl r0, r0, r2
+; ARM78-NEXT: movwpl r0, #0
+; ARM78-NEXT: pkhbt r0, r1, r0
; ARM78-NEXT: clz r0, r0
; ARM78-NEXT: lsr r0, r0, #5
-; ARM78-NEXT: pop {r11, pc}
+; ARM78-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i64_bitsinmiddle_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r7, lr}
-; THUMB6-NEXT: mov r4, r1
-; THUMB6-NEXT: mov r5, r0
-; THUMB6-NEXT: ldr r0, .LCPI11_0
-; THUMB6-NEXT: ldr r1, .LCPI11_1
-; THUMB6-NEXT: bl __lshrdi3
-; THUMB6-NEXT: ands r1, r4
-; THUMB6-NEXT: ands r0, r5
-; THUMB6-NEXT: orrs r0, r1
-; THUMB6-NEXT: rsbs r1, r0, #0
+; THUMB6-NEXT: push {r7, lr}
+; THUMB6-NEXT: bl __ashldi3
+; THUMB6-NEXT: ldr r2, .LCPI11_0
+; THUMB6-NEXT: ands r2, r0
+; THUMB6-NEXT: uxth r0, r1
+; THUMB6-NEXT: adds r1, r2, r0
+; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
-; THUMB6-NEXT: pop {r4, r5, r7, pc}
+; THUMB6-NEXT: pop {r7, pc}
; THUMB6-NEXT: .p2align 2
; THUMB6-NEXT: @ %bb.1:
; THUMB6-NEXT: .LCPI11_0:
; THUMB6-NEXT: .long 4294901760 @ 0xffff0000
-; THUMB6-NEXT: .LCPI11_1:
-; THUMB6-NEXT: .long 65535 @ 0xffff
;
; THUMB7-LABEL: scalar_i64_bitsinmiddle_eq:
; THUMB7: @ %bb.0:
-; THUMB7-NEXT: push {r7, lr}
-; THUMB7-NEXT: movs r3, #0
-; THUMB7-NEXT: movw lr, #65535
-; THUMB7-NEXT: movt r3, #65535
-; THUMB7-NEXT: lsr.w r12, r3, r2
; THUMB7-NEXT: rsb.w r3, r2, #32
-; THUMB7-NEXT: lsl.w r3, lr, r3
-; THUMB7-NEXT: orr.w r12, r12, r3
+; THUMB7-NEXT: lsls r1, r2
+; THUMB7-NEXT: lsr.w r3, r0, r3
+; THUMB7-NEXT: orrs r1, r3
; THUMB7-NEXT: subs.w r3, r2, #32
-; THUMB7-NEXT: lsr.w r2, lr, r2
; THUMB7-NEXT: it pl
-; THUMB7-NEXT: lsrpl.w r12, lr, r3
+; THUMB7-NEXT: lslpl.w r1, r0, r3
+; THUMB7-NEXT: lsl.w r0, r0, r2
; THUMB7-NEXT: it pl
-; THUMB7-NEXT: movpl r2, #0
-; THUMB7-NEXT: and.w r0, r0, r12
-; THUMB7-NEXT: ands r1, r2
-; THUMB7-NEXT: orrs r0, r1
+; THUMB7-NEXT: movpl r0, #0
+; THUMB7-NEXT: pkhbt r0, r1, r0
; THUMB7-NEXT: clz r0, r0
; THUMB7-NEXT: lsrs r0, r0, #5
-; THUMB7-NEXT: pop {r7, pc}
+; THUMB7-NEXT: bx lr
;
; THUMB8-LABEL: scalar_i64_bitsinmiddle_eq:
; THUMB8: @ %bb.0:
-; THUMB8-NEXT: .save {r7, lr}
-; THUMB8-NEXT: push {r7, lr}
-; THUMB8-NEXT: movs r3, #0
-; THUMB8-NEXT: movw lr, #65535
-; THUMB8-NEXT: movt r3, #65535
-; THUMB8-NEXT: lsr.w r12, r3, r2
; THUMB8-NEXT: rsb.w r3, r2, #32
-; THUMB8-NEXT: lsl.w r3, lr, r3
-; THUMB8-NEXT: orr.w r12, r12, r3
+; THUMB8-NEXT: lsls r1, r2
+; THUMB8-NEXT: lsr.w r3, r0, r3
+; THUMB8-NEXT: orrs r1, r3
; THUMB8-NEXT: subs.w r3, r2, #32
-; THUMB8-NEXT: lsr.w r2, lr, r2
-; THUMB8-NEXT: lsr.w r3, lr, r3
+; THUMB8-NEXT: lsl.w r3, r0, r3
+; THUMB8-NEXT: lsl.w r0, r0, r2
; THUMB8-NEXT: it mi
-; THUMB8-NEXT: movmi r3, r12
+; THUMB8-NEXT: movmi r3, r1
; THUMB8-NEXT: it pl
-; THUMB8-NEXT: movpl r2, #0
-; THUMB8-NEXT: ands r0, r3
-; THUMB8-NEXT: ands r1, r2
-; THUMB8-NEXT: orrs r0, r1
+; THUMB8-NEXT: movpl r0, #0
+; THUMB8-NEXT: pkhbt r0, r3, r0
; THUMB8-NEXT: clz r0, r0
; THUMB8-NEXT: lsrs r0, r0, #5
-; THUMB8-NEXT: pop {r7, pc}
+; THUMB8-NEXT: bx lr
%t0 = lshr i64 281474976645120, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0
@@ -644,33 +521,24 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; ARM6-NEXT: push {r11, lr}
; ARM6-NEXT: ldr r12, [sp, #8]
; ARM6-NEXT: mov lr, #1
-; ARM6-NEXT: and r0, r0, lr, lsr r12
+; ARM6-NEXT: bic r0, lr, r0, lsl r12
; ARM6-NEXT: ldr r12, [sp, #12]
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: and r1, r1, lr, lsr r12
+; ARM6-NEXT: bic r1, lr, r1, lsl r12
; ARM6-NEXT: ldr r12, [sp, #16]
-; ARM6-NEXT: clz r1, r1
-; ARM6-NEXT: lsr r0, r0, #5
-; ARM6-NEXT: and r2, r2, lr, lsr r12
+; ARM6-NEXT: bic r2, lr, r2, lsl r12
; ARM6-NEXT: ldr r12, [sp, #20]
-; ARM6-NEXT: clz r2, r2
-; ARM6-NEXT: lsr r1, r1, #5
-; ARM6-NEXT: and r3, r3, lr, lsr r12
-; ARM6-NEXT: lsr r2, r2, #5
-; ARM6-NEXT: clz r3, r3
-; ARM6-NEXT: lsr r3, r3, #5
+; ARM6-NEXT: bic r3, lr, r3, lsl r12
; ARM6-NEXT: pop {r11, pc}
;
; ARM78-LABEL: vec_4xi32_splat_eq:
; ARM78: @ %bb.0:
+; ARM78-NEXT: vmov d17, r2, r3
; ARM78-NEXT: mov r12, sp
-; ARM78-NEXT: vld1.64 {d16, d17}, [r12]
-; ARM78-NEXT: vmov.i32 q9, #0x1
-; ARM78-NEXT: vneg.s32 q8, q8
-; ARM78-NEXT: vshl.u32 q8, q9, q8
-; ARM78-NEXT: vmov d19, r2, r3
-; ARM78-NEXT: vmov d18, r0, r1
-; ARM78-NEXT: vtst.32 q8, q8, q9
+; ARM78-NEXT: vld1.64 {d18, d19}, [r12]
+; ARM78-NEXT: vmov d16, r0, r1
+; ARM78-NEXT: vmov.i32 q10, #0x1
+; ARM78-NEXT: vshl.u32 q8, q8, q9
+; ARM78-NEXT: vtst.32 q8, q8, q10
; ARM78-NEXT: vmvn q8, q8
; ARM78-NEXT: vmovn.i32 d16, q8
; ARM78-NEXT: vmov r0, r1, d16
@@ -678,43 +546,39 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
;
; THUMB6-LABEL: vec_4xi32_splat_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r6, lr}
-; THUMB6-NEXT: ldr r5, [sp, #16]
+; THUMB6-NEXT: push {r4, r5, r7, lr}
+; THUMB6-NEXT: ldr r4, [sp, #16]
+; THUMB6-NEXT: lsls r0, r4
; THUMB6-NEXT: movs r4, #1
-; THUMB6-NEXT: mov r6, r4
-; THUMB6-NEXT: lsrs r6, r5
-; THUMB6-NEXT: ands r6, r0
-; THUMB6-NEXT: rsbs r0, r6, #0
-; THUMB6-NEXT: adcs r0, r6
+; THUMB6-NEXT: ands r0, r4
+; THUMB6-NEXT: rsbs r5, r0, #0
+; THUMB6-NEXT: adcs r0, r5
; THUMB6-NEXT: ldr r5, [sp, #20]
-; THUMB6-NEXT: mov r6, r4
-; THUMB6-NEXT: lsrs r6, r5
-; THUMB6-NEXT: ands r6, r1
-; THUMB6-NEXT: rsbs r1, r6, #0
-; THUMB6-NEXT: adcs r1, r6
+; THUMB6-NEXT: lsls r1, r5
+; THUMB6-NEXT: ands r1, r4
+; THUMB6-NEXT: rsbs r5, r1, #0
+; THUMB6-NEXT: adcs r1, r5
; THUMB6-NEXT: ldr r5, [sp, #24]
-; THUMB6-NEXT: mov r6, r4
-; THUMB6-NEXT: lsrs r6, r5
-; THUMB6-NEXT: ands r6, r2
-; THUMB6-NEXT: rsbs r2, r6, #0
-; THUMB6-NEXT: adcs r2, r6
+; THUMB6-NEXT: lsls r2, r5
+; THUMB6-NEXT: ands r2, r4
+; THUMB6-NEXT: rsbs r5, r2, #0
+; THUMB6-NEXT: adcs r2, r5
; THUMB6-NEXT: ldr r5, [sp, #28]
-; THUMB6-NEXT: lsrs r4, r5
-; THUMB6-NEXT: ands r4, r3
-; THUMB6-NEXT: rsbs r3, r4, #0
+; THUMB6-NEXT: lsls r3, r5
+; THUMB6-NEXT: ands r3, r4
+; THUMB6-NEXT: rsbs r4, r3, #0
; THUMB6-NEXT: adcs r3, r4
-; THUMB6-NEXT: pop {r4, r5, r6, pc}
+; THUMB6-NEXT: pop {r4, r5, r7, pc}
;
; THUMB78-LABEL: vec_4xi32_splat_eq:
; THUMB78: @ %bb.0:
+; THUMB78-NEXT: vmov d17, r2, r3
; THUMB78-NEXT: mov r12, sp
-; THUMB78-NEXT: vld1.64 {d16, d17}, [r12]
-; THUMB78-NEXT: vmov.i32 q9, #0x1
-; THUMB78-NEXT: vneg.s32 q8, q8
-; THUMB78-NEXT: vshl.u32 q8, q9, q8
-; THUMB78-NEXT: vmov d19, r2, r3
-; THUMB78-NEXT: vmov d18, r0, r1
-; THUMB78-NEXT: vtst.32 q8, q8, q9
+; THUMB78-NEXT: vld1.64 {d18, d19}, [r12]
+; THUMB78-NEXT: vmov d16, r0, r1
+; THUMB78-NEXT: vmov.i32 q10, #0x1
+; THUMB78-NEXT: vshl.u32 q8, q8, q9
+; THUMB78-NEXT: vtst.32 q8, q8, q10
; THUMB78-NEXT: vmvn q8, q8
; THUMB78-NEXT: vmovn.i32 d16, q8
; THUMB78-NEXT: vmov r0, r1, d16
@@ -730,20 +594,16 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; ARM6: @ %bb.0:
; ARM6-NEXT: ldr r12, [sp, #4]
; ARM6-NEXT: mov r0, #1
-; ARM6-NEXT: and r0, r1, r0, lsr r12
+; ARM6-NEXT: bic r1, r0, r1, lsl r12
; ARM6-NEXT: ldr r12, [sp, #8]
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: lsr r1, r0, #5
; ARM6-NEXT: mov r0, #65280
; ARM6-NEXT: orr r0, r0, #16711680
-; ARM6-NEXT: and r0, r2, r0, lsr r12
-; ARM6-NEXT: ldr r12, [sp, #12]
+; ARM6-NEXT: and r0, r0, r2, lsl r12
; ARM6-NEXT: clz r0, r0
; ARM6-NEXT: lsr r2, r0, #5
-; ARM6-NEXT: mov r0, #-2147483648
-; ARM6-NEXT: and r0, r3, r0, lsr r12
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: lsr r3, r0, #5
+; ARM6-NEXT: ldr r0, [sp, #12]
+; ARM6-NEXT: mvn r0, r3, lsl r0
+; ARM6-NEXT: lsr r3, r0, #31
; ARM6-NEXT: mov r0, #1
; ARM6-NEXT: bx lr
;
@@ -772,27 +632,26 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
;
; THUMB6-LABEL: vec_4xi32_nonsplat_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r7, lr}
-; THUMB6-NEXT: ldr r4, [sp, #20]
+; THUMB6-NEXT: push {r4, lr}
+; THUMB6-NEXT: ldr r0, [sp, #12]
+; THUMB6-NEXT: lsls r1, r0
; THUMB6-NEXT: movs r0, #1
-; THUMB6-NEXT: mov r5, r0
-; THUMB6-NEXT: lsrs r5, r4
-; THUMB6-NEXT: ands r5, r1
-; THUMB6-NEXT: rsbs r1, r5, #0
-; THUMB6-NEXT: adcs r1, r5
-; THUMB6-NEXT: ldr r4, [sp, #24]
-; THUMB6-NEXT: ldr r5, .LCPI13_0
-; THUMB6-NEXT: lsrs r5, r4
-; THUMB6-NEXT: ands r5, r2
-; THUMB6-NEXT: rsbs r2, r5, #0
-; THUMB6-NEXT: adcs r2, r5
+; THUMB6-NEXT: ands r1, r0
+; THUMB6-NEXT: rsbs r4, r1, #0
+; THUMB6-NEXT: adcs r1, r4
+; THUMB6-NEXT: ldr r4, [sp, #16]
+; THUMB6-NEXT: lsls r2, r4
+; THUMB6-NEXT: ldr r4, .LCPI13_0
+; THUMB6-NEXT: ands r4, r2
+; THUMB6-NEXT: rsbs r2, r4, #0
+; THUMB6-NEXT: adcs r2, r4
+; THUMB6-NEXT: ldr r4, [sp, #20]
+; THUMB6-NEXT: lsls r3, r4
; THUMB6-NEXT: lsls r4, r0, #31
-; THUMB6-NEXT: ldr r5, [sp, #28]
-; THUMB6-NEXT: lsrs r4, r5
; THUMB6-NEXT: ands r4, r3
; THUMB6-NEXT: rsbs r3, r4, #0
; THUMB6-NEXT: adcs r3, r4
-; THUMB6-NEXT: pop {r4, r5, r7, pc}
+; THUMB6-NEXT: pop {r4, pc}
; THUMB6-NEXT: .p2align 2
; THUMB6-NEXT: @ %bb.1:
; THUMB6-NEXT: .LCPI13_0:
@@ -833,29 +692,22 @@ define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwi
; ARM6-NEXT: ldr r2, [sp, #12]
; ARM6-NEXT: mov lr, #1
; ARM6-NEXT: ldr r12, [sp, #8]
-; ARM6-NEXT: and r1, r1, lr, lsr r2
+; ARM6-NEXT: bic r1, lr, r1, lsl r2
; ARM6-NEXT: ldr r2, [sp, #20]
-; ARM6-NEXT: and r0, r0, lr, lsr r12
-; ARM6-NEXT: clz r1, r1
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: and r2, r3, lr, lsr r2
-; ARM6-NEXT: lsr r1, r1, #5
-; ARM6-NEXT: clz r2, r2
-; ARM6-NEXT: lsr r0, r0, #5
-; ARM6-NEXT: lsr r3, r2, #5
+; ARM6-NEXT: bic r0, lr, r0, lsl r12
+; ARM6-NEXT: bic r3, lr, r3, lsl r2
; ARM6-NEXT: mov r2, #1
; ARM6-NEXT: pop {r11, pc}
;
; ARM78-LABEL: vec_4xi32_nonsplat_undef0_eq:
; ARM78: @ %bb.0:
+; ARM78-NEXT: vmov d17, r2, r3
; ARM78-NEXT: mov r12, sp
-; ARM78-NEXT: vld1.64 {d16, d17}, [r12]
-; ARM78-NEXT: vmov.i32 q9, #0x1
-; ARM78-NEXT: vneg.s32 q8, q8
-; ARM78-NEXT: vshl.u32 q8, q9, q8
-; ARM78-NEXT: vmov d19, r2, r3
-; ARM78-NEXT: vmov d18, r0, r1
-; ARM78-NEXT: vtst.32 q8, q8, q9
+; ARM78-NEXT: vld1.64 {d18, d19}, [r12]
+; ARM78-NEXT: vmov d16, r0, r1
+; ARM78-NEXT: vmov.i32 q10, #0x1
+; ARM78-NEXT: vshl.u32 q8, q8, q9
+; ARM78-NEXT: vtst.32 q8, q8, q10
; ARM78-NEXT: vmvn q8, q8
; ARM78-NEXT: vmovn.i32 d16, q8
; ARM78-NEXT: vmov r0, r1, d16
@@ -863,38 +715,34 @@ define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwi
;
; THUMB6-LABEL: vec_4xi32_nonsplat_undef0_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r7, lr}
-; THUMB6-NEXT: ldr r4, [sp, #16]
+; THUMB6-NEXT: push {r4, lr}
+; THUMB6-NEXT: ldr r2, [sp, #8]
+; THUMB6-NEXT: lsls r0, r2
; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: mov r5, r2
-; THUMB6-NEXT: lsrs r5, r4
-; THUMB6-NEXT: ands r5, r0
-; THUMB6-NEXT: rsbs r0, r5, #0
-; THUMB6-NEXT: adcs r0, r5
+; THUMB6-NEXT: ands r0, r2
+; THUMB6-NEXT: rsbs r4, r0, #0
+; THUMB6-NEXT: adcs r0, r4
+; THUMB6-NEXT: ldr r4, [sp, #12]
+; THUMB6-NEXT: lsls r1, r4
+; THUMB6-NEXT: ands r1, r2
+; THUMB6-NEXT: rsbs r4, r1, #0
+; THUMB6-NEXT: adcs r1, r4
; THUMB6-NEXT: ldr r4, [sp, #20]
-; THUMB6-NEXT: mov r5, r2
-; THUMB6-NEXT: lsrs r5, r4
-; THUMB6-NEXT: ands r5, r1
-; THUMB6-NEXT: rsbs r1, r5, #0
-; THUMB6-NEXT: adcs r1, r5
-; THUMB6-NEXT: ldr r4, [sp, #28]
-; THUMB6-NEXT: mov r5, r2
-; THUMB6-NEXT: lsrs r5, r4
-; THUMB6-NEXT: ands r5, r3
-; THUMB6-NEXT: rsbs r3, r5, #0
-; THUMB6-NEXT: adcs r3, r5
-; THUMB6-NEXT: pop {r4, r5, r7, pc}
+; THUMB6-NEXT: lsls r3, r4
+; THUMB6-NEXT: ands r3, r2
+; THUMB6-NEXT: rsbs r4, r3, #0
+; THUMB6-NEXT: adcs r3, r4
+; THUMB6-NEXT: pop {r4, pc}
;
; THUMB78-LABEL: vec_4xi32_nonsplat_undef0_eq:
; THUMB78: @ %bb.0:
+; THUMB78-NEXT: vmov d17, r2, r3
; THUMB78-NEXT: mov r12, sp
-; THUMB78-NEXT: vld1.64 {d16, d17}, [r12]
-; THUMB78-NEXT: vmov.i32 q9, #0x1
-; THUMB78-NEXT: vneg.s32 q8, q8
-; THUMB78-NEXT: vshl.u32 q8, q9, q8
-; THUMB78-NEXT: vmov d19, r2, r3
-; THUMB78-NEXT: vmov d18, r0, r1
-; THUMB78-NEXT: vtst.32 q8, q8, q9
+; THUMB78-NEXT: vld1.64 {d18, d19}, [r12]
+; THUMB78-NEXT: vmov d16, r0, r1
+; THUMB78-NEXT: vmov.i32 q10, #0x1
+; THUMB78-NEXT: vshl.u32 q8, q8, q9
+; THUMB78-NEXT: vtst.32 q8, q8, q10
; THUMB78-NEXT: vmvn q8, q8
; THUMB78-NEXT: vmovn.i32 d16, q8
; THUMB78-NEXT: vmov r0, r1, d16
@@ -911,16 +759,10 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwi
; ARM6-NEXT: ldr r2, [sp, #12]
; ARM6-NEXT: mov lr, #1
; ARM6-NEXT: ldr r12, [sp, #8]
-; ARM6-NEXT: and r1, r1, lr, lsr r2
+; ARM6-NEXT: bic r1, lr, r1, lsl r2
; ARM6-NEXT: ldr r2, [sp, #20]
-; ARM6-NEXT: and r0, r0, lr, lsr r12
-; ARM6-NEXT: clz r1, r1
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: and r2, r3, lr, lsr r2
-; ARM6-NEXT: lsr r1, r1, #5
-; ARM6-NEXT: clz r2, r2
-; ARM6-NEXT: lsr r0, r0, #5
-; ARM6-NEXT: lsr r3, r2, #5
+; ARM6-NEXT: bic r0, lr, r0, lsl r12
+; ARM6-NEXT: bic r3, lr, r3, lsl r2
; ARM6-NEXT: pop {r11, pc}
;
; ARM78-LABEL: vec_4xi32_nonsplat_undef1_eq:
@@ -940,26 +782,24 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwi
;
; THUMB6-LABEL: vec_4xi32_nonsplat_undef1_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r7, lr}
-; THUMB6-NEXT: ldr r4, [sp, #16]
+; THUMB6-NEXT: push {r4, lr}
+; THUMB6-NEXT: ldr r2, [sp, #8]
+; THUMB6-NEXT: lsls r0, r2
; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: mov r5, r2
-; THUMB6-NEXT: lsrs r5, r4
-; THUMB6-NEXT: ands r5, r0
-; THUMB6-NEXT: rsbs r0, r5, #0
-; THUMB6-NEXT: adcs r0, r5
+; THUMB6-NEXT: ands r0, r2
+; THUMB6-NEXT: rsbs r4, r0, #0
+; THUMB6-NEXT: adcs r0, r4
+; THUMB6-NEXT: ldr r4, [sp, #12]
+; THUMB6-NEXT: lsls r1, r4
+; THUMB6-NEXT: ands r1, r2
+; THUMB6-NEXT: rsbs r4, r1, #0
+; THUMB6-NEXT: adcs r1, r4
; THUMB6-NEXT: ldr r4, [sp, #20]
-; THUMB6-NEXT: mov r5, r2
-; THUMB6-NEXT: lsrs r5, r4
-; THUMB6-NEXT: ands r5, r1
-; THUMB6-NEXT: rsbs r1, r5, #0
-; THUMB6-NEXT: adcs r1, r5
-; THUMB6-NEXT: ldr r4, [sp, #28]
-; THUMB6-NEXT: lsrs r2, r4
-; THUMB6-NEXT: ands r2, r3
-; THUMB6-NEXT: rsbs r3, r2, #0
+; THUMB6-NEXT: lsls r3, r4
+; THUMB6-NEXT: ands r3, r2
+; THUMB6-NEXT: rsbs r2, r3, #0
; THUMB6-NEXT: adcs r3, r2
-; THUMB6-NEXT: pop {r4, r5, r7, pc}
+; THUMB6-NEXT: pop {r4, pc}
;
; THUMB78-LABEL: vec_4xi32_nonsplat_undef1_eq:
; THUMB78: @ %bb.0:
@@ -987,16 +827,10 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
; ARM6-NEXT: ldr r2, [sp, #12]
; ARM6-NEXT: mov lr, #1
; ARM6-NEXT: ldr r12, [sp, #8]
-; ARM6-NEXT: and r1, r1, lr, lsr r2
+; ARM6-NEXT: bic r1, lr, r1, lsl r2
; ARM6-NEXT: ldr r2, [sp, #20]
-; ARM6-NEXT: and r0, r0, lr, lsr r12
-; ARM6-NEXT: clz r1, r1
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: and r2, r3, lr, lsr r2
-; ARM6-NEXT: lsr r1, r1, #5
-; ARM6-NEXT: clz r2, r2
-; ARM6-NEXT: lsr r0, r0, #5
-; ARM6-NEXT: lsr r3, r2, #5
+; ARM6-NEXT: bic r0, lr, r0, lsl r12
+; ARM6-NEXT: bic r3, lr, r3, lsl r2
; ARM6-NEXT: pop {r11, pc}
;
; ARM78-LABEL: vec_4xi32_nonsplat_undef2_eq:
@@ -1016,26 +850,24 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
;
; THUMB6-LABEL: vec_4xi32_nonsplat_undef2_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r7, lr}
-; THUMB6-NEXT: ldr r4, [sp, #16]
+; THUMB6-NEXT: push {r4, lr}
+; THUMB6-NEXT: ldr r2, [sp, #8]
+; THUMB6-NEXT: lsls r0, r2
; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: mov r5, r2
-; THUMB6-NEXT: lsrs r5, r4
-; THUMB6-NEXT: ands r5, r0
-; THUMB6-NEXT: rsbs r0, r5, #0
-; THUMB6-NEXT: adcs r0, r5
+; THUMB6-NEXT: ands r0, r2
+; THUMB6-NEXT: rsbs r4, r0, #0
+; THUMB6-NEXT: adcs r0, r4
+; THUMB6-NEXT: ldr r4, [sp, #12]
+; THUMB6-NEXT: lsls r1, r4
+; THUMB6-NEXT: ands r1, r2
+; THUMB6-NEXT: rsbs r4, r1, #0
+; THUMB6-NEXT: adcs r1, r4
; THUMB6-NEXT: ldr r4, [sp, #20]
-; THUMB6-NEXT: mov r5, r2
-; THUMB6-NEXT: lsrs r5, r4
-; THUMB6-NEXT: ands r5, r1
-; THUMB6-NEXT: rsbs r1, r5, #0
-; THUMB6-NEXT: adcs r1, r5
-; THUMB6-NEXT: ldr r4, [sp, #28]
-; THUMB6-NEXT: lsrs r2, r4
-; THUMB6-NEXT: ands r2, r3
-; THUMB6-NEXT: rsbs r3, r2, #0
+; THUMB6-NEXT: lsls r3, r4
+; THUMB6-NEXT: ands r3, r2
+; THUMB6-NEXT: rsbs r2, r3, #0
; THUMB6-NEXT: adcs r3, r2
-; THUMB6-NEXT: pop {r4, r5, r7, pc}
+; THUMB6-NEXT: pop {r4, pc}
;
; THUMB78-LABEL: vec_4xi32_nonsplat_undef2_eq:
; THUMB78: @ %bb.0:
@@ -1062,48 +894,21 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
;------------------------------------------------------------------------------;
define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
-; ARM6-LABEL: scalar_i8_signbit_ne:
-; ARM6: @ %bb.0:
-; ARM6-NEXT: uxtb r1, r1
-; ARM6-NEXT: mov r2, #128
-; ARM6-NEXT: and r0, r0, r2, lsr r1
-; ARM6-NEXT: uxtb r0, r0
-; ARM6-NEXT: cmp r0, #0
-; ARM6-NEXT: movne r0, #1
-; ARM6-NEXT: bx lr
-;
-; ARM78-LABEL: scalar_i8_signbit_ne:
-; ARM78: @ %bb.0:
-; ARM78-NEXT: uxtb r1, r1
-; ARM78-NEXT: mov r2, #128
-; ARM78-NEXT: and r0, r0, r2, lsr r1
-; ARM78-NEXT: uxtb r0, r0
-; ARM78-NEXT: cmp r0, #0
-; ARM78-NEXT: movwne r0, #1
-; ARM78-NEXT: bx lr
-;
-; THUMB6-LABEL: scalar_i8_signbit_ne:
-; THUMB6: @ %bb.0:
-; THUMB6-NEXT: uxtb r1, r1
-; THUMB6-NEXT: movs r2, #128
-; THUMB6-NEXT: lsrs r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxtb r0, r2
-; THUMB6-NEXT: subs r1, r0, #1
-; THUMB6-NEXT: sbcs r0, r1
-; THUMB6-NEXT: bx lr
+; ARM-LABEL: scalar_i8_signbit_ne:
+; ARM: @ %bb.0:
+; ARM-NEXT: uxtb r1, r1
+; ARM-NEXT: lsl r0, r0, r1
+; ARM-NEXT: uxtb r0, r0
+; ARM-NEXT: lsr r0, r0, #7
+; ARM-NEXT: bx lr
;
-; THUMB78-LABEL: scalar_i8_signbit_ne:
-; THUMB78: @ %bb.0:
-; THUMB78-NEXT: uxtb r1, r1
-; THUMB78-NEXT: movs r2, #128
-; THUMB78-NEXT: lsr.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: uxtb r0, r0
-; THUMB78-NEXT: cmp r0, #0
-; THUMB78-NEXT: it ne
-; THUMB78-NEXT: movne r0, #1
-; THUMB78-NEXT: bx lr
+; THUMB-LABEL: scalar_i8_signbit_ne:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: uxtb r1, r1
+; THUMB-NEXT: lsls r0, r1
+; THUMB-NEXT: uxtb r0, r0
+; THUMB-NEXT: lsrs r0, r0, #7
+; THUMB-NEXT: bx lr
%t0 = lshr i8 128, %y
%t1 = and i8 %t0, %x
%res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate
diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index 331085fe767..b59c8a1d955 100644
--- a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -21,35 +21,43 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; ARM-LABEL: scalar_i8_signbit_eq:
; ARM: @ %bb.0:
; ARM-NEXT: uxtb r1, r1
-; ARM-NEXT: mvn r2, #127
-; ARM-NEXT: and r0, r0, r2, lsl r1
; ARM-NEXT: uxtb r0, r0
-; ARM-NEXT: clz r0, r0
-; ARM-NEXT: lsr r0, r0, #5
+; ARM-NEXT: lsr r0, r0, r1
+; ARM-NEXT: mov r1, #1
+; ARM-NEXT: uxtb r0, r0
+; ARM-NEXT: eor r0, r1, r0, lsr #7
; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i8_signbit_eq:
; THUMB6: @ %bb.0:
; THUMB6-NEXT: uxtb r1, r1
-; THUMB6-NEXT: movs r2, #127
-; THUMB6-NEXT: mvns r2, r2
-; THUMB6-NEXT: lsls r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxtb r1, r2
+; THUMB6-NEXT: uxtb r0, r0
+; THUMB6-NEXT: lsrs r0, r1
+; THUMB6-NEXT: movs r1, #128
+; THUMB6-NEXT: ands r1, r0
; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
;
-; THUMB78-LABEL: scalar_i8_signbit_eq:
-; THUMB78: @ %bb.0:
-; THUMB78-NEXT: uxtb r1, r1
-; THUMB78-NEXT: mvn r2, #127
-; THUMB78-NEXT: lsl.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: uxtb r0, r0
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
-; THUMB78-NEXT: bx lr
+; THUMB7-LABEL: scalar_i8_signbit_eq:
+; THUMB7: @ %bb.0:
+; THUMB7-NEXT: uxtb r1, r1
+; THUMB7-NEXT: uxtb r0, r0
+; THUMB7-NEXT: lsrs r0, r1
+; THUMB7-NEXT: movs r1, #1
+; THUMB7-NEXT: uxtb r0, r0
+; THUMB7-NEXT: eor.w r0, r1, r0, lsr #7
+; THUMB7-NEXT: bx lr
+;
+; THUMB8-LABEL: scalar_i8_signbit_eq:
+; THUMB8: @ %bb.0:
+; THUMB8-NEXT: uxtb r0, r0
+; THUMB8-NEXT: uxtb r1, r1
+; THUMB8-NEXT: lsrs r0, r1
+; THUMB8-NEXT: movs r1, #1
+; THUMB8-NEXT: uxtb r0, r0
+; THUMB8-NEXT: eor.w r0, r1, r0, lsr #7
+; THUMB8-NEXT: bx lr
%t0 = shl i8 128, %y
%t1 = and i8 %t0, %x
%res = icmp eq i8 %t1, 0
@@ -60,34 +68,39 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
; ARM-LABEL: scalar_i8_lowestbit_eq:
; ARM: @ %bb.0:
; ARM-NEXT: uxtb r1, r1
-; ARM-NEXT: mov r2, #1
-; ARM-NEXT: and r0, r0, r2, lsl r1
; ARM-NEXT: uxtb r0, r0
-; ARM-NEXT: clz r0, r0
-; ARM-NEXT: lsr r0, r0, #5
+; ARM-NEXT: mov r2, #1
+; ARM-NEXT: bic r0, r2, r0, lsr r1
; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i8_lowestbit_eq:
; THUMB6: @ %bb.0:
; THUMB6-NEXT: uxtb r1, r1
-; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: lsls r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxtb r1, r2
+; THUMB6-NEXT: uxtb r0, r0
+; THUMB6-NEXT: lsrs r0, r1
+; THUMB6-NEXT: movs r1, #1
+; THUMB6-NEXT: ands r1, r0
; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
;
-; THUMB78-LABEL: scalar_i8_lowestbit_eq:
-; THUMB78: @ %bb.0:
-; THUMB78-NEXT: uxtb r1, r1
-; THUMB78-NEXT: movs r2, #1
-; THUMB78-NEXT: lsl.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: uxtb r0, r0
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
-; THUMB78-NEXT: bx lr
+; THUMB7-LABEL: scalar_i8_lowestbit_eq:
+; THUMB7: @ %bb.0:
+; THUMB7-NEXT: uxtb r1, r1
+; THUMB7-NEXT: uxtb r0, r0
+; THUMB7-NEXT: lsrs r0, r1
+; THUMB7-NEXT: movs r1, #1
+; THUMB7-NEXT: bic.w r0, r1, r0
+; THUMB7-NEXT: bx lr
+;
+; THUMB8-LABEL: scalar_i8_lowestbit_eq:
+; THUMB8: @ %bb.0:
+; THUMB8-NEXT: uxtb r0, r0
+; THUMB8-NEXT: uxtb r1, r1
+; THUMB8-NEXT: lsrs r0, r1
+; THUMB8-NEXT: movs r1, #1
+; THUMB8-NEXT: bic.w r0, r1, r0
+; THUMB8-NEXT: bx lr
%t0 = shl i8 1, %y
%t1 = and i8 %t0, %x
%res = icmp eq i8 %t1, 0
@@ -98,9 +111,9 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; ARM-LABEL: scalar_i8_bitsinmiddle_eq:
; ARM: @ %bb.0:
; ARM-NEXT: uxtb r1, r1
-; ARM-NEXT: mov r2, #24
-; ARM-NEXT: and r0, r0, r2, lsl r1
; ARM-NEXT: uxtb r0, r0
+; ARM-NEXT: mov r2, #24
+; ARM-NEXT: and r0, r2, r0, lsr r1
; ARM-NEXT: clz r0, r0
; ARM-NEXT: lsr r0, r0, #5
; ARM-NEXT: bx lr
@@ -108,24 +121,33 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; THUMB6-LABEL: scalar_i8_bitsinmiddle_eq:
; THUMB6: @ %bb.0:
; THUMB6-NEXT: uxtb r1, r1
-; THUMB6-NEXT: movs r2, #24
-; THUMB6-NEXT: lsls r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxtb r1, r2
+; THUMB6-NEXT: uxtb r0, r0
+; THUMB6-NEXT: lsrs r0, r1
+; THUMB6-NEXT: movs r1, #24
+; THUMB6-NEXT: ands r1, r0
; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
;
-; THUMB78-LABEL: scalar_i8_bitsinmiddle_eq:
-; THUMB78: @ %bb.0:
-; THUMB78-NEXT: uxtb r1, r1
-; THUMB78-NEXT: movs r2, #24
-; THUMB78-NEXT: lsl.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: uxtb r0, r0
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
-; THUMB78-NEXT: bx lr
+; THUMB7-LABEL: scalar_i8_bitsinmiddle_eq:
+; THUMB7: @ %bb.0:
+; THUMB7-NEXT: uxtb r1, r1
+; THUMB7-NEXT: uxtb r0, r0
+; THUMB7-NEXT: lsrs r0, r1
+; THUMB7-NEXT: and r0, r0, #24
+; THUMB7-NEXT: clz r0, r0
+; THUMB7-NEXT: lsrs r0, r0, #5
+; THUMB7-NEXT: bx lr
+;
+; THUMB8-LABEL: scalar_i8_bitsinmiddle_eq:
+; THUMB8: @ %bb.0:
+; THUMB8-NEXT: uxtb r0, r0
+; THUMB8-NEXT: uxtb r1, r1
+; THUMB8-NEXT: lsrs r0, r1
+; THUMB8-NEXT: and r0, r0, #24
+; THUMB8-NEXT: clz r0, r0
+; THUMB8-NEXT: lsrs r0, r0, #5
+; THUMB8-NEXT: bx lr
%t0 = shl i8 24, %y
%t1 = and i8 %t0, %x
%res = icmp eq i8 %t1, 0
@@ -135,57 +157,47 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; i16 scalar
define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
-; ARM6-LABEL: scalar_i16_signbit_eq:
-; ARM6: @ %bb.0:
-; ARM6-NEXT: ldr r2, .LCPI3_0
-; ARM6-NEXT: uxth r1, r1
-; ARM6-NEXT: and r0, r0, r2, lsl r1
-; ARM6-NEXT: uxth r0, r0
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: lsr r0, r0, #5
-; ARM6-NEXT: bx lr
-; ARM6-NEXT: .p2align 2
-; ARM6-NEXT: @ %bb.1:
-; ARM6-NEXT: .LCPI3_0:
-; ARM6-NEXT: .long 4294934528 @ 0xffff8000
-;
-; ARM78-LABEL: scalar_i16_signbit_eq:
-; ARM78: @ %bb.0:
-; ARM78-NEXT: movw r2, #32768
-; ARM78-NEXT: uxth r1, r1
-; ARM78-NEXT: movt r2, #65535
-; ARM78-NEXT: and r0, r0, r2, lsl r1
-; ARM78-NEXT: uxth r0, r0
-; ARM78-NEXT: clz r0, r0
-; ARM78-NEXT: lsr r0, r0, #5
-; ARM78-NEXT: bx lr
+; ARM-LABEL: scalar_i16_signbit_eq:
+; ARM: @ %bb.0:
+; ARM-NEXT: uxth r1, r1
+; ARM-NEXT: uxth r0, r0
+; ARM-NEXT: lsr r0, r0, r1
+; ARM-NEXT: mov r1, #1
+; ARM-NEXT: uxth r0, r0
+; ARM-NEXT: eor r0, r1, r0, lsr #15
+; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i16_signbit_eq:
; THUMB6: @ %bb.0:
; THUMB6-NEXT: uxth r1, r1
-; THUMB6-NEXT: ldr r2, .LCPI3_0
-; THUMB6-NEXT: lsls r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxth r1, r2
+; THUMB6-NEXT: uxth r0, r0
+; THUMB6-NEXT: lsrs r0, r1
+; THUMB6-NEXT: movs r1, #1
+; THUMB6-NEXT: lsls r1, r1, #15
+; THUMB6-NEXT: ands r1, r0
; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
-; THUMB6-NEXT: .p2align 2
-; THUMB6-NEXT: @ %bb.1:
-; THUMB6-NEXT: .LCPI3_0:
-; THUMB6-NEXT: .long 4294934528 @ 0xffff8000
;
-; THUMB78-LABEL: scalar_i16_signbit_eq:
-; THUMB78: @ %bb.0:
-; THUMB78-NEXT: movw r2, #32768
-; THUMB78-NEXT: uxth r1, r1
-; THUMB78-NEXT: movt r2, #65535
-; THUMB78-NEXT: lsl.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: uxth r0, r0
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
-; THUMB78-NEXT: bx lr
+; THUMB7-LABEL: scalar_i16_signbit_eq:
+; THUMB7: @ %bb.0:
+; THUMB7-NEXT: uxth r1, r1
+; THUMB7-NEXT: uxth r0, r0
+; THUMB7-NEXT: lsrs r0, r1
+; THUMB7-NEXT: movs r1, #1
+; THUMB7-NEXT: uxth r0, r0
+; THUMB7-NEXT: eor.w r0, r1, r0, lsr #15
+; THUMB7-NEXT: bx lr
+;
+; THUMB8-LABEL: scalar_i16_signbit_eq:
+; THUMB8: @ %bb.0:
+; THUMB8-NEXT: uxth r0, r0
+; THUMB8-NEXT: uxth r1, r1
+; THUMB8-NEXT: lsrs r0, r1
+; THUMB8-NEXT: movs r1, #1
+; THUMB8-NEXT: uxth r0, r0
+; THUMB8-NEXT: eor.w r0, r1, r0, lsr #15
+; THUMB8-NEXT: bx lr
%t0 = shl i16 32768, %y
%t1 = and i16 %t0, %x
%res = icmp eq i16 %t1, 0
@@ -196,34 +208,39 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
; ARM-LABEL: scalar_i16_lowestbit_eq:
; ARM: @ %bb.0:
; ARM-NEXT: uxth r1, r1
-; ARM-NEXT: mov r2, #1
-; ARM-NEXT: and r0, r0, r2, lsl r1
; ARM-NEXT: uxth r0, r0
-; ARM-NEXT: clz r0, r0
-; ARM-NEXT: lsr r0, r0, #5
+; ARM-NEXT: mov r2, #1
+; ARM-NEXT: bic r0, r2, r0, lsr r1
; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i16_lowestbit_eq:
; THUMB6: @ %bb.0:
; THUMB6-NEXT: uxth r1, r1
-; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: lsls r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxth r1, r2
+; THUMB6-NEXT: uxth r0, r0
+; THUMB6-NEXT: lsrs r0, r1
+; THUMB6-NEXT: movs r1, #1
+; THUMB6-NEXT: ands r1, r0
; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
;
-; THUMB78-LABEL: scalar_i16_lowestbit_eq:
-; THUMB78: @ %bb.0:
-; THUMB78-NEXT: uxth r1, r1
-; THUMB78-NEXT: movs r2, #1
-; THUMB78-NEXT: lsl.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: uxth r0, r0
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
-; THUMB78-NEXT: bx lr
+; THUMB7-LABEL: scalar_i16_lowestbit_eq:
+; THUMB7: @ %bb.0:
+; THUMB7-NEXT: uxth r1, r1
+; THUMB7-NEXT: uxth r0, r0
+; THUMB7-NEXT: lsrs r0, r1
+; THUMB7-NEXT: movs r1, #1
+; THUMB7-NEXT: bic.w r0, r1, r0
+; THUMB7-NEXT: bx lr
+;
+; THUMB8-LABEL: scalar_i16_lowestbit_eq:
+; THUMB8: @ %bb.0:
+; THUMB8-NEXT: uxth r0, r0
+; THUMB8-NEXT: uxth r1, r1
+; THUMB8-NEXT: lsrs r0, r1
+; THUMB8-NEXT: movs r1, #1
+; THUMB8-NEXT: bic.w r0, r1, r0
+; THUMB8-NEXT: bx lr
%t0 = shl i16 1, %y
%t1 = and i16 %t0, %x
%res = icmp eq i16 %t1, 0
@@ -234,9 +251,9 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; ARM-LABEL: scalar_i16_bitsinmiddle_eq:
; ARM: @ %bb.0:
; ARM-NEXT: uxth r1, r1
-; ARM-NEXT: mov r2, #4080
-; ARM-NEXT: and r0, r0, r2, lsl r1
; ARM-NEXT: uxth r0, r0
+; ARM-NEXT: mov r2, #4080
+; ARM-NEXT: and r0, r2, r0, lsr r1
; ARM-NEXT: clz r0, r0
; ARM-NEXT: lsr r0, r0, #5
; ARM-NEXT: bx lr
@@ -244,25 +261,34 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; THUMB6-LABEL: scalar_i16_bitsinmiddle_eq:
; THUMB6: @ %bb.0:
; THUMB6-NEXT: uxth r1, r1
-; THUMB6-NEXT: movs r2, #255
-; THUMB6-NEXT: lsls r2, r2, #4
-; THUMB6-NEXT: lsls r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxth r1, r2
+; THUMB6-NEXT: uxth r0, r0
+; THUMB6-NEXT: lsrs r0, r1
+; THUMB6-NEXT: movs r1, #255
+; THUMB6-NEXT: lsls r1, r1, #4
+; THUMB6-NEXT: ands r1, r0
; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
;
-; THUMB78-LABEL: scalar_i16_bitsinmiddle_eq:
-; THUMB78: @ %bb.0:
-; THUMB78-NEXT: uxth r1, r1
-; THUMB78-NEXT: mov.w r2, #4080
-; THUMB78-NEXT: lsl.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: uxth r0, r0
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
-; THUMB78-NEXT: bx lr
+; THUMB7-LABEL: scalar_i16_bitsinmiddle_eq:
+; THUMB7: @ %bb.0:
+; THUMB7-NEXT: uxth r1, r1
+; THUMB7-NEXT: uxth r0, r0
+; THUMB7-NEXT: lsrs r0, r1
+; THUMB7-NEXT: and r0, r0, #4080
+; THUMB7-NEXT: clz r0, r0
+; THUMB7-NEXT: lsrs r0, r0, #5
+; THUMB7-NEXT: bx lr
+;
+; THUMB8-LABEL: scalar_i16_bitsinmiddle_eq:
+; THUMB8: @ %bb.0:
+; THUMB8-NEXT: uxth r0, r0
+; THUMB8-NEXT: uxth r1, r1
+; THUMB8-NEXT: lsrs r0, r1
+; THUMB8-NEXT: and r0, r0, #4080
+; THUMB8-NEXT: clz r0, r0
+; THUMB8-NEXT: lsrs r0, r0, #5
+; THUMB8-NEXT: bx lr
%t0 = shl i16 4080, %y
%t1 = and i16 %t0, %x
%res = icmp eq i16 %t1, 0
@@ -274,29 +300,25 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
; ARM-LABEL: scalar_i32_signbit_eq:
; ARM: @ %bb.0:
-; ARM-NEXT: mov r2, #-2147483648
-; ARM-NEXT: and r0, r0, r2, lsl r1
-; ARM-NEXT: clz r0, r0
-; ARM-NEXT: lsr r0, r0, #5
+; ARM-NEXT: mvn r0, r0, lsr r1
+; ARM-NEXT: lsr r0, r0, #31
; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i32_signbit_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: lsls r2, r2, #31
-; THUMB6-NEXT: lsls r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: rsbs r0, r2, #0
-; THUMB6-NEXT: adcs r0, r2
+; THUMB6-NEXT: lsrs r0, r1
+; THUMB6-NEXT: movs r1, #1
+; THUMB6-NEXT: lsls r1, r1, #31
+; THUMB6-NEXT: ands r1, r0
+; THUMB6-NEXT: rsbs r0, r1, #0
+; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
;
; THUMB78-LABEL: scalar_i32_signbit_eq:
; THUMB78: @ %bb.0:
-; THUMB78-NEXT: mov.w r2, #-2147483648
-; THUMB78-NEXT: lsl.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
+; THUMB78-NEXT: lsrs r0, r1
+; THUMB78-NEXT: mvns r0, r0
+; THUMB78-NEXT: lsrs r0, r0, #31
; THUMB78-NEXT: bx lr
%t0 = shl i32 2147483648, %y
%t1 = and i32 %t0, %x
@@ -308,27 +330,23 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
; ARM-LABEL: scalar_i32_lowestbit_eq:
; ARM: @ %bb.0:
; ARM-NEXT: mov r2, #1
-; ARM-NEXT: and r0, r0, r2, lsl r1
-; ARM-NEXT: clz r0, r0
-; ARM-NEXT: lsr r0, r0, #5
+; ARM-NEXT: bic r0, r2, r0, lsr r1
; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i32_lowestbit_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: lsls r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: rsbs r0, r2, #0
-; THUMB6-NEXT: adcs r0, r2
+; THUMB6-NEXT: lsrs r0, r1
+; THUMB6-NEXT: movs r1, #1
+; THUMB6-NEXT: ands r1, r0
+; THUMB6-NEXT: rsbs r0, r1, #0
+; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
;
; THUMB78-LABEL: scalar_i32_lowestbit_eq:
; THUMB78: @ %bb.0:
-; THUMB78-NEXT: movs r2, #1
-; THUMB78-NEXT: lsl.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
+; THUMB78-NEXT: lsrs r0, r1
+; THUMB78-NEXT: movs r1, #1
+; THUMB78-NEXT: bic.w r0, r1, r0
; THUMB78-NEXT: bx lr
%t0 = shl i32 1, %y
%t1 = and i32 %t0, %x
@@ -341,7 +359,7 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; ARM6: @ %bb.0:
; ARM6-NEXT: mov r2, #65280
; ARM6-NEXT: orr r2, r2, #16711680
-; ARM6-NEXT: and r0, r0, r2, lsl r1
+; ARM6-NEXT: and r0, r2, r0, lsr r1
; ARM6-NEXT: clz r0, r0
; ARM6-NEXT: lsr r0, r0, #5
; ARM6-NEXT: bx lr
@@ -350,18 +368,18 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; ARM78: @ %bb.0:
; ARM78-NEXT: movw r2, #65280
; ARM78-NEXT: movt r2, #255
-; ARM78-NEXT: and r0, r0, r2, lsl r1
+; ARM78-NEXT: and r0, r2, r0, lsr r1
; ARM78-NEXT: clz r0, r0
; ARM78-NEXT: lsr r0, r0, #5
; ARM78-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i32_bitsinmiddle_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: ldr r2, .LCPI8_0
-; THUMB6-NEXT: lsls r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: rsbs r0, r2, #0
-; THUMB6-NEXT: adcs r0, r2
+; THUMB6-NEXT: lsrs r0, r1
+; THUMB6-NEXT: ldr r1, .LCPI8_0
+; THUMB6-NEXT: ands r1, r0
+; THUMB6-NEXT: rsbs r0, r1, #0
+; THUMB6-NEXT: adcs r0, r1
; THUMB6-NEXT: bx lr
; THUMB6-NEXT: .p2align 2
; THUMB6-NEXT: @ %bb.1:
@@ -370,9 +388,9 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
;
; THUMB78-LABEL: scalar_i32_bitsinmiddle_eq:
; THUMB78: @ %bb.0:
-; THUMB78-NEXT: movw r2, #65280
-; THUMB78-NEXT: movt r2, #255
-; THUMB78-NEXT: lsl.w r1, r2, r1
+; THUMB78-NEXT: lsrs r0, r1
+; THUMB78-NEXT: movw r1, #65280
+; THUMB78-NEXT: movt r1, #255
; THUMB78-NEXT: ands r0, r1
; THUMB78-NEXT: clz r0, r0
; THUMB78-NEXT: lsrs r0, r0, #5
@@ -388,52 +406,41 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
; ARM6-LABEL: scalar_i64_signbit_eq:
; ARM6: @ %bb.0:
-; ARM6-NEXT: mov r0, #-2147483648
-; ARM6-NEXT: lsl r0, r0, r2
-; ARM6-NEXT: subs r2, r2, #32
+; ARM6-NEXT: lsr r0, r1, r2
+; ARM6-NEXT: subs r1, r2, #32
; ARM6-NEXT: movpl r0, #0
-; ARM6-NEXT: and r0, r0, r1
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: lsr r0, r0, #5
+; ARM6-NEXT: mvn r0, r0
+; ARM6-NEXT: lsr r0, r0, #31
; ARM6-NEXT: bx lr
;
; ARM78-LABEL: scalar_i64_signbit_eq:
; ARM78: @ %bb.0:
-; ARM78-NEXT: mov r0, #-2147483648
-; ARM78-NEXT: lsl r0, r0, r2
-; ARM78-NEXT: subs r2, r2, #32
+; ARM78-NEXT: lsr r0, r1, r2
+; ARM78-NEXT: subs r1, r2, #32
; ARM78-NEXT: movwpl r0, #0
-; ARM78-NEXT: and r0, r0, r1
-; ARM78-NEXT: clz r0, r0
-; ARM78-NEXT: lsr r0, r0, #5
+; ARM78-NEXT: mvn r0, r0
+; ARM78-NEXT: lsr r0, r0, #31
; ARM78-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i64_signbit_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r7, lr}
-; THUMB6-NEXT: mov r4, r1
-; THUMB6-NEXT: mov r5, r0
+; THUMB6-NEXT: push {r7, lr}
+; THUMB6-NEXT: bl __lshrdi3
; THUMB6-NEXT: movs r0, #1
-; THUMB6-NEXT: lsls r1, r0, #31
-; THUMB6-NEXT: movs r0, #0
-; THUMB6-NEXT: bl __ashldi3
-; THUMB6-NEXT: ands r1, r4
-; THUMB6-NEXT: ands r0, r5
-; THUMB6-NEXT: orrs r0, r1
-; THUMB6-NEXT: rsbs r1, r0, #0
-; THUMB6-NEXT: adcs r0, r1
-; THUMB6-NEXT: pop {r4, r5, r7, pc}
+; THUMB6-NEXT: lsls r2, r0, #31
+; THUMB6-NEXT: ands r2, r1
+; THUMB6-NEXT: rsbs r0, r2, #0
+; THUMB6-NEXT: adcs r0, r2
+; THUMB6-NEXT: pop {r7, pc}
;
; THUMB78-LABEL: scalar_i64_signbit_eq:
; THUMB78: @ %bb.0:
-; THUMB78-NEXT: mov.w r0, #-2147483648
-; THUMB78-NEXT: lsls r0, r2
-; THUMB78-NEXT: subs r2, #32
+; THUMB78-NEXT: lsr.w r0, r1, r2
+; THUMB78-NEXT: subs.w r1, r2, #32
; THUMB78-NEXT: it pl
; THUMB78-NEXT: movpl r0, #0
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: clz r0, r0
-; THUMB78-NEXT: lsrs r0, r0, #5
+; THUMB78-NEXT: mvns r0, r0
+; THUMB78-NEXT: lsrs r0, r0, #31
; THUMB78-NEXT: bx lr
%t0 = shl i64 9223372036854775808, %y
%t1 = and i64 %t0, %x
@@ -442,94 +449,53 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
}
define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
-; ARM6-LABEL: scalar_i64_lowestbit_eq:
-; ARM6: @ %bb.0:
-; ARM6-NEXT: push {r11, lr}
-; ARM6-NEXT: mov r12, #1
-; ARM6-NEXT: subs lr, r2, #32
-; ARM6-NEXT: lsl r3, r12, r2
-; ARM6-NEXT: rsb r2, r2, #32
-; ARM6-NEXT: movpl r3, #0
-; ARM6-NEXT: and r0, r3, r0
-; ARM6-NEXT: lsr r2, r12, r2
-; ARM6-NEXT: lslpl r2, r12, lr
-; ARM6-NEXT: and r1, r2, r1
-; ARM6-NEXT: orr r0, r0, r1
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: lsr r0, r0, #5
-; ARM6-NEXT: pop {r11, pc}
-;
-; ARM78-LABEL: scalar_i64_lowestbit_eq:
-; ARM78: @ %bb.0:
-; ARM78-NEXT: push {r11, lr}
-; ARM78-NEXT: mov r12, #1
-; ARM78-NEXT: subs lr, r2, #32
-; ARM78-NEXT: lsl r3, r12, r2
-; ARM78-NEXT: rsb r2, r2, #32
-; ARM78-NEXT: movwpl r3, #0
-; ARM78-NEXT: and r0, r3, r0
-; ARM78-NEXT: lsr r2, r12, r2
-; ARM78-NEXT: lslpl r2, r12, lr
-; ARM78-NEXT: and r1, r2, r1
-; ARM78-NEXT: orr r0, r0, r1
-; ARM78-NEXT: clz r0, r0
-; ARM78-NEXT: lsr r0, r0, #5
-; ARM78-NEXT: pop {r11, pc}
+; ARM-LABEL: scalar_i64_lowestbit_eq:
+; ARM: @ %bb.0:
+; ARM-NEXT: rsb r3, r2, #32
+; ARM-NEXT: lsr r0, r0, r2
+; ARM-NEXT: subs r2, r2, #32
+; ARM-NEXT: orr r0, r0, r1, lsl r3
+; ARM-NEXT: lsrpl r0, r1, r2
+; ARM-NEXT: mov r1, #1
+; ARM-NEXT: bic r0, r1, r0
+; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i64_lowestbit_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r7, lr}
-; THUMB6-NEXT: mov r4, r1
-; THUMB6-NEXT: mov r5, r0
-; THUMB6-NEXT: movs r0, #1
-; THUMB6-NEXT: movs r1, #0
-; THUMB6-NEXT: bl __ashldi3
-; THUMB6-NEXT: ands r1, r4
-; THUMB6-NEXT: ands r0, r5
-; THUMB6-NEXT: orrs r0, r1
-; THUMB6-NEXT: rsbs r1, r0, #0
+; THUMB6-NEXT: push {r7, lr}
+; THUMB6-NEXT: bl __lshrdi3
+; THUMB6-NEXT: movs r1, #1
+; THUMB6-NEXT: ands r1, r0
+; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
-; THUMB6-NEXT: pop {r4, r5, r7, pc}
+; THUMB6-NEXT: pop {r7, pc}
;
; THUMB7-LABEL: scalar_i64_lowestbit_eq:
; THUMB7: @ %bb.0:
-; THUMB7-NEXT: push {r7, lr}
; THUMB7-NEXT: rsb.w r3, r2, #32
-; THUMB7-NEXT: mov.w r12, #1
-; THUMB7-NEXT: subs.w lr, r2, #32
-; THUMB7-NEXT: lsl.w r2, r12, r2
-; THUMB7-NEXT: lsr.w r3, r12, r3
-; THUMB7-NEXT: it pl
-; THUMB7-NEXT: lslpl.w r3, r12, lr
+; THUMB7-NEXT: lsrs r0, r2
+; THUMB7-NEXT: subs r2, #32
+; THUMB7-NEXT: lsl.w r3, r1, r3
+; THUMB7-NEXT: orr.w r0, r0, r3
; THUMB7-NEXT: it pl
-; THUMB7-NEXT: movpl r2, #0
-; THUMB7-NEXT: ands r1, r3
-; THUMB7-NEXT: ands r0, r2
-; THUMB7-NEXT: orrs r0, r1
-; THUMB7-NEXT: clz r0, r0
-; THUMB7-NEXT: lsrs r0, r0, #5
-; THUMB7-NEXT: pop {r7, pc}
+; THUMB7-NEXT: lsrpl.w r0, r1, r2
+; THUMB7-NEXT: movs r1, #1
+; THUMB7-NEXT: bic.w r0, r1, r0
+; THUMB7-NEXT: bx lr
;
; THUMB8-LABEL: scalar_i64_lowestbit_eq:
; THUMB8: @ %bb.0:
-; THUMB8-NEXT: .save {r7, lr}
-; THUMB8-NEXT: push {r7, lr}
-; THUMB8-NEXT: subs.w r3, r2, #32
-; THUMB8-NEXT: mov.w r12, #1
-; THUMB8-NEXT: lsl.w lr, r12, r3
; THUMB8-NEXT: rsb.w r3, r2, #32
-; THUMB8-NEXT: lsl.w r2, r12, r2
-; THUMB8-NEXT: lsr.w r3, r12, r3
-; THUMB8-NEXT: it pl
-; THUMB8-NEXT: movpl r3, lr
-; THUMB8-NEXT: it pl
-; THUMB8-NEXT: movpl r2, #0
-; THUMB8-NEXT: ands r1, r3
-; THUMB8-NEXT: ands r0, r2
-; THUMB8-NEXT: orrs r0, r1
-; THUMB8-NEXT: clz r0, r0
-; THUMB8-NEXT: lsrs r0, r0, #5
-; THUMB8-NEXT: pop {r7, pc}
+; THUMB8-NEXT: lsrs r0, r2
+; THUMB8-NEXT: lsl.w r3, r1, r3
+; THUMB8-NEXT: orrs r0, r3
+; THUMB8-NEXT: subs r2, #32
+; THUMB8-NEXT: lsr.w r1, r1, r2
+; THUMB8-NEXT: it mi
+; THUMB8-NEXT: movmi r1, r0
+; THUMB8-NEXT: movs r0, #1
+; THUMB8-NEXT: bics r0, r1
+; THUMB8-NEXT: bx lr
%t0 = shl i64 1, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0
@@ -539,115 +505,82 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; ARM6-LABEL: scalar_i64_bitsinmiddle_eq:
; ARM6: @ %bb.0:
-; ARM6-NEXT: push {r4, lr}
-; ARM6-NEXT: mov r12, #16711680
-; ARM6-NEXT: subs lr, r2, #32
-; ARM6-NEXT: orr r12, r12, #-16777216
-; ARM6-NEXT: mov r4, #255
-; ARM6-NEXT: orr r4, r4, #65280
-; ARM6-NEXT: lsl r3, r12, r2
-; ARM6-NEXT: movpl r3, #0
-; ARM6-NEXT: and r0, r3, r0
; ARM6-NEXT: rsb r3, r2, #32
-; ARM6-NEXT: cmp lr, #0
-; ARM6-NEXT: lsr r3, r12, r3
-; ARM6-NEXT: orr r2, r3, r4, lsl r2
-; ARM6-NEXT: lslpl r2, r12, lr
-; ARM6-NEXT: and r1, r2, r1
-; ARM6-NEXT: orr r0, r0, r1
+; ARM6-NEXT: lsr r0, r0, r2
+; ARM6-NEXT: orr r0, r0, r1, lsl r3
+; ARM6-NEXT: subs r3, r2, #32
+; ARM6-NEXT: lsrpl r0, r1, r3
+; ARM6-NEXT: lsr r1, r1, r2
+; ARM6-NEXT: movpl r1, #0
+; ARM6-NEXT: pkhbt r0, r1, r0
; ARM6-NEXT: clz r0, r0
; ARM6-NEXT: lsr r0, r0, #5
-; ARM6-NEXT: pop {r4, pc}
+; ARM6-NEXT: bx lr
;
; ARM78-LABEL: scalar_i64_bitsinmiddle_eq:
; ARM78: @ %bb.0:
-; ARM78-NEXT: push {r4, lr}
-; ARM78-NEXT: movw r12, #0
-; ARM78-NEXT: subs lr, r2, #32
-; ARM78-NEXT: movt r12, #65535
-; ARM78-NEXT: movw r4, #65535
-; ARM78-NEXT: lsl r3, r12, r2
-; ARM78-NEXT: movwpl r3, #0
-; ARM78-NEXT: and r0, r3, r0
; ARM78-NEXT: rsb r3, r2, #32
-; ARM78-NEXT: cmp lr, #0
-; ARM78-NEXT: lsr r3, r12, r3
-; ARM78-NEXT: orr r2, r3, r4, lsl r2
-; ARM78-NEXT: lslpl r2, r12, lr
-; ARM78-NEXT: and r1, r2, r1
-; ARM78-NEXT: orr r0, r0, r1
+; ARM78-NEXT: lsr r0, r0, r2
+; ARM78-NEXT: orr r0, r0, r1, lsl r3
+; ARM78-NEXT: subs r3, r2, #32
+; ARM78-NEXT: lsrpl r0, r1, r3
+; ARM78-NEXT: lsr r1, r1, r2
+; ARM78-NEXT: movwpl r1, #0
+; ARM78-NEXT: pkhbt r0, r1, r0
; ARM78-NEXT: clz r0, r0
; ARM78-NEXT: lsr r0, r0, #5
-; ARM78-NEXT: pop {r4, pc}
+; ARM78-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i64_bitsinmiddle_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r7, lr}
-; THUMB6-NEXT: mov r4, r1
-; THUMB6-NEXT: mov r5, r0
-; THUMB6-NEXT: ldr r0, .LCPI11_0
-; THUMB6-NEXT: ldr r1, .LCPI11_1
-; THUMB6-NEXT: bl __ashldi3
-; THUMB6-NEXT: ands r1, r4
-; THUMB6-NEXT: ands r0, r5
-; THUMB6-NEXT: orrs r0, r1
-; THUMB6-NEXT: rsbs r1, r0, #0
+; THUMB6-NEXT: push {r7, lr}
+; THUMB6-NEXT: bl __lshrdi3
+; THUMB6-NEXT: ldr r2, .LCPI11_0
+; THUMB6-NEXT: ands r2, r0
+; THUMB6-NEXT: uxth r0, r1
+; THUMB6-NEXT: adds r1, r2, r0
+; THUMB6-NEXT: rsbs r0, r1, #0
; THUMB6-NEXT: adcs r0, r1
-; THUMB6-NEXT: pop {r4, r5, r7, pc}
+; THUMB6-NEXT: pop {r7, pc}
; THUMB6-NEXT: .p2align 2
; THUMB6-NEXT: @ %bb.1:
; THUMB6-NEXT: .LCPI11_0:
; THUMB6-NEXT: .long 4294901760 @ 0xffff0000
-; THUMB6-NEXT: .LCPI11_1:
-; THUMB6-NEXT: .long 65535 @ 0xffff
;
; THUMB7-LABEL: scalar_i64_bitsinmiddle_eq:
; THUMB7: @ %bb.0:
-; THUMB7-NEXT: push {r7, lr}
-; THUMB7-NEXT: movw r3, #65535
-; THUMB7-NEXT: movw lr, #0
-; THUMB7-NEXT: lsl.w r12, r3, r2
; THUMB7-NEXT: rsb.w r3, r2, #32
-; THUMB7-NEXT: movt lr, #65535
-; THUMB7-NEXT: lsr.w r3, lr, r3
-; THUMB7-NEXT: orr.w r12, r12, r3
+; THUMB7-NEXT: lsrs r0, r2
+; THUMB7-NEXT: lsl.w r3, r1, r3
+; THUMB7-NEXT: orrs r0, r3
; THUMB7-NEXT: subs.w r3, r2, #32
-; THUMB7-NEXT: lsl.w r2, lr, r2
; THUMB7-NEXT: it pl
-; THUMB7-NEXT: lslpl.w r12, lr, r3
+; THUMB7-NEXT: lsrpl.w r0, r1, r3
+; THUMB7-NEXT: lsr.w r1, r1, r2
; THUMB7-NEXT: it pl
-; THUMB7-NEXT: movpl r2, #0
-; THUMB7-NEXT: and.w r1, r1, r12
-; THUMB7-NEXT: ands r0, r2
-; THUMB7-NEXT: orrs r0, r1
+; THUMB7-NEXT: movpl r1, #0
+; THUMB7-NEXT: pkhbt r0, r1, r0
; THUMB7-NEXT: clz r0, r0
; THUMB7-NEXT: lsrs r0, r0, #5
-; THUMB7-NEXT: pop {r7, pc}
+; THUMB7-NEXT: bx lr
;
; THUMB8-LABEL: scalar_i64_bitsinmiddle_eq:
; THUMB8: @ %bb.0:
-; THUMB8-NEXT: .save {r7, lr}
-; THUMB8-NEXT: push {r7, lr}
-; THUMB8-NEXT: movw r3, #65535
-; THUMB8-NEXT: movw lr, #0
-; THUMB8-NEXT: lsl.w r12, r3, r2
; THUMB8-NEXT: rsb.w r3, r2, #32
-; THUMB8-NEXT: movt lr, #65535
-; THUMB8-NEXT: lsr.w r3, lr, r3
-; THUMB8-NEXT: orr.w r12, r12, r3
+; THUMB8-NEXT: lsrs r0, r2
+; THUMB8-NEXT: lsl.w r3, r1, r3
+; THUMB8-NEXT: orrs r0, r3
; THUMB8-NEXT: subs.w r3, r2, #32
-; THUMB8-NEXT: lsl.w r2, lr, r2
-; THUMB8-NEXT: lsl.w r3, lr, r3
+; THUMB8-NEXT: lsr.w r3, r1, r3
; THUMB8-NEXT: it mi
-; THUMB8-NEXT: movmi r3, r12
+; THUMB8-NEXT: movmi r3, r0
+; THUMB8-NEXT: lsr.w r0, r1, r2
; THUMB8-NEXT: it pl
-; THUMB8-NEXT: movpl r2, #0
-; THUMB8-NEXT: ands r1, r3
-; THUMB8-NEXT: ands r0, r2
-; THUMB8-NEXT: orrs r0, r1
+; THUMB8-NEXT: movpl r0, #0
+; THUMB8-NEXT: pkhbt r0, r0, r3
; THUMB8-NEXT: clz r0, r0
; THUMB8-NEXT: lsrs r0, r0, #5
-; THUMB8-NEXT: pop {r7, pc}
+; THUMB8-NEXT: bx lr
%t0 = shl i64 281474976645120, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0
@@ -664,32 +597,25 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; ARM6-NEXT: push {r11, lr}
; ARM6-NEXT: ldr r12, [sp, #8]
; ARM6-NEXT: mov lr, #1
-; ARM6-NEXT: and r0, r0, lr, lsl r12
+; ARM6-NEXT: bic r0, lr, r0, lsr r12
; ARM6-NEXT: ldr r12, [sp, #12]
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: and r1, r1, lr, lsl r12
+; ARM6-NEXT: bic r1, lr, r1, lsr r12
; ARM6-NEXT: ldr r12, [sp, #16]
-; ARM6-NEXT: clz r1, r1
-; ARM6-NEXT: lsr r0, r0, #5
-; ARM6-NEXT: and r2, r2, lr, lsl r12
+; ARM6-NEXT: bic r2, lr, r2, lsr r12
; ARM6-NEXT: ldr r12, [sp, #20]
-; ARM6-NEXT: clz r2, r2
-; ARM6-NEXT: lsr r1, r1, #5
-; ARM6-NEXT: and r3, r3, lr, lsl r12
-; ARM6-NEXT: lsr r2, r2, #5
-; ARM6-NEXT: clz r3, r3
-; ARM6-NEXT: lsr r3, r3, #5
+; ARM6-NEXT: bic r3, lr, r3, lsr r12
; ARM6-NEXT: pop {r11, pc}
;
; ARM78-LABEL: vec_4xi32_splat_eq:
; ARM78: @ %bb.0:
-; ARM78-NEXT: vmov.i32 q8, #0x1
; ARM78-NEXT: mov r12, sp
-; ARM78-NEXT: vld1.64 {d18, d19}, [r12]
-; ARM78-NEXT: vshl.u32 q8, q8, q9
+; ARM78-NEXT: vld1.64 {d16, d17}, [r12]
; ARM78-NEXT: vmov d19, r2, r3
+; ARM78-NEXT: vneg.s32 q8, q8
; ARM78-NEXT: vmov d18, r0, r1
-; ARM78-NEXT: vtst.32 q8, q8, q9
+; ARM78-NEXT: vmov.i32 q10, #0x1
+; ARM78-NEXT: vshl.u32 q8, q9, q8
+; ARM78-NEXT: vtst.32 q8, q8, q10
; ARM78-NEXT: vmvn q8, q8
; ARM78-NEXT: vmovn.i32 d16, q8
; ARM78-NEXT: vmov r0, r1, d16
@@ -697,42 +623,40 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
;
; THUMB6-LABEL: vec_4xi32_splat_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r6, lr}
-; THUMB6-NEXT: ldr r5, [sp, #16]
+; THUMB6-NEXT: push {r4, r5, r7, lr}
+; THUMB6-NEXT: ldr r4, [sp, #16]
+; THUMB6-NEXT: lsrs r0, r4
; THUMB6-NEXT: movs r4, #1
-; THUMB6-NEXT: mov r6, r4
-; THUMB6-NEXT: lsls r6, r5
-; THUMB6-NEXT: ands r6, r0
-; THUMB6-NEXT: rsbs r0, r6, #0
-; THUMB6-NEXT: adcs r0, r6
+; THUMB6-NEXT: ands r0, r4
+; THUMB6-NEXT: rsbs r5, r0, #0
+; THUMB6-NEXT: adcs r0, r5
; THUMB6-NEXT: ldr r5, [sp, #20]
-; THUMB6-NEXT: mov r6, r4
-; THUMB6-NEXT: lsls r6, r5
-; THUMB6-NEXT: ands r6, r1
-; THUMB6-NEXT: rsbs r1, r6, #0
-; THUMB6-NEXT: adcs r1, r6
+; THUMB6-NEXT: lsrs r1, r5
+; THUMB6-NEXT: ands r1, r4
+; THUMB6-NEXT: rsbs r5, r1, #0
+; THUMB6-NEXT: adcs r1, r5
; THUMB6-NEXT: ldr r5, [sp, #24]
-; THUMB6-NEXT: mov r6, r4
-; THUMB6-NEXT: lsls r6, r5
-; THUMB6-NEXT: ands r6, r2
-; THUMB6-NEXT: rsbs r2, r6, #0
-; THUMB6-NEXT: adcs r2, r6
+; THUMB6-NEXT: lsrs r2, r5
+; THUMB6-NEXT: ands r2, r4
+; THUMB6-NEXT: rsbs r5, r2, #0
+; THUMB6-NEXT: adcs r2, r5
; THUMB6-NEXT: ldr r5, [sp, #28]
-; THUMB6-NEXT: lsls r4, r5
-; THUMB6-NEXT: ands r4, r3
-; THUMB6-NEXT: rsbs r3, r4, #0
+; THUMB6-NEXT: lsrs r3, r5
+; THUMB6-NEXT: ands r3, r4
+; THUMB6-NEXT: rsbs r4, r3, #0
; THUMB6-NEXT: adcs r3, r4
-; THUMB6-NEXT: pop {r4, r5, r6, pc}
+; THUMB6-NEXT: pop {r4, r5, r7, pc}
;
; THUMB78-LABEL: vec_4xi32_splat_eq:
; THUMB78: @ %bb.0:
-; THUMB78-NEXT: vmov.i32 q8, #0x1
; THUMB78-NEXT: mov r12, sp
-; THUMB78-NEXT: vld1.64 {d18, d19}, [r12]
-; THUMB78-NEXT: vshl.u32 q8, q8, q9
+; THUMB78-NEXT: vld1.64 {d16, d17}, [r12]
; THUMB78-NEXT: vmov d19, r2, r3
+; THUMB78-NEXT: vneg.s32 q8, q8
; THUMB78-NEXT: vmov d18, r0, r1
-; THUMB78-NEXT: vtst.32 q8, q8, q9
+; THUMB78-NEXT: vmov.i32 q10, #0x1
+; THUMB78-NEXT: vshl.u32 q8, q9, q8
+; THUMB78-NEXT: vtst.32 q8, q8, q10
; THUMB78-NEXT: vmvn q8, q8
; THUMB78-NEXT: vmovn.i32 d16, q8
; THUMB78-NEXT: vmov r0, r1, d16
@@ -748,20 +672,16 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; ARM6: @ %bb.0:
; ARM6-NEXT: ldr r12, [sp, #4]
; ARM6-NEXT: mov r0, #1
-; ARM6-NEXT: and r0, r1, r0, lsl r12
+; ARM6-NEXT: bic r1, r0, r1, lsr r12
; ARM6-NEXT: ldr r12, [sp, #8]
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: lsr r1, r0, #5
; ARM6-NEXT: mov r0, #65280
; ARM6-NEXT: orr r0, r0, #16711680
-; ARM6-NEXT: and r0, r2, r0, lsl r12
-; ARM6-NEXT: ldr r12, [sp, #12]
+; ARM6-NEXT: and r0, r0, r2, lsr r12
; ARM6-NEXT: clz r0, r0
; ARM6-NEXT: lsr r2, r0, #5
-; ARM6-NEXT: mov r0, #-2147483648
-; ARM6-NEXT: and r0, r3, r0, lsl r12
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: lsr r3, r0, #5
+; ARM6-NEXT: ldr r0, [sp, #12]
+; ARM6-NEXT: mvn r0, r3, lsr r0
+; ARM6-NEXT: lsr r3, r0, #31
; ARM6-NEXT: mov r0, #1
; ARM6-NEXT: bx lr
;
@@ -789,27 +709,26 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
;
; THUMB6-LABEL: vec_4xi32_nonsplat_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r7, lr}
-; THUMB6-NEXT: ldr r4, [sp, #20]
+; THUMB6-NEXT: push {r4, lr}
+; THUMB6-NEXT: ldr r0, [sp, #12]
+; THUMB6-NEXT: lsrs r1, r0
; THUMB6-NEXT: movs r0, #1
-; THUMB6-NEXT: mov r5, r0
-; THUMB6-NEXT: lsls r5, r4
-; THUMB6-NEXT: ands r5, r1
-; THUMB6-NEXT: rsbs r1, r5, #0
-; THUMB6-NEXT: adcs r1, r5
-; THUMB6-NEXT: ldr r4, [sp, #24]
-; THUMB6-NEXT: ldr r5, .LCPI13_0
-; THUMB6-NEXT: lsls r5, r4
-; THUMB6-NEXT: ands r5, r2
-; THUMB6-NEXT: rsbs r2, r5, #0
-; THUMB6-NEXT: adcs r2, r5
+; THUMB6-NEXT: ands r1, r0
+; THUMB6-NEXT: rsbs r4, r1, #0
+; THUMB6-NEXT: adcs r1, r4
+; THUMB6-NEXT: ldr r4, [sp, #16]
+; THUMB6-NEXT: lsrs r2, r4
+; THUMB6-NEXT: ldr r4, .LCPI13_0
+; THUMB6-NEXT: ands r4, r2
+; THUMB6-NEXT: rsbs r2, r4, #0
+; THUMB6-NEXT: adcs r2, r4
+; THUMB6-NEXT: ldr r4, [sp, #20]
+; THUMB6-NEXT: lsrs r3, r4
; THUMB6-NEXT: lsls r4, r0, #31
-; THUMB6-NEXT: ldr r5, [sp, #28]
-; THUMB6-NEXT: lsls r4, r5
; THUMB6-NEXT: ands r4, r3
; THUMB6-NEXT: rsbs r3, r4, #0
; THUMB6-NEXT: adcs r3, r4
-; THUMB6-NEXT: pop {r4, r5, r7, pc}
+; THUMB6-NEXT: pop {r4, pc}
; THUMB6-NEXT: .p2align 2
; THUMB6-NEXT: @ %bb.1:
; THUMB6-NEXT: .LCPI13_0:
@@ -849,28 +768,23 @@ define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwi
; ARM6-NEXT: ldr r2, [sp, #12]
; ARM6-NEXT: mov lr, #1
; ARM6-NEXT: ldr r12, [sp, #8]
-; ARM6-NEXT: and r1, r1, lr, lsl r2
+; ARM6-NEXT: bic r1, lr, r1, lsr r2
; ARM6-NEXT: ldr r2, [sp, #20]
-; ARM6-NEXT: and r0, r0, lr, lsl r12
-; ARM6-NEXT: clz r1, r1
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: and r2, r3, lr, lsl r2
-; ARM6-NEXT: lsr r1, r1, #5
-; ARM6-NEXT: clz r2, r2
-; ARM6-NEXT: lsr r0, r0, #5
-; ARM6-NEXT: lsr r3, r2, #5
+; ARM6-NEXT: bic r0, lr, r0, lsr r12
+; ARM6-NEXT: bic r3, lr, r3, lsr r2
; ARM6-NEXT: mov r2, #1
; ARM6-NEXT: pop {r11, pc}
;
; ARM78-LABEL: vec_4xi32_nonsplat_undef0_eq:
; ARM78: @ %bb.0:
-; ARM78-NEXT: vmov.i32 q8, #0x1
; ARM78-NEXT: mov r12, sp
-; ARM78-NEXT: vld1.64 {d18, d19}, [r12]
-; ARM78-NEXT: vshl.u32 q8, q8, q9
+; ARM78-NEXT: vld1.64 {d16, d17}, [r12]
; ARM78-NEXT: vmov d19, r2, r3
+; ARM78-NEXT: vneg.s32 q8, q8
; ARM78-NEXT: vmov d18, r0, r1
-; ARM78-NEXT: vtst.32 q8, q8, q9
+; ARM78-NEXT: vmov.i32 q10, #0x1
+; ARM78-NEXT: vshl.u32 q8, q9, q8
+; ARM78-NEXT: vtst.32 q8, q8, q10
; ARM78-NEXT: vmvn q8, q8
; ARM78-NEXT: vmovn.i32 d16, q8
; ARM78-NEXT: vmov r0, r1, d16
@@ -878,37 +792,35 @@ define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwi
;
; THUMB6-LABEL: vec_4xi32_nonsplat_undef0_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r7, lr}
-; THUMB6-NEXT: ldr r4, [sp, #16]
+; THUMB6-NEXT: push {r4, lr}
+; THUMB6-NEXT: ldr r2, [sp, #8]
+; THUMB6-NEXT: lsrs r0, r2
; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: mov r5, r2
-; THUMB6-NEXT: lsls r5, r4
-; THUMB6-NEXT: ands r5, r0
-; THUMB6-NEXT: rsbs r0, r5, #0
-; THUMB6-NEXT: adcs r0, r5
+; THUMB6-NEXT: ands r0, r2
+; THUMB6-NEXT: rsbs r4, r0, #0
+; THUMB6-NEXT: adcs r0, r4
+; THUMB6-NEXT: ldr r4, [sp, #12]
+; THUMB6-NEXT: lsrs r1, r4
+; THUMB6-NEXT: ands r1, r2
+; THUMB6-NEXT: rsbs r4, r1, #0
+; THUMB6-NEXT: adcs r1, r4
; THUMB6-NEXT: ldr r4, [sp, #20]
-; THUMB6-NEXT: mov r5, r2
-; THUMB6-NEXT: lsls r5, r4
-; THUMB6-NEXT: ands r5, r1
-; THUMB6-NEXT: rsbs r1, r5, #0
-; THUMB6-NEXT: adcs r1, r5
-; THUMB6-NEXT: ldr r4, [sp, #28]
-; THUMB6-NEXT: mov r5, r2
-; THUMB6-NEXT: lsls r5, r4
-; THUMB6-NEXT: ands r5, r3
-; THUMB6-NEXT: rsbs r3, r5, #0
-; THUMB6-NEXT: adcs r3, r5
-; THUMB6-NEXT: pop {r4, r5, r7, pc}
+; THUMB6-NEXT: lsrs r3, r4
+; THUMB6-NEXT: ands r3, r2
+; THUMB6-NEXT: rsbs r4, r3, #0
+; THUMB6-NEXT: adcs r3, r4
+; THUMB6-NEXT: pop {r4, pc}
;
; THUMB78-LABEL: vec_4xi32_nonsplat_undef0_eq:
; THUMB78: @ %bb.0:
-; THUMB78-NEXT: vmov.i32 q8, #0x1
; THUMB78-NEXT: mov r12, sp
-; THUMB78-NEXT: vld1.64 {d18, d19}, [r12]
-; THUMB78-NEXT: vshl.u32 q8, q8, q9
+; THUMB78-NEXT: vld1.64 {d16, d17}, [r12]
; THUMB78-NEXT: vmov d19, r2, r3
+; THUMB78-NEXT: vneg.s32 q8, q8
; THUMB78-NEXT: vmov d18, r0, r1
-; THUMB78-NEXT: vtst.32 q8, q8, q9
+; THUMB78-NEXT: vmov.i32 q10, #0x1
+; THUMB78-NEXT: vshl.u32 q8, q9, q8
+; THUMB78-NEXT: vtst.32 q8, q8, q10
; THUMB78-NEXT: vmvn q8, q8
; THUMB78-NEXT: vmovn.i32 d16, q8
; THUMB78-NEXT: vmov r0, r1, d16
@@ -925,16 +837,10 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwi
; ARM6-NEXT: ldr r2, [sp, #12]
; ARM6-NEXT: mov lr, #1
; ARM6-NEXT: ldr r12, [sp, #8]
-; ARM6-NEXT: and r1, r1, lr, lsl r2
+; ARM6-NEXT: bic r1, lr, r1, lsr r2
; ARM6-NEXT: ldr r2, [sp, #20]
-; ARM6-NEXT: and r0, r0, lr, lsl r12
-; ARM6-NEXT: clz r1, r1
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: and r2, r3, lr, lsl r2
-; ARM6-NEXT: lsr r1, r1, #5
-; ARM6-NEXT: clz r2, r2
-; ARM6-NEXT: lsr r0, r0, #5
-; ARM6-NEXT: lsr r3, r2, #5
+; ARM6-NEXT: bic r0, lr, r0, lsr r12
+; ARM6-NEXT: bic r3, lr, r3, lsr r2
; ARM6-NEXT: pop {r11, pc}
;
; ARM78-LABEL: vec_4xi32_nonsplat_undef1_eq:
@@ -953,26 +859,24 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwi
;
; THUMB6-LABEL: vec_4xi32_nonsplat_undef1_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r7, lr}
-; THUMB6-NEXT: ldr r4, [sp, #16]
+; THUMB6-NEXT: push {r4, lr}
+; THUMB6-NEXT: ldr r2, [sp, #8]
+; THUMB6-NEXT: lsrs r0, r2
; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: mov r5, r2
-; THUMB6-NEXT: lsls r5, r4
-; THUMB6-NEXT: ands r5, r0
-; THUMB6-NEXT: rsbs r0, r5, #0
-; THUMB6-NEXT: adcs r0, r5
+; THUMB6-NEXT: ands r0, r2
+; THUMB6-NEXT: rsbs r4, r0, #0
+; THUMB6-NEXT: adcs r0, r4
+; THUMB6-NEXT: ldr r4, [sp, #12]
+; THUMB6-NEXT: lsrs r1, r4
+; THUMB6-NEXT: ands r1, r2
+; THUMB6-NEXT: rsbs r4, r1, #0
+; THUMB6-NEXT: adcs r1, r4
; THUMB6-NEXT: ldr r4, [sp, #20]
-; THUMB6-NEXT: mov r5, r2
-; THUMB6-NEXT: lsls r5, r4
-; THUMB6-NEXT: ands r5, r1
-; THUMB6-NEXT: rsbs r1, r5, #0
-; THUMB6-NEXT: adcs r1, r5
-; THUMB6-NEXT: ldr r4, [sp, #28]
-; THUMB6-NEXT: lsls r2, r4
-; THUMB6-NEXT: ands r2, r3
-; THUMB6-NEXT: rsbs r3, r2, #0
+; THUMB6-NEXT: lsrs r3, r4
+; THUMB6-NEXT: ands r3, r2
+; THUMB6-NEXT: rsbs r2, r3, #0
; THUMB6-NEXT: adcs r3, r2
-; THUMB6-NEXT: pop {r4, r5, r7, pc}
+; THUMB6-NEXT: pop {r4, pc}
;
; THUMB78-LABEL: vec_4xi32_nonsplat_undef1_eq:
; THUMB78: @ %bb.0:
@@ -999,16 +903,10 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
; ARM6-NEXT: ldr r2, [sp, #12]
; ARM6-NEXT: mov lr, #1
; ARM6-NEXT: ldr r12, [sp, #8]
-; ARM6-NEXT: and r1, r1, lr, lsl r2
+; ARM6-NEXT: bic r1, lr, r1, lsr r2
; ARM6-NEXT: ldr r2, [sp, #20]
-; ARM6-NEXT: and r0, r0, lr, lsl r12
-; ARM6-NEXT: clz r1, r1
-; ARM6-NEXT: clz r0, r0
-; ARM6-NEXT: and r2, r3, lr, lsl r2
-; ARM6-NEXT: lsr r1, r1, #5
-; ARM6-NEXT: clz r2, r2
-; ARM6-NEXT: lsr r0, r0, #5
-; ARM6-NEXT: lsr r3, r2, #5
+; ARM6-NEXT: bic r0, lr, r0, lsr r12
+; ARM6-NEXT: bic r3, lr, r3, lsr r2
; ARM6-NEXT: pop {r11, pc}
;
; ARM78-LABEL: vec_4xi32_nonsplat_undef2_eq:
@@ -1027,26 +925,24 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
;
; THUMB6-LABEL: vec_4xi32_nonsplat_undef2_eq:
; THUMB6: @ %bb.0:
-; THUMB6-NEXT: push {r4, r5, r7, lr}
-; THUMB6-NEXT: ldr r4, [sp, #16]
+; THUMB6-NEXT: push {r4, lr}
+; THUMB6-NEXT: ldr r2, [sp, #8]
+; THUMB6-NEXT: lsrs r0, r2
; THUMB6-NEXT: movs r2, #1
-; THUMB6-NEXT: mov r5, r2
-; THUMB6-NEXT: lsls r5, r4
-; THUMB6-NEXT: ands r5, r0
-; THUMB6-NEXT: rsbs r0, r5, #0
-; THUMB6-NEXT: adcs r0, r5
+; THUMB6-NEXT: ands r0, r2
+; THUMB6-NEXT: rsbs r4, r0, #0
+; THUMB6-NEXT: adcs r0, r4
+; THUMB6-NEXT: ldr r4, [sp, #12]
+; THUMB6-NEXT: lsrs r1, r4
+; THUMB6-NEXT: ands r1, r2
+; THUMB6-NEXT: rsbs r4, r1, #0
+; THUMB6-NEXT: adcs r1, r4
; THUMB6-NEXT: ldr r4, [sp, #20]
-; THUMB6-NEXT: mov r5, r2
-; THUMB6-NEXT: lsls r5, r4
-; THUMB6-NEXT: ands r5, r1
-; THUMB6-NEXT: rsbs r1, r5, #0
-; THUMB6-NEXT: adcs r1, r5
-; THUMB6-NEXT: ldr r4, [sp, #28]
-; THUMB6-NEXT: lsls r2, r4
-; THUMB6-NEXT: ands r2, r3
-; THUMB6-NEXT: rsbs r3, r2, #0
+; THUMB6-NEXT: lsrs r3, r4
+; THUMB6-NEXT: ands r3, r2
+; THUMB6-NEXT: rsbs r2, r3, #0
; THUMB6-NEXT: adcs r3, r2
-; THUMB6-NEXT: pop {r4, r5, r7, pc}
+; THUMB6-NEXT: pop {r4, pc}
;
; THUMB78-LABEL: vec_4xi32_nonsplat_undef2_eq:
; THUMB78: @ %bb.0:
@@ -1072,49 +968,41 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
;------------------------------------------------------------------------------;
define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
-; ARM6-LABEL: scalar_i8_signbit_ne:
-; ARM6: @ %bb.0:
-; ARM6-NEXT: uxtb r1, r1
-; ARM6-NEXT: mvn r2, #127
-; ARM6-NEXT: and r0, r0, r2, lsl r1
-; ARM6-NEXT: uxtb r0, r0
-; ARM6-NEXT: cmp r0, #0
-; ARM6-NEXT: movne r0, #1
-; ARM6-NEXT: bx lr
-;
-; ARM78-LABEL: scalar_i8_signbit_ne:
-; ARM78: @ %bb.0:
-; ARM78-NEXT: uxtb r1, r1
-; ARM78-NEXT: mvn r2, #127
-; ARM78-NEXT: and r0, r0, r2, lsl r1
-; ARM78-NEXT: uxtb r0, r0
-; ARM78-NEXT: cmp r0, #0
-; ARM78-NEXT: movwne r0, #1
-; ARM78-NEXT: bx lr
+; ARM-LABEL: scalar_i8_signbit_ne:
+; ARM: @ %bb.0:
+; ARM-NEXT: uxtb r1, r1
+; ARM-NEXT: uxtb r0, r0
+; ARM-NEXT: lsr r0, r0, r1
+; ARM-NEXT: uxtb r0, r0
+; ARM-NEXT: lsr r0, r0, #7
+; ARM-NEXT: bx lr
;
; THUMB6-LABEL: scalar_i8_signbit_ne:
; THUMB6: @ %bb.0:
; THUMB6-NEXT: uxtb r1, r1
-; THUMB6-NEXT: movs r2, #127
-; THUMB6-NEXT: mvns r2, r2
-; THUMB6-NEXT: lsls r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxtb r0, r2
-; THUMB6-NEXT: subs r1, r0, #1
-; THUMB6-NEXT: sbcs r0, r1
+; THUMB6-NEXT: uxtb r0, r0
+; THUMB6-NEXT: lsrs r0, r1
+; THUMB6-NEXT: uxtb r0, r0
+; THUMB6-NEXT: lsrs r0, r0, #7
; THUMB6-NEXT: bx lr
;
-; THUMB78-LABEL: scalar_i8_signbit_ne:
-; THUMB78: @ %bb.0:
-; THUMB78-NEXT: uxtb r1, r1
-; THUMB78-NEXT: mvn r2, #127
-; THUMB78-NEXT: lsl.w r1, r2, r1
-; THUMB78-NEXT: ands r0, r1
-; THUMB78-NEXT: uxtb r0, r0
-; THUMB78-NEXT: cmp r0, #0
-; THUMB78-NEXT: it ne
-; THUMB78-NEXT: movne r0, #1
-; THUMB78-NEXT: bx lr
+; THUMB7-LABEL: scalar_i8_signbit_ne:
+; THUMB7: @ %bb.0:
+; THUMB7-NEXT: uxtb r1, r1
+; THUMB7-NEXT: uxtb r0, r0
+; THUMB7-NEXT: lsrs r0, r1
+; THUMB7-NEXT: uxtb r0, r0
+; THUMB7-NEXT: lsrs r0, r0, #7
+; THUMB7-NEXT: bx lr
+;
+; THUMB8-LABEL: scalar_i8_signbit_ne:
+; THUMB8: @ %bb.0:
+; THUMB8-NEXT: uxtb r0, r0
+; THUMB8-NEXT: uxtb r1, r1
+; THUMB8-NEXT: lsrs r0, r1
+; THUMB8-NEXT: uxtb r0, r0
+; THUMB8-NEXT: lsrs r0, r0, #7
+; THUMB8-NEXT: bx lr
%t0 = shl i8 128, %y
%t1 = and i8 %t0, %x
%res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate
diff --git a/llvm/test/CodeGen/PowerPC/shift-cmp.ll b/llvm/test/CodeGen/PowerPC/shift-cmp.ll
index ffbcd6b85c0..d580ad007dd 100644
--- a/llvm/test/CodeGen/PowerPC/shift-cmp.ll
+++ b/llvm/test/CodeGen/PowerPC/shift-cmp.ll
@@ -6,8 +6,7 @@ target triple = "powerpc64le-unknown-linux-gnu"
define i1 @and_cmp_variable_power_of_two(i32 %x, i32 %y) {
; CHECK-LABEL: and_cmp_variable_power_of_two:
; CHECK: # %bb.0:
-; CHECK-NEXT: subfic 4, 4, 32
-; CHECK-NEXT: rlwnm 3, 3, 4, 31, 31
+; CHECK-NEXT: srw 3, 3, 4
; CHECK-NEXT: blr
%shl = shl i32 1, %y
%and = and i32 %x, %shl
@@ -18,8 +17,7 @@ define i1 @and_cmp_variable_power_of_two(i32 %x, i32 %y) {
define i1 @and_cmp_variable_power_of_two_64(i64 %x, i64 %y) {
; CHECK-LABEL: and_cmp_variable_power_of_two_64:
; CHECK: # %bb.0:
-; CHECK-NEXT: subfic 4, 4, 64
-; CHECK-NEXT: rldcl 3, 3, 4, 63
+; CHECK-NEXT: srd 3, 3, 4
; CHECK-NEXT: blr
%shl = shl i64 1, %y
%and = and i64 %x, %shl
@@ -30,9 +28,8 @@ define i1 @and_cmp_variable_power_of_two_64(i64 %x, i64 %y) {
define i1 @and_ncmp_variable_power_of_two(i32 %x, i32 %y) {
; CHECK-LABEL: and_ncmp_variable_power_of_two:
; CHECK: # %bb.0:
-; CHECK-NEXT: subfic 4, 4, 32
-; CHECK-NEXT: nor 3, 3, 3
-; CHECK-NEXT: rlwnm 3, 3, 4, 31, 31
+; CHECK-NEXT: srw 3, 3, 4
+; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
%shl = shl i32 1, %y
%and = and i32 %x, %shl
@@ -43,9 +40,8 @@ define i1 @and_ncmp_variable_power_of_two(i32 %x, i32 %y) {
define i1 @and_ncmp_variable_power_of_two_64(i64 %x, i64 %y) {
; CHECK-LABEL: and_ncmp_variable_power_of_two_64:
; CHECK: # %bb.0:
-; CHECK-NEXT: not 3, 3
-; CHECK-NEXT: subfic 4, 4, 64
-; CHECK-NEXT: rldcl 3, 3, 4, 63
+; CHECK-NEXT: srd 3, 3, 4
+; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
%shl = shl i64 1, %y
%and = and i64 %x, %shl
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index 88c939f52be..363df5335ef 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -23,19 +23,18 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_signbit_eq:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb $-128, %al
-; X86-NEXT: shrb %cl, %al
-; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: shlb %cl, %al
+; X86-NEXT: testb $-128, %al
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_signbit_eq:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movb $-128, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrb %cl, %al
-; X64-NEXT: testb %dil, %al
+; X64-NEXT: shlb %cl, %dil
+; X64-NEXT: testb $-128, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
%t0 = lshr i8 128, %y
@@ -48,19 +47,18 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_lowestbit_eq:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb $1, %al
-; X86-NEXT: shrb %cl, %al
-; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: shlb %cl, %al
+; X86-NEXT: testb $1, %al
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_lowestbit_eq:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movb $1, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrb %cl, %al
-; X64-NEXT: testb %dil, %al
+; X64-NEXT: shlb %cl, %dil
+; X64-NEXT: testb $1, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
%t0 = lshr i8 1, %y
@@ -73,19 +71,18 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_bitsinmiddle_eq:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb $24, %al
-; X86-NEXT: shrb %cl, %al
-; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: shlb %cl, %al
+; X86-NEXT: testb $24, %al
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_bitsinmiddle_eq:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movb $24, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrb %cl, %al
-; X64-NEXT: testb %dil, %al
+; X64-NEXT: shlb %cl, %dil
+; X64-NEXT: testb $24, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
%t0 = lshr i8 24, %y
@@ -100,36 +97,33 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_signbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $32768, %eax # imm = 0x8000
-; X86-BMI1-NEXT: shrl %cl, %eax
-; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: shll %cl, %eax
+; X86-BMI1-NEXT: testl $32768, %eax # imm = 0x8000
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i16_signbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movl $32768, %ecx # imm = 0x8000
-; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
-; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: testl $32768, %eax # imm = 0x8000
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i16_signbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
-; X64-BMI1-NEXT: movl $32768, %eax # imm = 0x8000
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shrl %cl, %eax
-; X64-BMI1-NEXT: testw %di, %ax
+; X64-BMI1-NEXT: shll %cl, %edi
+; X64-BMI1-NEXT: testl $32768, %edi # imm = 0x8000
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i16_signbit_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $32768, %eax # imm = 0x8000
-; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
-; X64-BMI2-NEXT: testw %di, %ax
+; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
+; X64-BMI2-NEXT: testl $32768, %eax # imm = 0x8000
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i16 32768, %y
@@ -142,36 +136,33 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_lowestbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $1, %eax
-; X86-BMI1-NEXT: shrl %cl, %eax
-; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: shll %cl, %eax
+; X86-BMI1-NEXT: testb $1, %al
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i16_lowestbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movl $1, %ecx
-; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
-; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: testb $1, %al
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i16_lowestbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
-; X64-BMI1-NEXT: movl $1, %eax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shrl %cl, %eax
-; X64-BMI1-NEXT: testw %di, %ax
+; X64-BMI1-NEXT: shll %cl, %edi
+; X64-BMI1-NEXT: testb $1, %dil
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i16_lowestbit_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
-; X64-BMI2-NEXT: testw %di, %ax
+; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
+; X64-BMI2-NEXT: testb $1, %al
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i16 1, %y
@@ -184,36 +175,33 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0
-; X86-BMI1-NEXT: shrl %cl, %eax
-; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: shll %cl, %eax
+; X86-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movl $4080, %ecx # imm = 0xFF0
-; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
-; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
-; X64-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shrl %cl, %eax
-; X64-BMI1-NEXT: testw %di, %ax
+; X64-BMI1-NEXT: shll %cl, %edi
+; X64-BMI1-NEXT: testl $4080, %edi # imm = 0xFF0
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $4080, %eax # imm = 0xFF0
-; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
-; X64-BMI2-NEXT: testw %di, %ax
+; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
+; X64-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i16 4080, %y
@@ -228,36 +216,33 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_signbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000
-; X86-BMI1-NEXT: shrl %cl, %eax
-; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: shll %cl, %eax
+; X86-BMI1-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i32_signbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
-; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i32_signbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
-; X64-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shrl %cl, %eax
-; X64-BMI1-NEXT: testl %edi, %eax
+; X64-BMI1-NEXT: shll %cl, %edi
+; X64-BMI1-NEXT: testl $-2147483648, %edi # imm = 0x80000000
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i32_signbit_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $-2147483648, %eax # imm = 0x80000000
-; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
-; X64-BMI2-NEXT: testl %edi, %eax
+; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
+; X64-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i32 2147483648, %y
@@ -270,36 +255,33 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_lowestbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $1, %eax
-; X86-BMI1-NEXT: shrl %cl, %eax
-; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: shll %cl, %eax
+; X86-BMI1-NEXT: testb $1, %al
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i32_lowestbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movl $1, %ecx
-; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
-; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: testb $1, %al
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i32_lowestbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
-; X64-BMI1-NEXT: movl $1, %eax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shrl %cl, %eax
-; X64-BMI1-NEXT: testl %edi, %eax
+; X64-BMI1-NEXT: shll %cl, %edi
+; X64-BMI1-NEXT: testb $1, %dil
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i32_lowestbit_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
-; X64-BMI2-NEXT: testl %edi, %eax
+; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
+; X64-BMI2-NEXT: testb $1, %al
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i32 1, %y
@@ -312,36 +294,33 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00
-; X86-BMI1-NEXT: shrl %cl, %eax
-; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: shll %cl, %eax
+; X86-BMI1-NEXT: testl $16776960, %eax # imm = 0xFFFF00
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movl $16776960, %ecx # imm = 0xFFFF00
-; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
-; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
-; X64-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shrl %cl, %eax
-; X64-BMI1-NEXT: testl %edi, %eax
+; X64-BMI1-NEXT: shll %cl, %edi
+; X64-BMI1-NEXT: testl $16776960, %edi # imm = 0xFFFF00
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $16776960, %eax # imm = 0xFFFF00
-; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
-; X64-BMI2-NEXT: testl %edi, %eax
+; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
+; X64-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i32 16776960, %y
@@ -357,55 +336,44 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000
-; X86-BMI1-NEXT: xorl %edx, %edx
-; X86-BMI1-NEXT: xorl %esi, %esi
-; X86-BMI1-NEXT: shrdl %cl, %eax, %esi
-; X86-BMI1-NEXT: shrl %cl, %eax
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT: movl %eax, %esi
+; X86-BMI1-NEXT: shll %cl, %esi
+; X86-BMI1-NEXT: shldl %cl, %eax, %edx
; X86-BMI1-NEXT: testb $32, %cl
-; X86-BMI1-NEXT: cmovnel %eax, %esi
-; X86-BMI1-NEXT: cmovnel %edx, %eax
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT: orl %esi, %eax
+; X86-BMI1-NEXT: cmovnel %esi, %edx
+; X86-BMI1-NEXT: testl $-2147483648, %edx # imm = 0x80000000
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i64_signbit_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI2-NEXT: movl $-2147483648, %eax # imm = 0x80000000
-; X86-BMI2-NEXT: xorl %edx, %edx
-; X86-BMI2-NEXT: xorl %esi, %esi
-; X86-BMI2-NEXT: shrdl %cl, %eax, %esi
-; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: shldl %cl, %eax, %edx
+; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: cmovnel %eax, %esi
-; X86-BMI2-NEXT: cmovnel %edx, %eax
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: orl %esi, %eax
+; X86-BMI2-NEXT: cmovel %edx, %eax
+; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X86-BMI2-NEXT: sete %al
-; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i64_signbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movq %rsi, %rcx
-; X64-BMI1-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI1-NEXT: shrq %cl, %rax
-; X64-BMI1-NEXT: testq %rdi, %rax
+; X64-BMI1-NEXT: shlq %cl, %rdi
+; X64-BMI1-NEXT: shrq $63, %rdi
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i64_signbit_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
-; X64-BMI2-NEXT: testq %rdi, %rax
+; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT: shrq $63, %rax
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i64 9223372036854775808, %y
@@ -415,34 +383,42 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
}
define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
-; X86-LABEL: scalar_i64_lowestbit_eq:
-; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: movl $1, %edx
-; X86-NEXT: shrdl %cl, %eax, %edx
-; X86-NEXT: testb $32, %cl
-; X86-NEXT: cmovnel %eax, %edx
-; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: orl $0, %edx
-; X86-NEXT: sete %al
-; X86-NEXT: retl
+; X86-BMI1-LABEL: scalar_i64_lowestbit_eq:
+; X86-BMI1: # %bb.0:
+; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: shll %cl, %eax
+; X86-BMI1-NEXT: xorl %edx, %edx
+; X86-BMI1-NEXT: testb $32, %cl
+; X86-BMI1-NEXT: cmovel %eax, %edx
+; X86-BMI1-NEXT: testb $1, %dl
+; X86-BMI1-NEXT: sete %al
+; X86-BMI1-NEXT: retl
+;
+; X86-BMI2-LABEL: scalar_i64_lowestbit_eq:
+; X86-BMI2: # %bb.0:
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: xorl %edx, %edx
+; X86-BMI2-NEXT: testb $32, %al
+; X86-BMI2-NEXT: cmovel %ecx, %edx
+; X86-BMI2-NEXT: testb $1, %dl
+; X86-BMI2-NEXT: sete %al
+; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i64_lowestbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movq %rsi, %rcx
-; X64-BMI1-NEXT: movl $1, %eax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI1-NEXT: shrq %cl, %rax
-; X64-BMI1-NEXT: testq %rdi, %rax
+; X64-BMI1-NEXT: shlq %cl, %rdi
+; X64-BMI1-NEXT: testb $1, %dil
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i64_lowestbit_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
-; X64-BMI2-NEXT: testq %rdi, %rax
+; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT: testb $1, %al
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i64 1, %y
@@ -456,17 +432,18 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $65535, %eax # imm = 0xFFFF
-; X86-BMI1-NEXT: movl $-65536, %edx # imm = 0xFFFF0000
-; X86-BMI1-NEXT: shrdl %cl, %eax, %edx
-; X86-BMI1-NEXT: shrl %cl, %eax
-; X86-BMI1-NEXT: xorl %esi, %esi
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT: movl %eax, %esi
+; X86-BMI1-NEXT: shll %cl, %esi
+; X86-BMI1-NEXT: shldl %cl, %eax, %edx
+; X86-BMI1-NEXT: xorl %eax, %eax
; X86-BMI1-NEXT: testb $32, %cl
-; X86-BMI1-NEXT: cmovnel %eax, %edx
-; X86-BMI1-NEXT: cmovel %eax, %esi
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: orl %edx, %esi
+; X86-BMI1-NEXT: cmovnel %esi, %edx
+; X86-BMI1-NEXT: movzwl %dx, %ecx
+; X86-BMI1-NEXT: cmovel %esi, %eax
+; X86-BMI1-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
+; X86-BMI1-NEXT: orl %ecx, %eax
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: retl
@@ -475,17 +452,17 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI2-NEXT: movl $65535, %eax # imm = 0xFFFF
-; X86-BMI2-NEXT: movl $-65536, %edx # imm = 0xFFFF0000
-; X86-BMI2-NEXT: shrdl %cl, %eax, %edx
-; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: shldl %cl, %eax, %edx
+; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: xorl %esi, %esi
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: cmovnel %eax, %edx
+; X86-BMI2-NEXT: movzwl %dx, %ecx
; X86-BMI2-NEXT: cmovel %eax, %esi
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: orl %edx, %esi
+; X86-BMI2-NEXT: andl $-65536, %esi # imm = 0xFFFF0000
+; X86-BMI2-NEXT: orl %ecx, %esi
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
@@ -493,18 +470,18 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movq %rsi, %rcx
-; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI1-NEXT: shrq %cl, %rax
-; X64-BMI1-NEXT: testq %rdi, %rax
+; X64-BMI1-NEXT: shlq %cl, %rdi
+; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
+; X64-BMI1-NEXT: testq %rax, %rdi
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
-; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
-; X64-BMI2-NEXT: testq %rdi, %rax
+; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000
+; X64-BMI2-NEXT: testq %rcx, %rax
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = lshr i64 281474976645120, %y
@@ -518,37 +495,48 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
;------------------------------------------------------------------------------;
define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; SSE2-LABEL: vec_4xi32_splat_eq:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,1,1,1]
-; SSE2-NEXT: movdqa %xmm3, %xmm4
-; SSE2-NEXT: psrld %xmm2, %xmm4
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
-; SSE2-NEXT: movdqa %xmm3, %xmm5
-; SSE2-NEXT: psrld %xmm2, %xmm5
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; SSE2-NEXT: movdqa %xmm3, %xmm4
-; SSE2-NEXT: psrld %xmm2, %xmm4
-; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
-; SSE2-NEXT: psrld %xmm1, %xmm3
-; SSE2-NEXT: punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
-; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
-; SSE2-NEXT: andps %xmm5, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
+; X86-SSE2-LABEL: vec_4xi32_splat_eq:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pxor %xmm2, %xmm2
+; X86-SSE2-NEXT: pslld $23, %xmm1
+; X86-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE2-NEXT: pmuludq %xmm3, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm0
+; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; X86-SSE2-NEXT: retl
;
; AVX2-LABEL: vec_4xi32_splat_eq:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
-; AVX2-NEXT: vpsrlvd %xmm1, %xmm2, %xmm1
-; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT: ret{{[l|q]}}
+;
+; X64-SSE2-LABEL: vec_4xi32_splat_eq:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: pxor %xmm2, %xmm2
+; X64-SSE2-NEXT: pslld $23, %xmm1
+; X64-SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
+; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; X64-SSE2-NEXT: pmuludq %xmm1, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X64-SSE2-NEXT: pmuludq %xmm3, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; X64-SSE2-NEXT: retq
%t0 = lshr <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
%t1 = and <4 x i32> %t0, %x
%res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
@@ -594,37 +582,48 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
}
define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = <1,1,u,1>
-; SSE2-NEXT: movdqa %xmm3, %xmm4
-; SSE2-NEXT: psrld %xmm2, %xmm4
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
-; SSE2-NEXT: movdqa %xmm3, %xmm5
-; SSE2-NEXT: psrld %xmm2, %xmm5
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; SSE2-NEXT: movdqa %xmm3, %xmm4
-; SSE2-NEXT: psrld %xmm2, %xmm4
-; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
-; SSE2-NEXT: psrld %xmm1, %xmm3
-; SSE2-NEXT: punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
-; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
-; SSE2-NEXT: andps %xmm5, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
+; X86-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pxor %xmm2, %xmm2
+; X86-SSE2-NEXT: pslld $23, %xmm1
+; X86-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE2-NEXT: pmuludq %xmm3, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm0
+; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; X86-SSE2-NEXT: retl
;
; AVX2-LABEL: vec_4xi32_nonsplat_undef0_eq:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
-; AVX2-NEXT: vpsrlvd %xmm1, %xmm2, %xmm1
-; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT: ret{{[l|q]}}
+;
+; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: pxor %xmm2, %xmm2
+; X64-SSE2-NEXT: pslld $23, %xmm1
+; X64-SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
+; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; X64-SSE2-NEXT: pmuludq %xmm1, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X64-SSE2-NEXT: pmuludq %xmm3, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; X64-SSE2-NEXT: retq
%t0 = lshr <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
%t1 = and <4 x i32> %t0, %x
%res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
@@ -713,20 +712,19 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_signbit_ne:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb $-128, %al
-; X86-NEXT: shrb %cl, %al
-; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
-; X86-NEXT: setne %al
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: shlb %cl, %al
+; X86-NEXT: shrb $7, %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_signbit_ne:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movb $-128, %al
+; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrb %cl, %al
-; X64-NEXT: testb %dil, %al
-; X64-NEXT: setne %al
+; X64-NEXT: shlb %cl, %al
+; X64-NEXT: shrb $7, %al
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%t0 = lshr i8 128, %y
%t1 = and i8 %t0, %x
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index aa4a135fd68..99799d17d21 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -23,19 +23,18 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_signbit_eq:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb $-128, %al
-; X86-NEXT: shlb %cl, %al
-; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: shrb %cl, %al
+; X86-NEXT: testb $-128, %al
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_signbit_eq:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movb $-128, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shlb %cl, %al
-; X64-NEXT: testb %dil, %al
+; X64-NEXT: shrb %cl, %dil
+; X64-NEXT: testb $-128, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
%t0 = shl i8 128, %y
@@ -68,19 +67,18 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_bitsinmiddle_eq:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb $24, %al
-; X86-NEXT: shlb %cl, %al
-; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: shrb %cl, %al
+; X86-NEXT: testb $24, %al
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_bitsinmiddle_eq:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movb $24, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shlb %cl, %al
-; X64-NEXT: testb %dil, %al
+; X64-NEXT: shrb %cl, %dil
+; X64-NEXT: testb $24, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
%t0 = shl i8 24, %y
@@ -95,36 +93,36 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_signbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-32768, %eax # imm = 0x8000
-; X86-BMI1-NEXT: shll %cl, %eax
-; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: shrl %cl, %eax
+; X86-BMI1-NEXT: testl $32768, %eax # imm = 0x8000
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i16_signbit_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movl $-32768, %ecx # imm = 0x8000
-; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
-; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
+; X86-BMI2-NEXT: testl $32768, %eax # imm = 0x8000
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i16_signbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
-; X64-BMI1-NEXT: movl $-32768, %eax # imm = 0x8000
+; X64-BMI1-NEXT: movzwl %di, %eax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shll %cl, %eax
-; X64-BMI1-NEXT: testw %di, %ax
+; X64-BMI1-NEXT: shrl %cl, %eax
+; X64-BMI1-NEXT: testl $32768, %eax # imm = 0x8000
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i16_signbit_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $-32768, %eax # imm = 0x8000
-; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
-; X64-BMI2-NEXT: testw %di, %ax
+; X64-BMI2-NEXT: movzwl %di, %eax
+; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
+; X64-BMI2-NEXT: testl $32768, %eax # imm = 0x8000
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = shl i16 32768, %y
@@ -157,36 +155,36 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0
-; X86-BMI1-NEXT: shll %cl, %eax
-; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: shrl %cl, %eax
+; X86-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movl $4080, %ecx # imm = 0xFF0
-; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
-; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
+; X86-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
-; X64-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0
+; X64-BMI1-NEXT: movzwl %di, %eax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shll %cl, %eax
-; X64-BMI1-NEXT: testw %di, %ax
+; X64-BMI1-NEXT: shrl %cl, %eax
+; X64-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $4080, %eax # imm = 0xFF0
-; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
-; X64-BMI2-NEXT: testw %di, %ax
+; X64-BMI2-NEXT: movzwl %di, %eax
+; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
+; X64-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = shl i16 4080, %y
@@ -201,36 +199,33 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_signbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000
-; X86-BMI1-NEXT: shll %cl, %eax
-; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: shrl %cl, %eax
+; X86-BMI1-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i32_signbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
-; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
-; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i32_signbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
-; X64-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shll %cl, %eax
-; X64-BMI1-NEXT: testl %edi, %eax
+; X64-BMI1-NEXT: shrl %cl, %edi
+; X64-BMI1-NEXT: testl $-2147483648, %edi # imm = 0x80000000
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i32_signbit_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $-2147483648, %eax # imm = 0x80000000
-; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
-; X64-BMI2-NEXT: testl %edi, %eax
+; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = shl i32 2147483648, %y
@@ -263,36 +258,33 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00
-; X86-BMI1-NEXT: shll %cl, %eax
-; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: shrl %cl, %eax
+; X86-BMI1-NEXT: testl $16776960, %eax # imm = 0xFFFF00
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movl $16776960, %ecx # imm = 0xFFFF00
-; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
-; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %esi, %ecx
-; X64-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shll %cl, %eax
-; X64-BMI1-NEXT: testl %edi, %eax
+; X64-BMI1-NEXT: shrl %cl, %edi
+; X64-BMI1-NEXT: testl $16776960, %edi # imm = 0xFFFF00
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $16776960, %eax # imm = 0xFFFF00
-; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
-; X64-BMI2-NEXT: testl %edi, %eax
+; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = shl i32 16776960, %y
@@ -304,35 +296,43 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; i64 scalar
define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
-; X86-LABEL: scalar_i64_signbit_eq:
-; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: movl $-2147483648, %edx # imm = 0x80000000
-; X86-NEXT: shldl %cl, %eax, %edx
-; X86-NEXT: testb $32, %cl
-; X86-NEXT: cmovnel %eax, %edx
-; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: orl $0, %edx
-; X86-NEXT: sete %al
-; X86-NEXT: retl
+; X86-BMI1-LABEL: scalar_i64_signbit_eq:
+; X86-BMI1: # %bb.0:
+; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: shrl %cl, %eax
+; X86-BMI1-NEXT: xorl %edx, %edx
+; X86-BMI1-NEXT: testb $32, %cl
+; X86-BMI1-NEXT: cmovel %eax, %edx
+; X86-BMI1-NEXT: testl $-2147483648, %edx # imm = 0x80000000
+; X86-BMI1-NEXT: sete %al
+; X86-BMI1-NEXT: retl
+;
+; X86-BMI2-LABEL: scalar_i64_signbit_eq:
+; X86-BMI2: # %bb.0:
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: xorl %edx, %edx
+; X86-BMI2-NEXT: testb $32, %al
+; X86-BMI2-NEXT: cmovel %ecx, %edx
+; X86-BMI2-NEXT: testl $-2147483648, %edx # imm = 0x80000000
+; X86-BMI2-NEXT: sete %al
+; X86-BMI2-NEXT: retl
;
; X64-BMI1-LABEL: scalar_i64_signbit_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movq %rsi, %rcx
-; X64-BMI1-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI1-NEXT: shlq %cl, %rax
-; X64-BMI1-NEXT: testq %rdi, %rax
-; X64-BMI1-NEXT: sete %al
+; X64-BMI1-NEXT: shrq %cl, %rdi
+; X64-BMI1-NEXT: btq $63, %rdi
+; X64-BMI1-NEXT: setae %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i64_signbit_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
-; X64-BMI2-NEXT: testq %rdi, %rax
-; X64-BMI2-NEXT: sete %al
+; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT: btq $63, %rax
+; X64-BMI2-NEXT: setae %al
; X64-BMI2-NEXT: retq
%t0 = shl i64 9223372036854775808, %y
%t1 = and i64 %t0, %x
@@ -395,17 +395,18 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-65536, %eax # imm = 0xFFFF0000
-; X86-BMI1-NEXT: movl $65535, %edx # imm = 0xFFFF
-; X86-BMI1-NEXT: shldl %cl, %eax, %edx
-; X86-BMI1-NEXT: shll %cl, %eax
-; X86-BMI1-NEXT: xorl %esi, %esi
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT: movl %edx, %esi
+; X86-BMI1-NEXT: shrl %cl, %esi
+; X86-BMI1-NEXT: shrdl %cl, %edx, %eax
+; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: testb $32, %cl
-; X86-BMI1-NEXT: cmovnel %eax, %edx
-; X86-BMI1-NEXT: cmovel %eax, %esi
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: orl %edx, %esi
+; X86-BMI1-NEXT: cmovnel %esi, %eax
+; X86-BMI1-NEXT: cmovel %esi, %edx
+; X86-BMI1-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
+; X86-BMI1-NEXT: movzwl %dx, %ecx
+; X86-BMI1-NEXT: orl %eax, %ecx
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: retl
@@ -414,17 +415,17 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI2-NEXT: movl $-65536, %eax # imm = 0xFFFF0000
-; X86-BMI2-NEXT: movl $65535, %edx # imm = 0xFFFF
-; X86-BMI2-NEXT: shldl %cl, %eax, %edx
-; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx
; X86-BMI2-NEXT: xorl %esi, %esi
; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: cmovnel %eax, %edx
-; X86-BMI2-NEXT: cmovel %eax, %esi
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: orl %edx, %esi
+; X86-BMI2-NEXT: cmovnel %edx, %eax
+; X86-BMI2-NEXT: cmovel %edx, %esi
+; X86-BMI2-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
+; X86-BMI2-NEXT: movzwl %si, %ecx
+; X86-BMI2-NEXT: orl %eax, %ecx
; X86-BMI2-NEXT: sete %al
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
@@ -432,18 +433,18 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movq %rsi, %rcx
-; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI1-NEXT: shlq %cl, %rax
-; X64-BMI1-NEXT: testq %rdi, %rax
+; X64-BMI1-NEXT: shrq %cl, %rdi
+; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
+; X64-BMI1-NEXT: testq %rax, %rdi
; X64-BMI1-NEXT: sete %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
-; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
-; X64-BMI2-NEXT: testq %rdi, %rax
+; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000
+; X64-BMI2-NEXT: testq %rcx, %rax
; X64-BMI2-NEXT: sete %al
; X64-BMI2-NEXT: retq
%t0 = shl i64 281474976645120, %y
@@ -477,10 +478,10 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; AVX2-LABEL: vec_4xi32_splat_eq:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
-; AVX2-NEXT: vpsllvd %xmm1, %xmm2, %xmm1
-; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT: ret{{[l|q]}}
;
; X64-SSE2-LABEL: vec_4xi32_splat_eq:
@@ -577,10 +578,10 @@ define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwi
; AVX2-LABEL: vec_4xi32_nonsplat_undef0_eq:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
-; AVX2-NEXT: vpsllvd %xmm1, %xmm2, %xmm1
-; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT: ret{{[l|q]}}
;
; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
@@ -709,20 +710,19 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_signbit_ne:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb $-128, %al
-; X86-NEXT: shlb %cl, %al
-; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
-; X86-NEXT: setne %al
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: shrb %cl, %al
+; X86-NEXT: shrb $7, %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_signbit_ne:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movb $-128, %al
+; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shlb %cl, %al
-; X64-NEXT: testb %dil, %al
-; X64-NEXT: setne %al
+; X64-NEXT: shrb %cl, %al
+; X64-NEXT: shrb $7, %al
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%t0 = shl i8 128, %y
%t1 = and i8 %t0, %x
OpenPOWER on IntegriCloud