diff options
author | Benjamin Kramer <benny.kra@googlemail.com> | 2017-12-14 14:03:07 +0000 |
---|---|---|
committer | Benjamin Kramer <benny.kra@googlemail.com> | 2017-12-14 14:03:07 +0000 |
commit | a85822cb1ed55466e16377c7823a59ffe44d8bcf (patch) | |
tree | d796c64cf183c4274462811e0a9f92833c3d66cb | |
parent | 77091b167fd959e1ee0c4dad4ec44de43b6c95db (diff) | |
download | bcm5719-llvm-a85822cb1ed55466e16377c7823a59ffe44d8bcf.tar.gz bcm5719-llvm-a85822cb1ed55466e16377c7823a59ffe44d8bcf.zip |
Revert "[DAGCombine] Move AND nodes to multiple load leaves"
This reverts commit r320679. Causes miscompiles.
llvm-svn: 320698
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 124 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/and-load-combine.ll | 652 |
2 files changed, 355 insertions, 421 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6ba248438d9..da2ca8851e3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -505,13 +505,6 @@ namespace { bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType, EVT &ExtVT, unsigned ShAmt = 0); - /// Used by BackwardsPropagateMask to find suitable loads. - bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads, - ConstantSDNode *Mask, SDNode *&UncombinedNode); - /// Attempt to propagate a given AND node back to load leaves so that they - /// can be combined into narrow loads. - bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG); - /// Helper function for MergeConsecutiveStores which merges the /// component store chains. SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, @@ -3805,113 +3798,6 @@ bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType, return true; } -bool DAGCombiner::SearchForAndLoads(SDNode *N, - SmallPtrSetImpl<LoadSDNode*> &Loads, - ConstantSDNode *Mask, - SDNode *&NodeToMask) { - // Recursively search for the operands, looking for loads which can be - // narrowed. - for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) { - SDValue Op = N->getOperand(i); - - // Constants should already be fixed up... - if (isa<ConstantSDNode>(Op)) - continue; - - if (!Op.hasOneUse() || Op.getValueType().isVector()) - return false; - - switch(Op.getOpcode()) { - case ISD::LOAD: { - auto *Load = cast<LoadSDNode>(Op); - EVT ExtVT; - if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) && - isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) { - // Only add this load if we can make it more narrow. - if (ExtVT.bitsLT(Load->getMemoryVT())) - Loads.insert(Load); - continue; - } - return false; - } - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: - case ISD::AssertZext: { - unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes(); - EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - EVT VT = Op.getOpcode() == ISD::AssertZext ? - cast<VTSDNode>(Op.getOperand(1))->getVT() : - Op.getOperand(0).getValueType(); - - // We can accept extending nodes if the mask is wider or an equal - // width to the original type. - if (ExtVT.bitsGE(VT)) - continue; - break; - } - case ISD::OR: - case ISD::XOR: - case ISD::AND: - if (!SearchForAndLoads(Op.getNode(), Loads, Mask, NodeToMask)) - return false; - continue; - } - - // Allow one node which will masked along with any loads found. - if (NodeToMask) - return false; - NodeToMask = Op.getNode(); - } - return true; -} - -bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { - auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (!Mask) - return false; - - if (!Mask->getAPIntValue().isMask()) - return false; - - // No need to do anything if the and directly uses a load. - if (isa<LoadSDNode>(N->getOperand(0))) - return false; - - SmallPtrSet<LoadSDNode*, 8> Loads; - SDNode *FixupNode = nullptr; - if (SearchForAndLoads(N, Loads, Mask, FixupNode)) { - if (Loads.size() == 0) - return false; - - SDValue MaskOp = N->getOperand(1); - - // If it exists, fixup the single node we allow in the tree that needs - // masking. - if (FixupNode) { - SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode), - FixupNode->getValueType(0), - SDValue(FixupNode, 0), MaskOp); - DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And); - DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), - MaskOp); - } - - for (auto *Load : Loads) { - SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0), - SDValue(Load, 0), MaskOp); - DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And); - DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp); - SDValue NewLoad = ReduceLoadWidth(And.getNode()); - assert(NewLoad && - "Shouldn't be masking the load if it can't be narrowed"); - CombineTo(Load, NewLoad, NewLoad.getValue(1)); - } - DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode()); - return true; - } - return false; -} - SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4113,16 +3999,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } - if (Level >= AfterLegalizeTypes) { - // Attempt to propagate the AND back up to the leaves which, if they're - // loads, can be combined to narrow loads and the AND node can be removed. - // Perform after legalization so that extend nodes will already be - // combined into the loads. - if (BackwardsPropagateMask(N, DAG)) { - return SDValue(N, 0); - } - } - if (SDValue Combined = visitANDLike(N0, N1, N)) return Combined; diff --git a/llvm/test/CodeGen/ARM/and-load-combine.ll b/llvm/test/CodeGen/ARM/and-load-combine.ll index fde10f8a400..6f0c1235959 100644 --- a/llvm/test/CodeGen/ARM/and-load-combine.ll +++ b/llvm/test/CodeGen/ARM/and-load-combine.ll @@ -5,30 +5,34 @@ ; RUN: llc -mtriple=thumbv8m.main %s -o - | FileCheck %s --check-prefix=THUMB2 define arm_aapcscc zeroext i1 @cmp_xor8_short_short(i16* nocapture readonly %a, - i16* nocapture readonly %b) { ; ARM-LABEL: cmp_xor8_short_short: -; ARM: ldrb r2, [r0] +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldrh r0, [r0] +; ARM-NEXT: ldrh r1, [r1] +; ARM-NEXT: eor r1, r1, r0 ; ARM-NEXT: mov r0, #0 -; ARM-NEXT: ldrb r1, [r1] -; ARM-NEXT: teq r1, r2 +; ARM-NEXT: tst r1, #255 ; ARM-NEXT: movweq r0, #1 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: cmp_xor8_short_short: -; ARMEB: ldrb r2, [r0, #1] +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldrh r0, [r0] +; ARMEB-NEXT: ldrh r1, [r1] +; ARMEB-NEXT: eor r1, r1, r0 ; ARMEB-NEXT: mov r0, #0 -; ARMEB-NEXT: ldrb r1, [r1, #1] -; ARMEB-NEXT: teq r1, r2 +; ARMEB-NEXT: tst r1, #255 ; ARMEB-NEXT: movweq r0, #1 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: cmp_xor8_short_short: -; THUMB1: ldrb r0, [r0] -; THUMB1-NEXT: ldrb r2, [r1] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldrh r0, [r0] +; THUMB1-NEXT: ldrh r2, [r1] ; THUMB1-NEXT: eors r2, r0 ; THUMB1-NEXT: movs r0, #1 ; THUMB1-NEXT: movs r1, #0 -; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: lsls r2, r2, #24 ; THUMB1-NEXT: beq .LBB0_2 ; THUMB1-NEXT: @ %bb.1: @ %entry ; THUMB1-NEXT: mov r0, r1 @@ -36,13 +40,16 @@ define arm_aapcscc zeroext i1 @cmp_xor8_short_short(i16* nocapture readonly %a, ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: cmp_xor8_short_short: -; THUMB2: ldrb r2, [r0] -; THUMB2-NEXT: movs r0, #0 -; THUMB2-NEXT: ldrb r1, [r1] -; THUMB2-NEXT: teq.w r1, r2 +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldrh r0, [r0] +; THUMB2-NEXT: ldrh r1, [r1] +; THUMB2-NEXT: eors r0, r1 +; THUMB2-NEXT: lsls r0, r0, #24 +; THUMB2-NEXT: mov.w r0, #0 ; THUMB2-NEXT: it eq ; THUMB2-NEXT: moveq r0, #1 ; THUMB2-NEXT: bx lr + i16* nocapture readonly %b) { entry: %0 = load i16, i16* %a, align 2 %1 = load i16, i16* %b, align 2 @@ -53,30 +60,34 @@ entry: } define arm_aapcscc zeroext i1 @cmp_xor8_short_int(i16* nocapture readonly %a, - i32* nocapture readonly %b) { ; ARM-LABEL: cmp_xor8_short_int: -; ARM: ldrb r2, [r0] +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldrh r0, [r0] +; ARM-NEXT: ldr r1, [r1] +; ARM-NEXT: eor r1, r1, r0 ; ARM-NEXT: mov r0, #0 -; ARM-NEXT: ldrb r1, [r1] -; ARM-NEXT: teq r1, r2 +; ARM-NEXT: tst r1, #255 ; ARM-NEXT: movweq r0, #1 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: cmp_xor8_short_int: -; ARMEB: ldrb r2, [r0, #1] +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldrh r0, [r0] +; ARMEB-NEXT: ldr r1, [r1] +; ARMEB-NEXT: eor r1, r1, r0 ; ARMEB-NEXT: mov r0, #0 -; ARMEB-NEXT: ldrb r1, [r1, #3] -; ARMEB-NEXT: teq r1, r2 +; ARMEB-NEXT: tst r1, #255 ; ARMEB-NEXT: movweq r0, #1 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: cmp_xor8_short_int: -; THUMB1: ldrb r0, [r0] -; THUMB1-NEXT: ldrb r2, [r1] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldrh r0, [r0] +; THUMB1-NEXT: ldr r2, [r1] ; THUMB1-NEXT: eors r2, r0 ; THUMB1-NEXT: movs r0, #1 ; THUMB1-NEXT: movs r1, #0 -; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: lsls r2, r2, #24 ; THUMB1-NEXT: beq .LBB1_2 ; THUMB1-NEXT: @ %bb.1: @ %entry ; THUMB1-NEXT: mov r0, r1 @@ -84,13 +95,16 @@ define arm_aapcscc zeroext i1 @cmp_xor8_short_int(i16* nocapture readonly %a, ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: cmp_xor8_short_int: -; THUMB2: ldrb r2, [r0] -; THUMB2-NEXT: movs r0, #0 -; THUMB2-NEXT: ldrb r1, [r1] -; THUMB2-NEXT: teq.w r1, r2 +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldrh r0, [r0] +; THUMB2-NEXT: ldr r1, [r1] +; THUMB2-NEXT: eors r0, r1 +; THUMB2-NEXT: lsls r0, r0, #24 +; THUMB2-NEXT: mov.w r0, #0 ; THUMB2-NEXT: it eq ; THUMB2-NEXT: moveq r0, #1 ; THUMB2-NEXT: bx lr + i32* nocapture readonly %b) { entry: %0 = load i16, i16* %a, align 2 %conv = zext i16 %0 to i32 @@ -102,30 +116,34 @@ entry: } define arm_aapcscc zeroext i1 @cmp_xor8_int_int(i32* nocapture readonly %a, - i32* nocapture readonly %b) { ; ARM-LABEL: cmp_xor8_int_int: -; ARM: ldrb r2, [r0] +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldr r0, [r0] +; ARM-NEXT: ldr r1, [r1] +; ARM-NEXT: eor r1, r1, r0 ; ARM-NEXT: mov r0, #0 -; ARM-NEXT: ldrb r1, [r1] -; ARM-NEXT: teq r1, r2 +; ARM-NEXT: tst r1, #255 ; ARM-NEXT: movweq r0, #1 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: cmp_xor8_int_int: -; ARMEB: ldrb r2, [r0, #3] +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldr r0, [r0] +; ARMEB-NEXT: ldr r1, [r1] +; ARMEB-NEXT: eor r1, r1, r0 ; ARMEB-NEXT: mov r0, #0 -; ARMEB-NEXT: ldrb r1, [r1, #3] -; ARMEB-NEXT: teq r1, r2 +; ARMEB-NEXT: tst r1, #255 ; ARMEB-NEXT: movweq r0, #1 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: cmp_xor8_int_int: -; THUMB1: ldrb r0, [r0] -; THUMB1-NEXT: ldrb r2, [r1] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldr r0, [r0] +; THUMB1-NEXT: ldr r2, [r1] ; THUMB1-NEXT: eors r2, r0 ; THUMB1-NEXT: movs r0, #1 ; THUMB1-NEXT: movs r1, #0 -; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: lsls r2, r2, #24 ; THUMB1-NEXT: beq .LBB2_2 ; THUMB1-NEXT: @ %bb.1: @ %entry ; THUMB1-NEXT: mov r0, r1 @@ -133,13 +151,16 @@ define arm_aapcscc zeroext i1 @cmp_xor8_int_int(i32* nocapture readonly %a, ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: cmp_xor8_int_int: -; THUMB2: ldrb r2, [r0] -; THUMB2-NEXT: movs r0, #0 -; THUMB2-NEXT: ldrb r1, [r1] -; THUMB2-NEXT: teq.w r1, r2 +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldr r0, [r0] +; THUMB2-NEXT: ldr r1, [r1] +; THUMB2-NEXT: eors r0, r1 +; THUMB2-NEXT: lsls r0, r0, #24 +; THUMB2-NEXT: mov.w r0, #0 ; THUMB2-NEXT: it eq ; THUMB2-NEXT: moveq r0, #1 ; THUMB2-NEXT: bx lr + i32* nocapture readonly %b) { entry: %0 = load i32, i32* %a, align 4 %1 = load i32, i32* %b, align 4 @@ -150,30 +171,36 @@ entry: } define arm_aapcscc zeroext i1 @cmp_xor16(i32* nocapture readonly %a, - i32* nocapture readonly %b) { ; ARM-LABEL: cmp_xor16: -; ARM: ldrh r2, [r0] +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldr r0, [r0] +; ARM-NEXT: movw r2, #65535 +; ARM-NEXT: ldr r1, [r1] +; ARM-NEXT: eor r1, r1, r0 ; ARM-NEXT: mov r0, #0 -; ARM-NEXT: ldrh r1, [r1] -; ARM-NEXT: teq r1, r2 +; ARM-NEXT: tst r1, r2 ; ARM-NEXT: movweq r0, #1 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: cmp_xor16: -; ARMEB: ldrh r2, [r0, #2] +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldr r0, [r0] +; ARMEB-NEXT: movw r2, #65535 +; ARMEB-NEXT: ldr r1, [r1] +; ARMEB-NEXT: eor r1, r1, r0 ; ARMEB-NEXT: mov r0, #0 -; ARMEB-NEXT: ldrh r1, [r1, #2] -; ARMEB-NEXT: teq r1, r2 +; ARMEB-NEXT: tst r1, r2 ; ARMEB-NEXT: movweq r0, #1 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: cmp_xor16: -; THUMB1: ldrh r0, [r0] -; THUMB1-NEXT: ldrh r2, [r1] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldr r0, [r0] +; THUMB1-NEXT: ldr r2, [r1] ; THUMB1-NEXT: eors r2, r0 ; THUMB1-NEXT: movs r0, #1 ; THUMB1-NEXT: movs r1, #0 -; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: lsls r2, r2, #16 ; THUMB1-NEXT: beq .LBB3_2 ; THUMB1-NEXT: @ %bb.1: @ %entry ; THUMB1-NEXT: mov r0, r1 @@ -181,13 +208,16 @@ define arm_aapcscc zeroext i1 @cmp_xor16(i32* nocapture readonly %a, ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: cmp_xor16: -; THUMB2: ldrh r2, [r0] -; THUMB2-NEXT: movs r0, #0 -; THUMB2-NEXT: ldrh r1, [r1] -; THUMB2-NEXT: teq.w r1, r2 +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldr r0, [r0] +; THUMB2-NEXT: ldr r1, [r1] +; THUMB2-NEXT: eors r0, r1 +; THUMB2-NEXT: lsls r0, r0, #16 +; THUMB2-NEXT: mov.w r0, #0 ; THUMB2-NEXT: it eq ; THUMB2-NEXT: moveq r0, #1 ; THUMB2-NEXT: bx lr + i32* nocapture readonly %b) { entry: %0 = load i32, i32* %a, align 4 %1 = load i32, i32* %b, align 4 @@ -198,30 +228,34 @@ entry: } define arm_aapcscc zeroext i1 @cmp_or8_short_short(i16* nocapture readonly %a, - i16* nocapture readonly %b) { ; ARM-LABEL: cmp_or8_short_short: -; ARM: ldrb r0, [r0] -; ARM-NEXT: ldrb r1, [r1] -; ARM-NEXT: orrs r0, r1, r0 +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldrh r0, [r0] +; ARM-NEXT: ldrh r1, [r1] +; ARM-NEXT: orr r1, r1, r0 ; ARM-NEXT: mov r0, #0 +; ARM-NEXT: tst r1, #255 ; ARM-NEXT: movweq r0, #1 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: cmp_or8_short_short: -; ARMEB: ldrb r0, [r0, #1] -; ARMEB-NEXT: ldrb r1, [r1, #1] -; ARMEB-NEXT: orrs r0, r1, r0 +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldrh r0, [r0] +; ARMEB-NEXT: ldrh r1, [r1] +; ARMEB-NEXT: orr r1, r1, r0 ; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: tst r1, #255 ; ARMEB-NEXT: movweq r0, #1 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: cmp_or8_short_short: -; THUMB1: ldrb r0, [r0] -; THUMB1-NEXT: ldrb r2, [r1] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldrh r0, [r0] +; THUMB1-NEXT: ldrh r2, [r1] ; THUMB1-NEXT: orrs r2, r0 ; THUMB1-NEXT: movs r0, #1 ; THUMB1-NEXT: movs r1, #0 -; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: lsls r2, r2, #24 ; THUMB1-NEXT: beq .LBB4_2 ; THUMB1-NEXT: @ %bb.1: @ %entry ; THUMB1-NEXT: mov r0, r1 @@ -229,13 +263,16 @@ define arm_aapcscc zeroext i1 @cmp_or8_short_short(i16* nocapture readonly %a, ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: cmp_or8_short_short: -; THUMB2: ldrb r0, [r0] -; THUMB2-NEXT: ldrb r1, [r1] +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldrh r0, [r0] +; THUMB2-NEXT: ldrh r1, [r1] ; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: lsls r0, r0, #24 ; THUMB2-NEXT: mov.w r0, #0 ; THUMB2-NEXT: it eq ; THUMB2-NEXT: moveq r0, #1 ; THUMB2-NEXT: bx lr + i16* nocapture readonly %b) { entry: %0 = load i16, i16* %a, align 2 %1 = load i16, i16* %b, align 2 @@ -246,30 +283,34 @@ entry: } define arm_aapcscc zeroext i1 @cmp_or8_short_int(i16* nocapture readonly %a, - i32* nocapture readonly %b) { ; ARM-LABEL: cmp_or8_short_int: -; ARM: ldrb r0, [r0] -; ARM-NEXT: ldrb r1, [r1] -; ARM-NEXT: orrs r0, r1, r0 +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldrh r0, [r0] +; ARM-NEXT: ldr r1, [r1] +; ARM-NEXT: orr r1, r1, r0 ; ARM-NEXT: mov r0, #0 +; ARM-NEXT: tst r1, #255 ; ARM-NEXT: movweq r0, #1 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: cmp_or8_short_int: -; ARMEB: ldrb r0, [r0, #1] -; ARMEB-NEXT: ldrb r1, [r1, #3] -; ARMEB-NEXT: orrs r0, r1, r0 +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldrh r0, [r0] +; ARMEB-NEXT: ldr r1, [r1] +; ARMEB-NEXT: orr r1, r1, r0 ; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: tst r1, #255 ; ARMEB-NEXT: movweq r0, #1 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: cmp_or8_short_int: -; THUMB1: ldrb r0, [r0] -; THUMB1-NEXT: ldrb r2, [r1] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldrh r0, [r0] +; THUMB1-NEXT: ldr r2, [r1] ; THUMB1-NEXT: orrs r2, r0 ; THUMB1-NEXT: movs r0, #1 ; THUMB1-NEXT: movs r1, #0 -; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: lsls r2, r2, #24 ; THUMB1-NEXT: beq .LBB5_2 ; THUMB1-NEXT: @ %bb.1: @ %entry ; THUMB1-NEXT: mov r0, r1 @@ -277,13 +318,16 @@ define arm_aapcscc zeroext i1 @cmp_or8_short_int(i16* nocapture readonly %a, ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: cmp_or8_short_int: -; THUMB2: ldrb r0, [r0] -; THUMB2-NEXT: ldrb r1, [r1] +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldrh r0, [r0] +; THUMB2-NEXT: ldr r1, [r1] ; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: lsls r0, r0, #24 ; THUMB2-NEXT: mov.w r0, #0 ; THUMB2-NEXT: it eq ; THUMB2-NEXT: moveq r0, #1 ; THUMB2-NEXT: bx lr + i32* nocapture readonly %b) { entry: %0 = load i16, i16* %a, align 2 %conv = zext i16 %0 to i32 @@ -295,30 +339,34 @@ entry: } define arm_aapcscc zeroext i1 @cmp_or8_int_int(i32* nocapture readonly %a, - i32* nocapture readonly %b) { ; ARM-LABEL: cmp_or8_int_int: -; ARM: ldrb r0, [r0] -; ARM-NEXT: ldrb r1, [r1] -; ARM-NEXT: orrs r0, r1, r0 +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldr r0, [r0] +; ARM-NEXT: ldr r1, [r1] +; ARM-NEXT: orr r1, r1, r0 ; ARM-NEXT: mov r0, #0 +; ARM-NEXT: tst r1, #255 ; ARM-NEXT: movweq r0, #1 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: cmp_or8_int_int: -; ARMEB: ldrb r0, [r0, #3] -; ARMEB-NEXT: ldrb r1, [r1, #3] -; ARMEB-NEXT: orrs r0, r1, r0 +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldr r0, [r0] +; ARMEB-NEXT: ldr r1, [r1] +; ARMEB-NEXT: orr r1, r1, r0 ; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: tst r1, #255 ; ARMEB-NEXT: movweq r0, #1 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: cmp_or8_int_int: -; THUMB1: ldrb r0, [r0] -; THUMB1-NEXT: ldrb r2, [r1] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldr r0, [r0] +; THUMB1-NEXT: ldr r2, [r1] ; THUMB1-NEXT: orrs r2, r0 ; THUMB1-NEXT: movs r0, #1 ; THUMB1-NEXT: movs r1, #0 -; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: lsls r2, r2, #24 ; THUMB1-NEXT: beq .LBB6_2 ; THUMB1-NEXT: @ %bb.1: @ %entry ; THUMB1-NEXT: mov r0, r1 @@ -326,13 +374,16 @@ define arm_aapcscc zeroext i1 @cmp_or8_int_int(i32* nocapture readonly %a, ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: cmp_or8_int_int: -; THUMB2: ldrb r0, [r0] -; THUMB2-NEXT: ldrb r1, [r1] +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldr r0, [r0] +; THUMB2-NEXT: ldr r1, [r1] ; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: lsls r0, r0, #24 ; THUMB2-NEXT: mov.w r0, #0 ; THUMB2-NEXT: it eq ; THUMB2-NEXT: moveq r0, #1 ; THUMB2-NEXT: bx lr + i32* nocapture readonly %b) { entry: %0 = load i32, i32* %a, align 4 %1 = load i32, i32* %b, align 4 @@ -343,30 +394,36 @@ entry: } define arm_aapcscc zeroext i1 @cmp_or16(i32* nocapture readonly %a, - i32* nocapture readonly %b) { ; ARM-LABEL: cmp_or16: -; ARM: ldrh r0, [r0] -; ARM-NEXT: ldrh r1, [r1] -; ARM-NEXT: orrs r0, r1, r0 +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldr r0, [r0] +; ARM-NEXT: movw r2, #65535 +; ARM-NEXT: ldr r1, [r1] +; ARM-NEXT: orr r1, r1, r0 ; ARM-NEXT: mov r0, #0 +; ARM-NEXT: tst r1, r2 ; ARM-NEXT: movweq r0, #1 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: cmp_or16: -; ARMEB: ldrh r0, [r0, #2] -; ARMEB-NEXT: ldrh r1, [r1, #2] -; ARMEB-NEXT: orrs r0, r1, r0 +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldr r0, [r0] +; ARMEB-NEXT: movw r2, #65535 +; ARMEB-NEXT: ldr r1, [r1] +; ARMEB-NEXT: orr r1, r1, r0 ; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: tst r1, r2 ; ARMEB-NEXT: movweq r0, #1 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: cmp_or16: -; THUMB1: ldrh r0, [r0] -; THUMB1-NEXT: ldrh r2, [r1] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldr r0, [r0] +; THUMB1-NEXT: ldr r2, [r1] ; THUMB1-NEXT: orrs r2, r0 ; THUMB1-NEXT: movs r0, #1 ; THUMB1-NEXT: movs r1, #0 -; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: lsls r2, r2, #16 ; THUMB1-NEXT: beq .LBB7_2 ; THUMB1-NEXT: @ %bb.1: @ %entry ; THUMB1-NEXT: mov r0, r1 @@ -374,13 +431,16 @@ define arm_aapcscc zeroext i1 @cmp_or16(i32* nocapture readonly %a, ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: cmp_or16: -; THUMB2: ldrh r0, [r0] -; THUMB2-NEXT: ldrh r1, [r1] +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldr r0, [r0] +; THUMB2-NEXT: ldr r1, [r1] ; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: lsls r0, r0, #16 ; THUMB2-NEXT: mov.w r0, #0 ; THUMB2-NEXT: it eq ; THUMB2-NEXT: moveq r0, #1 ; THUMB2-NEXT: bx lr + i32* nocapture readonly %b) { entry: %0 = load i32, i32* %a, align 4 %1 = load i32, i32* %b, align 4 @@ -391,29 +451,34 @@ entry: } define arm_aapcscc zeroext i1 @cmp_and8_short_short(i16* nocapture readonly %a, - i16* nocapture readonly %b) { ; ARM-LABEL: cmp_and8_short_short: -; ARM: ldrb r2, [r0] +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldrh r1, [r1] +; ARM-NEXT: ldrh r0, [r0] +; ARM-NEXT: and r1, r0, r1 ; ARM-NEXT: mov r0, #0 -; ARM-NEXT: ldrb r1, [r1] -; ARM-NEXT: tst r2, r1 +; ARM-NEXT: tst r1, #255 ; ARM-NEXT: movweq r0, #1 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: cmp_and8_short_short: -; ARMEB: ldrb r2, [r0, #1] +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldrh r1, [r1] +; ARMEB-NEXT: ldrh r0, [r0] +; ARMEB-NEXT: and r1, r0, r1 ; ARMEB-NEXT: mov r0, #0 -; ARMEB-NEXT: ldrb r1, [r1, #1] -; ARMEB-NEXT: tst r2, r1 +; ARMEB-NEXT: tst r1, #255 ; ARMEB-NEXT: movweq r0, #1 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: cmp_and8_short_short: -; THUMB1: ldrb r2, [r1] -; THUMB1-NEXT: ldrb r3, [r0] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldrh r1, [r1] +; THUMB1-NEXT: ldrh r2, [r0] +; THUMB1-NEXT: ands r2, r1 ; THUMB1-NEXT: movs r0, #1 ; THUMB1-NEXT: movs r1, #0 -; THUMB1-NEXT: tst r3, r2 +; THUMB1-NEXT: lsls r2, r2, #24 ; THUMB1-NEXT: beq .LBB8_2 ; THUMB1-NEXT: @ %bb.1: @ %entry ; THUMB1-NEXT: mov r0, r1 @@ -421,13 +486,16 @@ define arm_aapcscc zeroext i1 @cmp_and8_short_short(i16* nocapture readonly %a, ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: cmp_and8_short_short: -; THUMB2: ldrb r2, [r0] -; THUMB2-NEXT: movs r0, #0 -; THUMB2-NEXT: ldrb r1, [r1] -; THUMB2-NEXT: tst r2, r1 +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldrh r1, [r1] +; THUMB2-NEXT: ldrh r0, [r0] +; THUMB2-NEXT: ands r0, r1 +; THUMB2-NEXT: lsls r0, r0, #24 +; THUMB2-NEXT: mov.w r0, #0 ; THUMB2-NEXT: it eq ; THUMB2-NEXT: moveq r0, #1 ; THUMB2-NEXT: bx lr + i16* nocapture readonly %b) { entry: %0 = load i16, i16* %a, align 2 %1 = load i16, i16* %b, align 2 @@ -438,29 +506,34 @@ entry: } define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture readonly %a, - i32* nocapture readonly %b) { ; ARM-LABEL: cmp_and8_short_int: -; ARM: ldrb r2, [r0] +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldrh r0, [r0] +; ARM-NEXT: ldr r1, [r1] +; ARM-NEXT: and r1, r1, r0 ; ARM-NEXT: mov r0, #0 -; ARM-NEXT: ldrb r1, [r1] -; ARM-NEXT: tst r1, r2 +; ARM-NEXT: tst r1, #255 ; ARM-NEXT: movweq r0, #1 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: cmp_and8_short_int: -; ARMEB: ldrb r2, [r0, #1] +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldrh r0, [r0] +; ARMEB-NEXT: ldr r1, [r1] +; ARMEB-NEXT: and r1, r1, r0 ; ARMEB-NEXT: mov r0, #0 -; ARMEB-NEXT: ldrb r1, [r1, #3] -; ARMEB-NEXT: tst r1, r2 +; ARMEB-NEXT: tst r1, #255 ; ARMEB-NEXT: movweq r0, #1 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: cmp_and8_short_int: -; THUMB1: ldrb r2, [r0] -; THUMB1-NEXT: ldrb r3, [r1] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldrh r0, [r0] +; THUMB1-NEXT: ldr r2, [r1] +; THUMB1-NEXT: ands r2, r0 ; THUMB1-NEXT: movs r0, #1 ; THUMB1-NEXT: movs r1, #0 -; THUMB1-NEXT: tst r3, r2 +; THUMB1-NEXT: lsls r2, r2, #24 ; THUMB1-NEXT: beq .LBB9_2 ; THUMB1-NEXT: @ %bb.1: @ %entry ; THUMB1-NEXT: mov r0, r1 @@ -468,13 +541,16 @@ define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture readonly %a, ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: cmp_and8_short_int: -; THUMB2: ldrb r2, [r0] -; THUMB2-NEXT: movs r0, #0 -; THUMB2-NEXT: ldrb r1, [r1] -; THUMB2-NEXT: tst r1, r2 +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldrh r0, [r0] +; THUMB2-NEXT: ldr r1, [r1] +; THUMB2-NEXT: ands r0, r1 +; THUMB2-NEXT: lsls r0, r0, #24 +; THUMB2-NEXT: mov.w r0, #0 ; THUMB2-NEXT: it eq ; THUMB2-NEXT: moveq r0, #1 ; THUMB2-NEXT: bx lr + i32* nocapture readonly %b) { entry: %0 = load i16, i16* %a, align 2 %1 = load i32, i32* %b, align 4 @@ -486,29 +562,34 @@ entry: } define arm_aapcscc zeroext i1 @cmp_and8_int_int(i32* nocapture readonly %a, - i32* nocapture readonly %b) { ; ARM-LABEL: cmp_and8_int_int: -; ARM: ldrb r2, [r0] +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldr r1, [r1] +; ARM-NEXT: ldr r0, [r0] +; ARM-NEXT: and r1, r0, r1 ; ARM-NEXT: mov r0, #0 -; ARM-NEXT: ldrb r1, [r1] -; ARM-NEXT: tst r2, r1 +; ARM-NEXT: tst r1, #255 ; ARM-NEXT: movweq r0, #1 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: cmp_and8_int_int: -; ARMEB: ldrb r2, [r0, #3] +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldr r1, [r1] +; ARMEB-NEXT: ldr r0, [r0] +; ARMEB-NEXT: and r1, r0, r1 ; ARMEB-NEXT: mov r0, #0 -; ARMEB-NEXT: ldrb r1, [r1, #3] -; ARMEB-NEXT: tst r2, r1 +; ARMEB-NEXT: tst r1, #255 ; ARMEB-NEXT: movweq r0, #1 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: cmp_and8_int_int: -; THUMB1: ldrb r2, [r1] -; THUMB1-NEXT: ldrb r3, [r0] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldr r1, [r1] +; THUMB1-NEXT: ldr r2, [r0] +; THUMB1-NEXT: ands r2, r1 ; THUMB1-NEXT: movs r0, #1 ; THUMB1-NEXT: movs r1, #0 -; THUMB1-NEXT: tst r3, r2 +; THUMB1-NEXT: lsls r2, r2, #24 ; THUMB1-NEXT: beq .LBB10_2 ; THUMB1-NEXT: @ %bb.1: @ %entry ; THUMB1-NEXT: mov r0, r1 @@ -516,13 +597,16 @@ define arm_aapcscc zeroext i1 @cmp_and8_int_int(i32* nocapture readonly %a, ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: cmp_and8_int_int: -; THUMB2: ldrb r2, [r0] -; THUMB2-NEXT: movs r0, #0 -; THUMB2-NEXT: ldrb r1, [r1] -; THUMB2-NEXT: tst r2, r1 +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldr r1, [r1] +; THUMB2-NEXT: ldr r0, [r0] +; THUMB2-NEXT: ands r0, r1 +; THUMB2-NEXT: lsls r0, r0, #24 +; THUMB2-NEXT: mov.w r0, #0 ; THUMB2-NEXT: it eq ; THUMB2-NEXT: moveq r0, #1 ; THUMB2-NEXT: bx lr + i32* nocapture readonly %b) { entry: %0 = load i32, i32* %a, align 4 %1 = load i32, i32* %b, align 4 @@ -533,29 +617,36 @@ entry: } define arm_aapcscc zeroext i1 @cmp_and16(i32* nocapture readonly %a, - i32* nocapture readonly %b) { ; ARM-LABEL: cmp_and16: -; ARM: ldrh r2, [r0] +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldr r1, [r1] +; ARM-NEXT: movw r2, #65535 +; ARM-NEXT: ldr r0, [r0] +; ARM-NEXT: and r1, r0, r1 ; ARM-NEXT: mov r0, #0 -; ARM-NEXT: ldrh r1, [r1] -; ARM-NEXT: tst r2, r1 +; ARM-NEXT: tst r1, r2 ; ARM-NEXT: movweq r0, #1 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: cmp_and16: -; ARMEB: ldrh r2, [r0, #2] +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldr r1, [r1] +; ARMEB-NEXT: movw r2, #65535 +; ARMEB-NEXT: ldr r0, [r0] +; ARMEB-NEXT: and r1, r0, r1 ; ARMEB-NEXT: mov r0, #0 -; ARMEB-NEXT: ldrh r1, [r1, #2] -; ARMEB-NEXT: tst r2, r1 +; ARMEB-NEXT: tst r1, r2 ; ARMEB-NEXT: movweq r0, #1 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: cmp_and16: -; THUMB1: ldrh r2, [r1] -; THUMB1-NEXT: ldrh r3, [r0] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldr r1, [r1] +; THUMB1-NEXT: ldr r2, [r0] +; THUMB1-NEXT: ands r2, r1 ; THUMB1-NEXT: movs r0, #1 ; THUMB1-NEXT: movs r1, #0 -; THUMB1-NEXT: tst r3, r2 +; THUMB1-NEXT: lsls r2, r2, #16 ; THUMB1-NEXT: beq .LBB11_2 ; THUMB1-NEXT: @ %bb.1: @ %entry ; THUMB1-NEXT: mov r0, r1 @@ -563,13 +654,16 @@ define arm_aapcscc zeroext i1 @cmp_and16(i32* nocapture readonly %a, ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: cmp_and16: -; THUMB2: ldrh r2, [r0] -; THUMB2-NEXT: movs r0, #0 -; THUMB2-NEXT: ldrh r1, [r1] -; THUMB2-NEXT: tst r2, r1 +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldr r1, [r1] +; THUMB2-NEXT: ldr r0, [r0] +; THUMB2-NEXT: ands r0, r1 +; THUMB2-NEXT: lsls r0, r0, #16 +; THUMB2-NEXT: mov.w r0, #0 ; THUMB2-NEXT: it eq ; THUMB2-NEXT: moveq r0, #1 ; THUMB2-NEXT: bx lr + i32* nocapture readonly %b) { entry: %0 = load i32, i32* %a, align 4 %1 = load i32, i32* %b, align 4 @@ -581,31 +675,35 @@ entry: define arm_aapcscc i32 @add_and16(i32* nocapture readonly %a, i32 %y, i32 %z) { ; ARM-LABEL: add_and16: -; ARM: add r1, r1, r2 -; ARM-NEXT: ldrh r0, [r0] -; ARM-NEXT: uxth r1, r1 +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldr r0, [r0] +; ARM-NEXT: add r1, r1, r2 ; ARM-NEXT: orr r0, r0, r1 +; ARM-NEXT: uxth r0, r0 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: add_and16: -; ARMEB: add r1, r1, r2 -; ARMEB-NEXT: ldrh r0, [r0, #2] -; ARMEB-NEXT: uxth r1, r1 +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldr r0, [r0] +; ARMEB-NEXT: add r1, r1, r2 ; ARMEB-NEXT: orr r0, r0, r1 +; ARMEB-NEXT: uxth r0, r0 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: add_and16: -; THUMB1: adds r1, r1, r2 -; THUMB1-NEXT: uxth r1, r1 -; THUMB1-NEXT: ldrh r0, [r0] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: adds r1, r1, r2 +; THUMB1-NEXT: ldr r0, [r0] ; THUMB1-NEXT: orrs r0, r1 +; THUMB1-NEXT: uxth r0, r0 ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: add_and16: -; THUMB2: add r1, r2 -; THUMB2-NEXT: ldrh r0, [r0] -; THUMB2-NEXT: uxth r1, r1 +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldr r0, [r0] +; THUMB2-NEXT: add r1, r2 ; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: uxth r0, r0 ; THUMB2-NEXT: bx lr entry: %x = load i32, i32* %a, align 4 @@ -617,39 +715,43 @@ entry: define arm_aapcscc i32 @test1(i32* %a, i32* %b, i32 %x, i32 %y) { ; ARM-LABEL: test1: -; ARM: mul r2, r2, r3 -; ARM-NEXT: ldrh r1, [r1] -; ARM-NEXT: ldrh r0, [r0] +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: mul r2, r2, r3 +; ARM-NEXT: ldr r1, [r1] +; ARM-NEXT: ldr r0, [r0] ; ARM-NEXT: eor r0, r0, r1 -; ARM-NEXT: uxth r1, r2 -; ARM-NEXT: orr r0, r0, r1 +; ARM-NEXT: orr r0, r0, r2 +; ARM-NEXT: uxth r0, r0 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: test1: -; ARMEB: mul r2, r2, r3 -; ARMEB-NEXT: ldrh r1, [r1, #2] -; ARMEB-NEXT: ldrh r0, [r0, #2] +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: mul r2, r2, r3 +; ARMEB-NEXT: ldr r1, [r1] +; ARMEB-NEXT: ldr r0, [r0] ; ARMEB-NEXT: eor r0, r0, r1 -; ARMEB-NEXT: uxth r1, r2 -; ARMEB-NEXT: orr r0, r0, r1 +; ARMEB-NEXT: orr r0, r0, r2 +; ARMEB-NEXT: uxth r0, r0 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: test1: -; THUMB1: ldrh r1, [r1] -; THUMB1-NEXT: ldrh r4, [r0] -; THUMB1-NEXT: eors r4, r1 +; THUMB1: @ %bb.0: @ %entry ; THUMB1-NEXT: muls r2, r3, r2 -; THUMB1-NEXT: uxth r0, r2 -; THUMB1-NEXT: orrs r0, r4 -; THUMB1-NEXT: pop +; THUMB1-NEXT: ldr r1, [r1] +; THUMB1-NEXT: ldr r0, [r0] +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: orrs r0, r2 +; THUMB1-NEXT: uxth r0, r0 +; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: test1: -; THUMB2: ldrh r1, [r1] -; THUMB2-NEXT: ldrh r0, [r0] +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: muls r2, r3, r2 +; THUMB2-NEXT: ldr r1, [r1] +; THUMB2-NEXT: ldr r0, [r0] ; THUMB2-NEXT: eors r0, r1 -; THUMB2-NEXT: mul r1, r2, r3 -; THUMB2-NEXT: uxth r1, r1 -; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: orrs r0, r2 +; THUMB2-NEXT: uxth r0, r0 ; THUMB2-NEXT: bx lr entry: %0 = load i32, i32* %a, align 4 @@ -663,7 +765,8 @@ entry: define arm_aapcscc i32 @test2(i32* %a, i32* %b, i32 %x, i32 %y) { ; ARM-LABEL: test2: -; ARM: ldr r1, [r1] +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldr r1, [r1] ; ARM-NEXT: ldr r0, [r0] ; ARM-NEXT: mul r1, r2, r1 ; ARM-NEXT: eor r0, r0, r3 @@ -672,7 +775,8 @@ define arm_aapcscc i32 @test2(i32* %a, i32* %b, i32 %x, i32 %y) { ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: test2: -; ARMEB: ldr r1, [r1] +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldr r1, [r1] ; ARMEB-NEXT: ldr r0, [r0] ; ARMEB-NEXT: mul r1, r2, r1 ; ARMEB-NEXT: eor r0, r0, r3 @@ -681,7 +785,8 @@ define arm_aapcscc i32 @test2(i32* %a, i32* %b, i32 %x, i32 %y) { ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: test2: -; THUMB1: ldr r1, [r1] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldr r1, [r1] ; THUMB1-NEXT: muls r1, r2, r1 ; THUMB1-NEXT: ldr r0, [r0] ; THUMB1-NEXT: eors r0, r3 @@ -690,7 +795,8 @@ define arm_aapcscc i32 @test2(i32* %a, i32* %b, i32 %x, i32 %y) { ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: test2: -; THUMB2: ldr r1, [r1] +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldr r1, [r1] ; THUMB2-NEXT: ldr r0, [r0] ; THUMB2-NEXT: muls r1, r2, r1 ; THUMB2-NEXT: eors r0, r3 @@ -709,7 +815,8 @@ entry: define arm_aapcscc i32 @test3(i32* %a, i32* %b, i32 %x, i16* %y) { ; ARM-LABEL: test3: -; ARM: ldr r0, [r0] +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldr r0, [r0] ; ARM-NEXT: mul r1, r2, r0 ; ARM-NEXT: ldrh r2, [r3] ; ARM-NEXT: eor r0, r0, r2 @@ -718,7 +825,8 @@ define arm_aapcscc i32 @test3(i32* %a, i32* %b, i32 %x, i16* %y) { ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: test3: -; ARMEB: ldr r0, [r0] +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldr r0, [r0] ; ARMEB-NEXT: mul r1, r2, r0 ; ARMEB-NEXT: ldrh r2, [r3] ; ARMEB-NEXT: eor r0, r0, r2 @@ -727,7 +835,8 @@ define arm_aapcscc i32 @test3(i32* %a, i32* %b, i32 %x, i16* %y) { ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: test3: -; THUMB1: ldr r0, [r0] +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldr r0, [r0] ; THUMB1-NEXT: muls r2, r0, r2 ; THUMB1-NEXT: ldrh r1, [r3] ; THUMB1-NEXT: eors r1, r0 @@ -736,7 +845,8 @@ define arm_aapcscc i32 @test3(i32* %a, i32* %b, i32 %x, i16* %y) { ; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: test3: -; THUMB2: ldr r0, [r0] +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldr r0, [r0] ; THUMB2-NEXT: mul r1, r2, r0 ; THUMB2-NEXT: ldrh r2, [r3] ; THUMB2-NEXT: eors r0, r2 @@ -756,39 +866,43 @@ entry: define arm_aapcscc i32 @test4(i32* %a, i32* %b, i32 %x, i32 %y) { ; ARM-LABEL: test4: -; ARM: mul r2, r2, r3 -; ARM-NEXT: ldrh r1, [r1] -; ARM-NEXT: ldrh r0, [r0] +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: mul r2, r2, r3 +; ARM-NEXT: ldr r1, [r1] +; ARM-NEXT: ldr r0, [r0] ; ARM-NEXT: eor r0, r0, r1 -; ARM-NEXT: uxth r1, r2 -; ARM-NEXT: orr r0, r0, r1 +; ARM-NEXT: orr r0, r0, r2 +; ARM-NEXT: uxth r0, r0 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: test4: -; ARMEB: mul r2, r2, r3 -; ARMEB-NEXT: ldrh r1, [r1, #2] -; ARMEB-NEXT: ldrh r0, [r0, #2] +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: mul r2, r2, r3 +; ARMEB-NEXT: ldr r1, [r1] +; ARMEB-NEXT: ldr r0, [r0] ; ARMEB-NEXT: eor r0, r0, r1 -; ARMEB-NEXT: uxth r1, r2 -; ARMEB-NEXT: orr r0, r0, r1 +; ARMEB-NEXT: orr r0, r0, r2 +; ARMEB-NEXT: uxth r0, r0 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: test4: -; THUMB1: ldrh r1, [r1] -; THUMB1-NEXT: ldrh r4, [r0] -; THUMB1-NEXT: eors r4, r1 +; THUMB1: @ %bb.0: @ %entry ; THUMB1-NEXT: muls r2, r3, r2 -; THUMB1-NEXT: uxth r0, r2 -; THUMB1-NEXT: orrs r0, r4 -; THUMB1-NEXT: pop +; THUMB1-NEXT: ldr r1, [r1] +; THUMB1-NEXT: ldr r0, [r0] +; THUMB1-NEXT: eors r0, r1 +; THUMB1-NEXT: orrs r0, r2 +; THUMB1-NEXT: uxth r0, r0 +; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: test4: -; THUMB2: ldrh r1, [r1] -; THUMB2-NEXT: ldrh r0, [r0] +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: muls r2, r3, r2 +; THUMB2-NEXT: ldr r1, [r1] +; THUMB2-NEXT: ldr r0, [r0] ; THUMB2-NEXT: eors r0, r1 -; THUMB2-NEXT: mul r1, r2, r3 -; THUMB2-NEXT: uxth r1, r1 -; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: orrs r0, r2 +; THUMB2-NEXT: uxth r0, r0 ; THUMB2-NEXT: bx lr entry: %0 = load i32, i32* %a, align 4 @@ -802,39 +916,43 @@ entry: define arm_aapcscc i32 @test5(i32* %a, i32* %b, i32 %x, i16 zeroext %y) { ; ARM-LABEL: test5: -; ARM: ldr r1, [r1] -; ARM-NEXT: ldrh r0, [r0] +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldr r1, [r1] +; ARM-NEXT: ldr r0, [r0] ; ARM-NEXT: mul r1, r2, r1 ; ARM-NEXT: eor r0, r0, r3 -; ARM-NEXT: uxth r1, r1 ; ARM-NEXT: orr r0, r0, r1 +; ARM-NEXT: uxth r0, r0 ; ARM-NEXT: bx lr ; ; ARMEB-LABEL: test5: -; ARMEB: ldr r1, [r1] -; ARMEB-NEXT: ldrh r0, [r0, #2] +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldr r1, [r1] +; ARMEB-NEXT: ldr r0, [r0] ; ARMEB-NEXT: mul r1, r2, r1 ; ARMEB-NEXT: eor r0, r0, r3 -; ARMEB-NEXT: uxth r1, r1 ; ARMEB-NEXT: orr r0, r0, r1 +; ARMEB-NEXT: uxth r0, r0 ; ARMEB-NEXT: bx lr ; ; THUMB1-LABEL: test5: -; THUMB1: ldrh r4, [r0] -; THUMB1-NEXT: eors r4, r3 -; THUMB1-NEXT: ldr r0, [r1] -; THUMB1-NEXT: muls r0, r2, r0 +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldr r1, [r1] +; THUMB1-NEXT: muls r1, r2, r1 +; THUMB1-NEXT: ldr r0, [r0] +; THUMB1-NEXT: eors r0, r3 +; THUMB1-NEXT: orrs r0, r1 ; THUMB1-NEXT: uxth r0, r0 -; THUMB1-NEXT: orrs r0, r4 -; THUMB1-NEXT: pop +; THUMB1-NEXT: bx lr ; ; THUMB2-LABEL: test5: -; THUMB2: ldr r1, [r1] -; THUMB2-NEXT: ldrh r0, [r0] +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldr r1, [r1] +; THUMB2-NEXT: ldr r0, [r0] ; THUMB2-NEXT: muls r1, r2, r1 ; THUMB2-NEXT: eors r0, r3 -; THUMB2-NEXT: uxth r1, r1 ; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: uxth r0, r0 ; THUMB2-NEXT: bx lr entry: %0 = load i32, i32* %a, align 4 @@ -846,63 +964,3 @@ entry: %and = and i32 %or, 65535 ret i32 %and } - -define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) { -; ARM-LABEL: test6: -; ARM: ldrb r0, [r0] -; ARM: uxtb r2, r2 -; ARM: and r0, r0, r1 -; ARM: uxtb r1, r0 - -; ARMEB-LABEL: test6: -; ARMEB: ldrb r0, [r0] -; ARMEB: uxtb r2, r2 -; ARMEB: and r0, r0, r1 -; ARMEB: uxtb r1, r0 - -; THUMB1-LABEL: test6: -; THUMB1: ldrb r0, [r0] -; THUMB1: ands r0, r1 -; THUMB1: uxtb r3, r0 -; THUMB1: uxtb r2, r2 - -; THUMB2-LABEL: test6: -; THUMB2: ldrb r0, [r0] -; THUMB2: uxtb r2, r2 -; THUMB2: ands r0, r1 -; THUMB2: uxtb r1, r0 -entry: - %0 = load i8, i8* %x, align 4 - %1 = and i8 %0, %y - %2 = icmp eq i8 %1, %z - ret i1 %2 -} - -define arm_aapcscc i1 @test7(i16* %x, i16 %y, i8 %z) { -; ARM-LABEL: test7: -; ARM: ldrb r0, [r0] -; ARM: uxtb r2, r2 -; ARM: and r1, r0, r1 - -; ARMEB-LABEL: test7: -; ARMEB: ldrb r0, [r0, #1] -; ARMEB: uxtb r2, r2 -; ARMEB: and r1, r0, r1 - -; THUMB1-LABEL: test7: -; THUMB1: ldrb r3, [r0] -; THUMB1: ands r3, r1 -; THUMB1: uxtb r2, r2 - -; THUMB2-LABEL: test7: -; THUMB2: ldrb r0, [r0] -; THUMB2: uxtb r2, r2 -; THUMB2: ands r1, r0 -entry: - %0 = load i16, i16* %x, align 4 - %1 = and i16 %0, %y - %2 = trunc i16 %1 to i8 - %3 = icmp eq i8 %2, %z - ret i1 %3 -} - |