summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNirav Dave <niravd@google.com>2018-11-08 19:14:20 +0000
committerNirav Dave <niravd@google.com>2018-11-08 19:14:20 +0000
commit6ce9f72f76e3c1c9c5b1cd5a65ba2b0bb319294f (patch)
treebcfc45bd36ac8be46d6c84a2b7e5e3625ee985a9
parentf3dc9649ced6d3e5a2574bf939e2c8cfcfb9c465 (diff)
downloadbcm5719-llvm-6ce9f72f76e3c1c9c5b1cd5a65ba2b0bb319294f.tar.gz
bcm5719-llvm-6ce9f72f76e3c1c9c5b1cd5a65ba2b0bb319294f.zip
[DAGCombine] Improve alias analysis for chain of independent stores.
FindBetterNeighborChains simultaneously improves the chain dependencies of a chain of related stores, avoiding the generation of extra token factors. For chains longer than the GatherAllAliasDepths, stores further down in the chain will necessarily fail, a potentially significant waste that also prevents otherwise trivial parallelization. This patch directly parallelizes the chains of stores before improving each store. This generally improves DAG-level parallelism. Reviewers: courbet, spatel, RKSimon, bogner, efriedma, craig.topper, rnk Subscribers: sdardis, javed.absar, hiraditya, jrtc27, atanasyan, llvm-commits Differential Revision: https://reviews.llvm.org/D53552 llvm-svn: 346432
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp175
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll9
-rw-r--r--llvm/test/CodeGen/AArch64/ldst-opt.ll14
-rw-r--r--llvm/test/CodeGen/AArch64/swifterror.ll5
-rw-r--r--llvm/test/CodeGen/ARM/arm-storebytesmerge.ll178
-rw-r--r--llvm/test/CodeGen/ARM/misched-fusion-aes.ll15
-rw-r--r--llvm/test/CodeGen/Mips/fastcc.ll36
-rw-r--r--llvm/test/CodeGen/SystemZ/pr36164.ll69
-rw-r--r--llvm/test/CodeGen/X86/stores-merging.ll3
9 files changed, 272 insertions, 232 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fc0e8efebdc..bb0c3fb4c2f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
@@ -490,6 +491,10 @@ namespace {
/// returns false.
bool findBetterNeighborChains(StoreSDNode *St);
+ // Helper for findBetterNeighborChains. Walk up the store chain and add
+ // additional chained stores that do not overlap and can be parallelized.
+ bool parallelizeChainedStores(StoreSDNode *St);
+
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
@@ -18905,6 +18910,11 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
+// TODO: Replace with std::monostate when we move to C++17.
+struct UnitT { } Unit;
+bool operator==(const UnitT &, const UnitT &) { return true; }
+bool operator!=(const UnitT &, const UnitT &) { return false; }
+
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
@@ -18917,13 +18927,22 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
-bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
- if (OptLevel == CodeGenOpt::None)
- return false;
+
+bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
+ SmallVector<StoreSDNode *, 8> ChainedStores;
+ StoreSDNode *STChain = St;
+ // Intervals records which offsets from BaseIndex have been covered. In
+ // the common case, every store writes to the immediately previous address
+ // space and is thus merged with the previous interval at insertion time.
+
+ using IMap =
+ llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
+ IMap::Allocator A;
+ IMap Intervals(A);
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
+ const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
@@ -18933,76 +18952,114 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
if (BasePtr.getBase().isUndef())
return false;
- SmallVector<StoreSDNode *, 8> ChainedStores;
- ChainedStores.push_back(St);
+ // Add ST's interval.
+ Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
- // Walk up the chain and look for nodes with offsets from the same
- // base pointer. Stop when reaching an instruction with a different kind
- // or instruction which has a different base pointer.
- StoreSDNode *Index = St;
- while (Index) {
+ while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
// If the chain has more than one use, then we can't reorder the mem ops.
- if (Index != St && !SDValue(Index, 0)->hasOneUse())
+ if (!SDValue(Chain, 0)->hasOneUse())
break;
-
- if (Index->isVolatile() || Index->isIndexed())
+ if (Chain->isVolatile() || Chain->isIndexed())
break;
// Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
-
+ const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
// Check that the base pointer is the same as the original one.
- if (!BasePtr.equalBaseIndex(Ptr, DAG))
+ int64_t Offset;
+ if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
break;
+ int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
+ // Make sure we don't overlap with other intervals by checking the ones to
+ // the left or right before inserting.
+ auto I = Intervals.find(Offset);
+ // If there's a next interval, we should end before it.
+ if (I != Intervals.end() && I.start() < (Offset + Length))
+ break;
+ // If there's a previous interval, we should start after it.
+ if (I != Intervals.begin() && (--I).stop() <= Offset)
+ break;
+ Intervals.insert(Offset, Offset + Length, Unit);
- // Walk up the chain to find the next store node, ignoring any
- // intermediate loads. Any other kind of node will halt the loop.
- SDNode *NextInChain = Index->getChain().getNode();
- while (true) {
- if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
- // We found a store node. Use it for the next iteration.
- if (STn->isVolatile() || STn->isIndexed()) {
- Index = nullptr;
- break;
- }
- ChainedStores.push_back(STn);
- Index = STn;
- break;
- } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
- NextInChain = Ldn->getChain().getNode();
- continue;
- } else {
- Index = nullptr;
- break;
- }
- }// end while
+ ChainedStores.push_back(Chain);
+ STChain = Chain;
}
- // At this point, ChainedStores lists all of the Store nodes
- // reachable by iterating up through chain nodes matching the above
- // conditions. For each such store identified, try to find an
- // earlier chain to attach the store to which won't violate the
- // required ordering.
- bool MadeChangeToSt = false;
- SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
+ // If we didn't find a chained store, exit.
+ if (ChainedStores.size() == 0)
+ return false;
+
+ // Improve all chained stores (St and ChainedStores members) starting from
+ // where the store chain ended and return single TokenFactor.
+ SDValue NewChain = STChain->getChain();
+ SmallVector<SDValue, 8> TFOps;
+ for (unsigned I = ChainedStores.size(); I;) {
+ StoreSDNode *S = ChainedStores[--I];
+ SDValue BetterChain = FindBetterChain(S, NewChain);
+ S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
+ S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
+ TFOps.push_back(SDValue(S, 0));
+ ChainedStores[I] = S;
+ }
+
+ // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
+ SDValue BetterChain = FindBetterChain(St, NewChain);
+ SDValue NewST;
+ if (St->isTruncatingStore())
+ NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
+ St->getBasePtr(), St->getMemoryVT(),
+ St->getMemOperand());
+ else
+ NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
+ St->getBasePtr(), St->getMemOperand());
- for (StoreSDNode *ChainedStore : ChainedStores) {
- SDValue Chain = ChainedStore->getChain();
- SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
+ TFOps.push_back(NewST);
- if (Chain != BetterChain) {
- if (ChainedStore == St)
- MadeChangeToSt = true;
- BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
- }
- }
+ // If we improved every element of TFOps, then we've lost the dependence on
+ // NewChain to successors of St and we need to add it back to TFOps. Do so at
+ // the beginning to keep relative order consistent with FindBetterChain.
+ auto hasImprovedChain = [&](SDValue ST) -> bool {
+ return ST->getOperand(0) != NewChain;
+ };
+ bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
+ if (AddNewChain)
+ TFOps.insert(TFOps.begin(), NewChain);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
+ CombineTo(St, TF);
+
+ AddToWorklist(STChain);
+ // Add TF operands to the worklist in reverse order.
+ for (auto I = TF->getNumOperands(); I;)
+ AddToWorklist(TF->getOperand(--I).getNode());
+ AddToWorklist(TF.getNode());
+ return true;
+}
+
+bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
- // Do all replacements after finding the replacements to make to avoid making
- // the chains more complicated by introducing new TokenFactors.
- for (auto Replacement : BetterChains)
- replaceStoreChain(Replacement.first, Replacement.second);
+ // We must have a base and an offset.
+ if (!BasePtr.getBase().getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.getBase().isUndef())
+ return false;
+
+ // Directly improve a chain of disjoint stores starting at St.
+ if (parallelizeChainedStores(St))
+ return true;
- return MadeChangeToSt;
+ // Improve St's chain.
+ SDValue BetterChain = FindBetterChain(St, St->getChain());
+ if (St->getChain() != BetterChain) {
+ replaceStoreChain(St, BetterChain);
+ return true;
+ }
+ return false;
}
/// This is the entry point for the file.
diff --git a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
index 92c320a8210..b0a42565527 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
@@ -7,14 +7,13 @@ define void @fn9(i32* %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7,
; CHECK-LABEL: fn9:
; 9th fixed argument
; CHECK: ldr {{w[0-9]+}}, [sp, #64]
-; CHECK: add [[ARGS:x[0-9]+]], sp, #72
-; CHECK: add {{x[0-9]+}}, [[ARGS]], #8
+; CHECK-DAG: add [[ARGS:x[0-9]+]], sp, #72
; First vararg
-; CHECK: ldr {{w[0-9]+}}, [sp, #72]
+; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #72]
; Second vararg
-; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
+; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #80]
; Third vararg
-; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
+; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #88]
%1 = alloca i32, align 4
%2 = alloca i32, align 4
%3 = alloca i32, align 4
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt.ll b/llvm/test/CodeGen/AArch64/ldst-opt.ll
index ae3f59ee8f5..7f6cba2133f 100644
--- a/llvm/test/CodeGen/AArch64/ldst-opt.ll
+++ b/llvm/test/CodeGen/AArch64/ldst-opt.ll
@@ -1465,10 +1465,10 @@ entry:
define void @merge_zr32_3vec(<3 x i32>* %p) {
; CHECK-LABEL: merge_zr32_3vec:
; CHECK: // %entry
-; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
; NOSTRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
-; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
-; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
+; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #4]
+; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
store <3 x i32> zeroinitializer, <3 x i32>* %p
@@ -1480,8 +1480,8 @@ define void @merge_zr32_4vec(<4 x i32>* %p) {
; CHECK-LABEL: merge_zr32_4vec:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
-; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
store <4 x i32> zeroinitializer, <4 x i32>* %p
@@ -1505,8 +1505,8 @@ define void @merge_zr32_4vecf(<4 x float>* %p) {
; CHECK-LABEL: merge_zr32_4vecf:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
-; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
+; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
store <4 x float> zeroinitializer, <4 x float>* %p
@@ -1589,8 +1589,8 @@ entry:
define void @merge_zr64_3vec(<3 x i64>* %p) {
; CHECK-LABEL: merge_zr64_3vec:
; CHECK: // %entry
-; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
-; CHECK-NEXT: str xzr, [x{{[0-9]+}}, #16]
+; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #8]
+; CHECK-NEXT: str xzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
store <3 x i64> zeroinitializer, <3 x i64>* %p
diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll
index 637ff3e2e29..8ea89464ab0 100644
--- a/llvm/test/CodeGen/AArch64/swifterror.ll
+++ b/llvm/test/CodeGen/AArch64/swifterror.ll
@@ -314,13 +314,12 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
; CHECK-APPLE-DAG: strb [[ID]], [x0, #8]
; First vararg
-; CHECK-APPLE-DAG: orr {{x[0-9]+}}, [[ARGS]], #0x8
; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #16]
; Second vararg
-; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
+; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #24]
; CHECK-APPLE-DAG: add {{x[0-9]+}}, {{x[0-9]+}}, #16
; Third vararg
-; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
+; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32]
; CHECK-APPLE: mov x21, x0
; CHECK-APPLE-NOT: x21
diff --git a/llvm/test/CodeGen/ARM/arm-storebytesmerge.ll b/llvm/test/CodeGen/ARM/arm-storebytesmerge.ll
index edc25302f7c..00c5914b34b 100644
--- a/llvm/test/CodeGen/ARM/arm-storebytesmerge.ll
+++ b/llvm/test/CodeGen/ARM/arm-storebytesmerge.ll
@@ -8,101 +8,95 @@ target triple = "thumbv7em-arm-none-eabi"
define arm_aapcs_vfpcc void @test(i8* %v50) #0 {
; CHECK-LABEL: test:
; CHECK: @ %bb.0:
-; CHECK-NEXT: movw r1, #35722
-; CHECK-NEXT: movt r1, #36236
-; CHECK-NEXT: str.w r1, [r0, #394]
-; CHECK-NEXT: movw r1, #36750
-; CHECK-NEXT: movt r1, #37264
-; CHECK-NEXT: str.w r1, [r0, #398]
-; CHECK-NEXT: movw r1, #37778
-; CHECK-NEXT: movt r1, #38292
-; CHECK-NEXT: str.w r1, [r0, #402]
-; CHECK-NEXT: movw r1, #38806
-; CHECK-NEXT: movt r1, #39320
-; CHECK-NEXT: str.w r1, [r0, #406]
-; CHECK-NEXT: movw r1, #39834
-; CHECK-NEXT: strh.w r1, [r0, #410]
-; CHECK-NEXT: movw r1, #40348
-; CHECK-NEXT: movt r1, #40862
-; CHECK-NEXT: str.w r1, [r0, #412]
-; CHECK-NEXT: movw r1, #41376
-; CHECK-NEXT: movt r1, #41890
-; CHECK-NEXT: str.w r1, [r0, #416]
-; CHECK-NEXT: movw r1, #42404
-; CHECK-NEXT: movt r1, #42918
-; CHECK-NEXT: str.w r1, [r0, #420]
-; CHECK-NEXT: movw r1, #43432
-; CHECK-NEXT: movt r1, #43946
-; CHECK-NEXT: str.w r1, [r0, #424]
-; CHECK-NEXT: movw r1, #44460
-; CHECK-NEXT: movt r1, #44974
-; CHECK-NEXT: str.w r1, [r0, #428]
-; CHECK-NEXT: movw r1, #45488
-; CHECK-NEXT: strh.w r1, [r0, #432]
+; CHECK-NEXT: movw r1, #65534
+; CHECK-NEXT: strh.w r1, [r0, #510]
+; CHECK-NEXT: movw r1, #64506
+; CHECK-NEXT: movt r1, #65020
+; CHECK-NEXT: str.w r1, [r0, #506]
+; CHECK-NEXT: movw r1, #63478
+; CHECK-NEXT: movt r1, #63992
+; CHECK-NEXT: str.w r1, [r0, #502]
+; CHECK-NEXT: movw r1, #62450
+; CHECK-NEXT: movt r1, #62964
+; CHECK-NEXT: str.w r1, [r0, #498]
+; CHECK-NEXT: movw r1, #61422
+; CHECK-NEXT: movt r1, #61936
+; CHECK-NEXT: str.w r1, [r0, #494]
+; CHECK-NEXT: movw r1, #60394
+; CHECK-NEXT: movt r1, #60908
+; CHECK-NEXT: str.w r1, [r0, #490]
+; CHECK-NEXT: movw r1, #59366
+; CHECK-NEXT: movt r1, #59880
+; CHECK-NEXT: str.w r1, [r0, #486]
+; CHECK-NEXT: movw r1, #58338
+; CHECK-NEXT: movt r1, #58852
+; CHECK-NEXT: str.w r1, [r0, #482]
+; CHECK-NEXT: movw r1, #57310
+; CHECK-NEXT: movt r1, #57824
+; CHECK-NEXT: str.w r1, [r0, #478]
+; CHECK-NEXT: movw r1, #56282
+; CHECK-NEXT: movt r1, #56796
+; CHECK-NEXT: str.w r1, [r0, #474]
+; CHECK-NEXT: movw r1, #55254
+; CHECK-NEXT: movt r1, #55768
+; CHECK-NEXT: str.w r1, [r0, #470]
+; CHECK-NEXT: movw r1, #54226
+; CHECK-NEXT: movt r1, #54740
+; CHECK-NEXT: str.w r1, [r0, #466]
+; CHECK-NEXT: movw r1, #53198
+; CHECK-NEXT: movt r1, #53712
+; CHECK-NEXT: str.w r1, [r0, #462]
+; CHECK-NEXT: movw r1, #52170
+; CHECK-NEXT: movt r1, #52684
+; CHECK-NEXT: str.w r1, [r0, #458]
+; CHECK-NEXT: movw r1, #51142
+; CHECK-NEXT: movt r1, #51656
+; CHECK-NEXT: str.w r1, [r0, #454]
+; CHECK-NEXT: movw r1, #50114
+; CHECK-NEXT: movt r1, #50628
+; CHECK-NEXT: str.w r1, [r0, #450]
+; CHECK-NEXT: movw r1, #49086
+; CHECK-NEXT: movt r1, #49600
+; CHECK-NEXT: str.w r1, [r0, #446]
+; CHECK-NEXT: movw r1, #48058
+; CHECK-NEXT: movt r1, #48572
+; CHECK-NEXT: str.w r1, [r0, #442]
+; CHECK-NEXT: movw r1, #47030
+; CHECK-NEXT: movt r1, #47544
+; CHECK-NEXT: str.w r1, [r0, #438]
; CHECK-NEXT: movw r1, #46002
; CHECK-NEXT: movt r1, #46516
; CHECK-NEXT: str.w r1, [r0, #434]
-; CHECK-NEXT: movw r1, #47030
-; CHECK-NEXT: strh.w r1, [r0, #438]
-; CHECK-NEXT: movw r1, #47544
-; CHECK-NEXT: movt r1, #48058
-; CHECK-NEXT: str.w r1, [r0, #440]
-; CHECK-NEXT: movw r1, #48572
-; CHECK-NEXT: movt r1, #49086
-; CHECK-NEXT: str.w r1, [r0, #444]
-; CHECK-NEXT: movw r1, #49600
-; CHECK-NEXT: strh.w r1, [r0, #448]
-; CHECK-NEXT: movs r1, #194
-; CHECK-NEXT: strb.w r1, [r0, #450]
-; CHECK-NEXT: movw r1, #50371
-; CHECK-NEXT: movt r1, #50885
-; CHECK-NEXT: str.w r1, [r0, #451]
-; CHECK-NEXT: movw r1, #51399
-; CHECK-NEXT: movt r1, #51913
-; CHECK-NEXT: str.w r1, [r0, #455]
-; CHECK-NEXT: movw r1, #52427
-; CHECK-NEXT: movt r1, #52941
-; CHECK-NEXT: str.w r1, [r0, #459]
-; CHECK-NEXT: movw r1, #53455
-; CHECK-NEXT: movt r1, #53969
-; CHECK-NEXT: str.w r1, [r0, #463]
-; CHECK-NEXT: movw r1, #54483
-; CHECK-NEXT: strh.w r1, [r0, #467]
-; CHECK-NEXT: movw r1, #54997
-; CHECK-NEXT: movt r1, #55511
-; CHECK-NEXT: str.w r1, [r0, #469]
-; CHECK-NEXT: movw r1, #56025
-; CHECK-NEXT: movt r1, #56539
-; CHECK-NEXT: str.w r1, [r0, #473]
-; CHECK-NEXT: movw r1, #57053
-; CHECK-NEXT: movt r1, #57567
-; CHECK-NEXT: str.w r1, [r0, #477]
-; CHECK-NEXT: movw r1, #58081
-; CHECK-NEXT: movt r1, #58595
-; CHECK-NEXT: str.w r1, [r0, #481]
-; CHECK-NEXT: movw r1, #59109
-; CHECK-NEXT: movt r1, #59623
-; CHECK-NEXT: str.w r1, [r0, #485]
-; CHECK-NEXT: movw r1, #60137
-; CHECK-NEXT: strh.w r1, [r0, #489]
-; CHECK-NEXT: movw r1, #60651
-; CHECK-NEXT: movt r1, #61165
-; CHECK-NEXT: str.w r1, [r0, #491]
-; CHECK-NEXT: movw r1, #61679
-; CHECK-NEXT: strh.w r1, [r0, #495]
-; CHECK-NEXT: movw r1, #62193
-; CHECK-NEXT: movt r1, #62707
-; CHECK-NEXT: str.w r1, [r0, #497]
-; CHECK-NEXT: movw r1, #63221
-; CHECK-NEXT: movt r1, #63735
-; CHECK-NEXT: str.w r1, [r0, #501]
-; CHECK-NEXT: movw r1, #64249
-; CHECK-NEXT: strh.w r1, [r0, #505]
-; CHECK-NEXT: movs r1, #251
-; CHECK-NEXT: strb.w r1, [r0, #507]
-; CHECK-NEXT: movw r1, #65020
-; CHECK-NEXT: movt r1, #65534
-; CHECK-NEXT: str.w r1, [r0, #508]
+; CHECK-NEXT: movw r1, #44974
+; CHECK-NEXT: movt r1, #45488
+; CHECK-NEXT: str.w r1, [r0, #430]
+; CHECK-NEXT: movw r1, #43946
+; CHECK-NEXT: movt r1, #44460
+; CHECK-NEXT: str.w r1, [r0, #426]
+; CHECK-NEXT: movw r1, #42918
+; CHECK-NEXT: movt r1, #43432
+; CHECK-NEXT: str.w r1, [r0, #422]
+; CHECK-NEXT: movw r1, #41890
+; CHECK-NEXT: movt r1, #42404
+; CHECK-NEXT: str.w r1, [r0, #418]
+; CHECK-NEXT: movw r1, #40862
+; CHECK-NEXT: movt r1, #41376
+; CHECK-NEXT: str.w r1, [r0, #414]
+; CHECK-NEXT: movw r1, #39834
+; CHECK-NEXT: movt r1, #40348
+; CHECK-NEXT: str.w r1, [r0, #410]
+; CHECK-NEXT: movw r1, #38806
+; CHECK-NEXT: movt r1, #39320
+; CHECK-NEXT: str.w r1, [r0, #406]
+; CHECK-NEXT: movw r1, #37778
+; CHECK-NEXT: movt r1, #38292
+; CHECK-NEXT: str.w r1, [r0, #402]
+; CHECK-NEXT: movw r1, #36750
+; CHECK-NEXT: movt r1, #37264
+; CHECK-NEXT: str.w r1, [r0, #398]
+; CHECK-NEXT: movw r1, #35722
+; CHECK-NEXT: movt r1, #36236
+; CHECK-NEXT: str.w r1, [r0, #394]
; CHECK-NEXT: bx lr
%v190 = getelementptr inbounds i8, i8* %v50, i32 394
store i8 -118, i8* %v190, align 1
diff --git a/llvm/test/CodeGen/ARM/misched-fusion-aes.ll b/llvm/test/CodeGen/ARM/misched-fusion-aes.ll
index 483f26cc8e0..b6ca49646f8 100644
--- a/llvm/test/CodeGen/ARM/misched-fusion-aes.ll
+++ b/llvm/test/CodeGen/ARM/misched-fusion-aes.ll
@@ -72,20 +72,27 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
; CHECK-LABEL: aesea:
; CHECK: aese.8 [[QA:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QA]]
+
; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]]
-; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
+
; CHECK: aese.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QC]]
+
+; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aese.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QD]]
+
+; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aese.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QE]]
-; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
+
; CHECK: aese.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QF]]
+
; CHECK: aese.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QG]]
+
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aese.8 [[QH:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QH]]
@@ -160,14 +167,14 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QA]]
; CHECK: aesd.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QB]]
-; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aesd.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QC]]
+; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aesd.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QD]]
+; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aesd.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QE]]
-; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aesd.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QF]]
; CHECK: aesd.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
diff --git a/llvm/test/CodeGen/Mips/fastcc.ll b/llvm/test/CodeGen/Mips/fastcc.ll
index fb1bc4d9a8a..e48dee4721d 100644
--- a/llvm/test/CodeGen/Mips/fastcc.ll
+++ b/llvm/test/CodeGen/Mips/fastcc.ll
@@ -223,24 +223,24 @@ entry:
define internal fastcc void @callee1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind noinline {
entry:
; CHECK-LABEL: callee1:
-; CHECK-DAG: swc1 $f0
-; CHECK-DAG: swc1 $f1
-; CHECK-DAG: swc1 $f2
-; CHECK-DAG: swc1 $f3
-; CHECK-DAG: swc1 $f4
-; CHECK-DAG: swc1 $f5
-; CHECK-DAG: swc1 $f6
-; CHECK-DAG: swc1 $f7
-; CHECK-DAG: swc1 $f8
-; CHECK-DAG: swc1 $f9
-; CHECK-DAG: swc1 $f10
-; CHECK-DAG: swc1 $f11
-; CHECK-DAG: swc1 $f12
-; CHECK-DAG: swc1 $f13
-; CHECK-DAG: swc1 $f14
-; CHECK-DAG: swc1 $f15
-; CHECK-DAG: swc1 $f16
; CHECK-DAG: swc1 $f17
+; CHECK-DAG: swc1 $f16
+; CHECK-DAG: swc1 $f15
+; CHECK-DAG: swc1 $f14
+; CHECK-DAG: swc1 $f13
+; CHECK-DAG: swc1 $f12
+; CHECK-DAG: swc1 $f11
+; CHECK-DAG: swc1 $f10
+; CHECK-DAG: swc1 $f9
+; CHECK-DAG: swc1 $f8
+; CHECK-DAG: swc1 $f7
+; CHECK-DAG: swc1 $f6
+; CHECK-DAG: swc1 $f5
+; CHECK-DAG: swc1 $f4
+; CHECK-DAG: swc1 $f3
+; CHECK-DAG: swc1 $f2
+; CHECK-DAG: swc1 $f1
+; CHECK-DAG: swc1 $f0
; CHECK-DAG: swc1 $f18
; CHECK-DAG: swc1 $f19
@@ -330,7 +330,7 @@ entry:
; NOODDSPREG-DAG: swc1 $f16, 32($[[R0]])
; NOODDSPREG-DAG: swc1 $f18, 36($[[R0]])
-; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], 0($sp)
+; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], {{[0-9]+}}($sp)
; NOODDSPREG-DAG: swc1 $[[F0]], 40($[[R0]])
store float %a0, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 0), align 4
diff --git a/llvm/test/CodeGen/SystemZ/pr36164.ll b/llvm/test/CodeGen/SystemZ/pr36164.ll
index 312961fa4b9..2ed6fa901db 100644
--- a/llvm/test/CodeGen/SystemZ/pr36164.ll
+++ b/llvm/test/CodeGen/SystemZ/pr36164.ll
@@ -15,54 +15,39 @@
define void @main() local_unnamed_addr #0 {
; CHECK-LABEL: main:
; CHECK: # %bb.0:
-; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
-; CHECK-NEXT: .cfi_offset %r12, -64
-; CHECK-NEXT: .cfi_offset %r13, -56
-; CHECK-NEXT: .cfi_offset %r14, -48
-; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: lhi %r0, 1
; CHECK-NEXT: larl %r1, g_938
-; CHECK-NEXT: lhi %r2, 2
-; CHECK-NEXT: lhi %r3, 3
-; CHECK-NEXT: lhi %r4, 0
-; CHECK-NEXT: lhi %r5, 4
-; CHECK-NEXT: larl %r14, g_11
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lhi %r3, 4
+; CHECK-NEXT: larl %r4, g_11
; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: strl %r0, g_73
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: strl %r0, g_69
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-DAG: lghi %r13, 24
-; CHECK-DAG: strl %r2, g_69
-; CHECK-DAG: ag %r13, 0(%r1)
-; CHECK-NEXT: lrl %r12, g_832
-; CHECK-NEXT: strl %r3, g_69
-; CHECK-NEXT: lrl %r12, g_832
-; CHECK-NEXT: strl %r4, g_69
-; CHECK-NEXT: lrl %r12, g_832
-; CHECK-NEXT: strl %r0, g_69
-; CHECK-NEXT: lrl %r12, g_832
; CHECK-NEXT: strl %r2, g_69
-; CHECK-NEXT: lrl %r12, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: lrl %r5, g_832
+; CHECK-NEXT: agsi 0(%r1), 24
+; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: strl %r3, g_69
-; CHECK-NEXT: stgrl %r13, g_938
-; CHECK-NEXT: lrl %r13, g_832
-; CHECK-NEXT: strl %r5, g_69
-; CHECK-NEXT: mvi 0(%r14), 1
+; CHECK-NEXT: mvi 0(%r4), 1
; CHECK-NEXT: j .LBB0_1
br label %1
diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll
index 5ccb5825934..eda7f3a2abf 100644
--- a/llvm/test/CodeGen/X86/stores-merging.ll
+++ b/llvm/test/CodeGen/X86/stores-merging.ll
@@ -13,9 +13,8 @@
define void @redundant_stores_merging() {
; CHECK-LABEL: redundant_stores_merging:
; CHECK: # %bb.0:
-; CHECK-NEXT: movabsq $528280977409, %rax # imm = 0x7B00000001
+; CHECK-NEXT: movabsq $1958505086977, %rax # imm = 0x1C800000001
; CHECK-NEXT: movq %rax, e+{{.*}}(%rip)
-; CHECK-NEXT: movl $456, e+{{.*}}(%rip) # imm = 0x1C8
; CHECK-NEXT: retq
store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
OpenPOWER on IntegriCloud