| author | Jun Bum Lim <junbuml@codeaurora.org> | 2016-02-12 15:25:39 +0000 |
|---|---|---|
| committer | Jun Bum Lim <junbuml@codeaurora.org> | 2016-02-12 15:25:39 +0000 |
| commit | 397eb7b0b39b8cee689c991e31be42da936f7d59 (patch) | |
| tree | e514353dfadf0d87467fcdea35be81e25ee92191 | |
| parent | f034a8c7d7719d9be6c775bb74afc97a88238ba4 (diff) | |
[AArch64] Merge two adjacent str WZR into str XZR
Summary:
This change merges two adjacent 32-bit zero stores into a single 64-bit zero store. For example,
str wzr, [x0]
str wzr, [x0, #4]
becomes
str xzr, [x0]
In turn, four adjacent 32-bit zero stores can then be merged into a single stp. For example,
str wzr, [x0]
str wzr, [x0, #4]
str wzr, [x0, #8]
str wzr, [x0, #12]
becomes
stp xzr, xzr, [x0]
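
To see the end-to-end effect at the source level, consider the sketch below; a minimal example assuming a compiler that includes this patch, targeting AArch64 with optimization enabled (the function name and the described pre-patch output are illustrative, not taken from the commit):

```cpp
// Zeroing four consecutive 32-bit slots. Each assignment lowers to a
// "str wzr"; with this change the pass first widens adjacent pairs into
// "str xzr", and the pairing step then emits a single "stp xzr, xzr, [x0]".
void zero_four(int *p) {
  p[0] = 0;
  p[1] = 0;
  p[2] = 0;
  p[3] = 0;
}
```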
Reviewers: mcrosier, jmolloy, gberry, t.p.northover
Subscribers: aemerson, rengolin, mcrosier, llvm-commits
Differential Revision: http://reviews.llvm.org/D16933
llvm-svn: 260682
| Mode | Path | Lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 45 |
| -rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll | 75 |

2 files changed, 105 insertions, 15 deletions
```diff
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index d2555148ff0..aafff4ef580 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -235,10 +235,6 @@ static bool isNarrowStore(unsigned Opc) {
   }
 }
 
-static bool isNarrowStore(MachineInstr *MI) {
-  return isNarrowStore(MI->getOpcode());
-}
-
 static bool isNarrowLoad(unsigned Opc) {
   switch (Opc) {
   default:
@@ -386,6 +382,10 @@ static unsigned getMatchingWideOpcode(unsigned Opc) {
     return AArch64::STURHHi;
   case AArch64::STURHHi:
     return AArch64::STURWi;
+  case AArch64::STURWi:
+    return AArch64::STURXi;
+  case AArch64::STRWui:
+    return AArch64::STRXui;
   case AArch64::LDRHHui:
   case AArch64::LDRSHWui:
     return AArch64::LDRWui;
@@ -640,6 +640,16 @@ static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
          (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
 }
 
+static bool isPromotableZeroStoreOpcode(MachineInstr *MI) {
+  unsigned Opc = MI->getOpcode();
+  return isNarrowStore(Opc) || Opc == AArch64::STRWui || Opc == AArch64::STURWi;
+}
+
+static bool isPromotableZeroStoreInst(MachineInstr *MI) {
+  return (isPromotableZeroStoreOpcode(MI)) &&
+         getLdStRegOp(MI).getReg() == AArch64::WZR;
+}
+
 MachineBasicBlock::iterator
 AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
                                       MachineBasicBlock::iterator MergeMI,
@@ -775,12 +785,12 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
     MergeMI->eraseFromParent();
     return NextI;
   }
-  assert(isNarrowStore(Opc) && "Expected narrow store");
+  assert(isPromotableZeroStoreInst(I) && "Expected promotable zero store");
 
   // Construct the new instruction.
   MachineInstrBuilder MIB;
   MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
-            .addOperand(getLdStRegOp(I))
+            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
             .addOperand(BaseRegOp)
             .addImm(OffsetImm)
             .setMemRefs(I->mergeMemRefsWith(*MergeMI));
@@ -1211,7 +1221,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
   unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
   int Offset = getLdStOffsetOp(FirstMI).getImm();
   int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
-  bool IsNarrowStore = isNarrowStore(Opc);
+  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
 
   // Track which registers have been modified and used between the first insn
   // (inclusive) and the second insn.
@@ -1282,7 +1292,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
         continue;
       }
 
-      if (IsNarrowLoad || IsNarrowStore) {
+      if (IsNarrowLoad || IsPromotableZeroStore) {
         // If the alignment requirements of the scaled wide load/store
         // instruction can't express the offset of the scaled narrow
         // input, bail and keep looking.
@@ -1307,7 +1317,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
       // For narrow stores, allow only when the stored value is the same
       // (i.e., WZR).
       if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) ||
-          (IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) {
+          (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
         trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
         MemInsns.push_back(MI);
         continue;
@@ -1633,24 +1643,27 @@ bool AArch64LoadStoreOpt::isCandidateToMergeOrPair(MachineInstr *MI) {
 // store.
 bool AArch64LoadStoreOpt::tryToMergeLdStInst(
     MachineBasicBlock::iterator &MBBI) {
-  assert((isNarrowLoad(MBBI) || isNarrowStore(MBBI)) && "Expected narrow op.");
+  assert((isNarrowLoad(MBBI) || isPromotableZeroStoreOpcode(MBBI)) &&
+         "Expected narrow op.");
   MachineInstr *MI = MBBI;
   MachineBasicBlock::iterator E = MI->getParent()->end();
 
   if (!isCandidateToMergeOrPair(MI))
     return false;
 
-  // For narrow stores, find only the case where the stored value is WZR.
-  if (isNarrowStore(MI) && getLdStRegOp(MI).getReg() != AArch64::WZR)
+  // For promotable zero stores, the stored value should be WZR.
+  if (isPromotableZeroStoreOpcode(MI) &&
+      getLdStRegOp(MI).getReg() != AArch64::WZR)
     return false;
 
   // Look ahead up to LdStLimit instructions for a mergable instruction.
   LdStPairFlags Flags;
-  MachineBasicBlock::iterator MergeMI = findMatchingInsn(MBBI, Flags, LdStLimit);
+  MachineBasicBlock::iterator MergeMI =
+      findMatchingInsn(MBBI, Flags, LdStLimit);
   if (MergeMI != E) {
     if (isNarrowLoad(MI)) {
       ++NumNarrowLoadsPromoted;
-    } else if (isNarrowStore(MI)) {
+    } else if (isPromotableZeroStoreInst(MI)) {
       ++NumZeroStoresPromoted;
     }
     // Keeping the iterator straight is a pain, so we let the merge routine tell
@@ -1765,13 +1778,15 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
     case AArch64::LDRSHWui:
     case AArch64::STRBBui:
     case AArch64::STRHHui:
+    case AArch64::STRWui:
     // Unscaled instructions.
     case AArch64::LDURBBi:
     case AArch64::LDURHHi:
     case AArch64::LDURSBWi:
     case AArch64::LDURSHWi:
     case AArch64::STURBBi:
-    case AArch64::STURHHi: {
+    case AArch64::STURHHi:
+    case AArch64::STURWi: {
       if (tryToMergeLdStInst(MBBI)) {
         Modified = true;
         break;
diff --git a/llvm/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll b/llvm/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
index 5276ac334a7..ec15465eab1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
@@ -352,6 +352,42 @@ entry:
   ret void
 }
 
+;CHECK-LABEL: Strw_zero
+;CHECK : str xzr
+define void @Strw_zero(i32* nocapture %P, i32 %n) {
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  store i32 0, i32* %arrayidx
+  %add = add nsw i32 %n, 1
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
+  store i32 0, i32* %arrayidx2
+  ret void
+}
+
+;CHECK-LABEL: Strw_zero_4
+;CHECK : stp xzr
+define void @Strw_zero_4(i32* nocapture %P, i32 %n) {
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  store i32 0, i32* %arrayidx
+  %add = add nsw i32 %n, 1
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
+  store i32 0, i32* %arrayidx2
+  %add3 = add nsw i32 %n, 2
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %P, i64 %idxprom4
+  store i32 0, i32* %arrayidx5
+  %add6 = add nsw i32 %n, 3
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds i32, i32* %P, i64 %idxprom7
+  store i32 0, i32* %arrayidx8
+  ret void
+}
+
 ; CHECK-LABEL: Sturb_zero
 ; CHECK: sturh wzr
 define void @Sturb_zero(i8* nocapture %P, i32 %n) #0 {
@@ -404,3 +440,42 @@ entry:
   store i16 0, i16* %arrayidx9
   ret void
 }
+
+;CHECK-LABEL: Sturw_zero
+;CHECK : stur xzr
+define void @Sturw_zero(i32* nocapture %P, i32 %n) {
+entry:
+  %sub = add nsw i32 %n, -3
+  %idxprom = sext i32 %sub to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  store i32 0, i32* %arrayidx
+  %sub1 = add nsw i32 %n, -4
+  %idxprom2 = sext i32 %sub1 to i64
+  %arrayidx3 = getelementptr inbounds i32, i32* %P, i64 %idxprom2
+  store i32 0, i32* %arrayidx3
+  ret void
+}
+
+;CHECK-LABEL: Sturw_zero_4
+;CHECK : str xzr
+define void @Sturw_zero_4(i32* nocapture %P, i32 %n) {
+entry:
+  %sub = add nsw i32 %n, -3
+  %idxprom = sext i32 %sub to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  store i32 0, i32* %arrayidx
+  %sub1 = add nsw i32 %n, -4
+  %idxprom2 = sext i32 %sub1 to i64
+  %arrayidx3 = getelementptr inbounds i32, i32* %P, i64 %idxprom2
+  store i32 0, i32* %arrayidx3
+  %sub4 = add nsw i32 %n, -2
+  %idxprom5 = sext i32 %sub4 to i64
+  %arrayidx6 = getelementptr inbounds i32, i32* %P, i64 %idxprom5
+  store i32 0, i32* %arrayidx6
+  %sub7 = add nsw i32 %n, -1
+  %idxprom8 = sext i32 %sub7 to i64
+  %arrayidx9 = getelementptr inbounds i32, i32* %P, i64 %idxprom8
+  store i32 0, i32* %arrayidx9
+  ret void
+}
```
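
For a quick read of the new candidate test without the surrounding pass machinery, here is a standalone restatement; the enum, the register constant, and the `Store` struct below are stand-ins for LLVM's `AArch64::*` opcodes, `AArch64::WZR`, and `MachineInstr`, so this is a simplified paraphrase of the hunks above, not code from the tree:

```cpp
#include <cstdio>

// Stand-ins for LLVM's opcode enum and zero-register id; the real pass
// works on MachineInstr and the AArch64::* constants.
enum Opcode { STRBBui, STURBBi, STRHHui, STURHHi,  // narrow (8/16-bit) stores
              STRWui, STURWi,                      // 32-bit stores, new here
              STRXui, STURXi, Other };
constexpr unsigned WZR = 31;

struct Store {        // minimal stand-in for a store MachineInstr
  Opcode Opc;
  unsigned SrcReg;    // register holding the stored value
};

// The 8/16-bit stores were already merge candidates; the patch adds the
// scaled (STRWui) and unscaled (STURWi) 32-bit forms.
bool isPromotableZeroStoreOpcode(Opcode Opc) {
  switch (Opc) {
  case STRBBui: case STURBBi: case STRHHui: case STURHHi:
  case STRWui:  case STURWi:
    return true;
  default:
    return false;
  }
}

// Only stores of the zero register qualify: two "str wzr" write the same
// bytes as one "str xzr", so the merge is always value-preserving.
bool isPromotableZeroStoreInst(const Store &S) {
  return isPromotableZeroStoreOpcode(S.Opc) && S.SrcReg == WZR;
}

// Mirror of the two getMatchingWideOpcode cases added by the patch.
Opcode widenZeroStoreOpcode(Opcode Opc) {
  switch (Opc) {
  case STRWui: return STRXui;
  case STURWi: return STURXi;
  default:     return Other;  // narrow cases widen stepwise (B -> H -> W)
  }
}

int main() {
  Store S{STRWui, WZR};
  std::printf("promotable: %d, widened opcode: %d\n",
              isPromotableZeroStoreInst(S), widenZeroStoreOpcode(S.Opc));
}
```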

