-rw-r--r--  llvm/lib/Target/Hexagon/CMakeLists.txt                    |   1
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp            | 484
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonBlockRanges.h              | 240
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp          | 359
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonFrameLowering.h            |   8
-rw-r--r--  llvm/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll  |  49
-rw-r--r--  llvm/test/CodeGen/Hexagon/avoid-predspill.ll              |   3
-rw-r--r--  llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll         | 144
-rw-r--r--  llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll            |  80
-rw-r--r--  llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll              |  80
10 files changed, 1442 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index f7b7990d2b3..b329948ee7d 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_target(HexagonCodeGen
HexagonAsmPrinter.cpp
HexagonBitSimplify.cpp
HexagonBitTracker.cpp
+ HexagonBlockRanges.cpp
HexagonCFGOptimizer.cpp
HexagonCommonGEP.cpp
HexagonCopyToCombine.cpp
diff --git a/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp b/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp
new file mode 100644
index 00000000000..ac5449afdc6
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp
@@ -0,0 +1,484 @@
+//===--- HexagonBlockRanges.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hbr"
+
+#include "HexagonBlockRanges.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <map>
+#include <vector>
+
+using namespace llvm;
+
+bool HexagonBlockRanges::IndexRange::overlaps(const IndexRange &A) const {
+ // If A contains start(), or "this" contains A.start(), then overlap.
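+  // E.g. (illustrative): [2:6] and [4:8] overlap, while [2:4] and
+  // [4:8] do not, because a shared endpoint counts as an overlap
+  // only when it is a tied end.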
+ IndexType S = start(), E = end(), AS = A.start(), AE = A.end();
+ if (AS == S)
+ return true;
+ bool SbAE = (S < AE) || (S == AE && A.TiedEnd); // S-before-AE.
+ bool ASbE = (AS < E) || (AS == E && TiedEnd); // AS-before-E.
+ if ((AS < S && SbAE) || (S < AS && ASbE))
+ return true;
+ // Otherwise no overlap.
+ return false;
+}
+
+
+bool HexagonBlockRanges::IndexRange::contains(const IndexRange &A) const {
+ if (start() <= A.start()) {
+ // Treat "None" in the range end as equal to the range start.
+ IndexType E = (end() != IndexType::None) ? end() : start();
+ IndexType AE = (A.end() != IndexType::None) ? A.end() : A.start();
+ if (AE <= E)
+ return true;
+ }
+ return false;
+}
+
+
+void HexagonBlockRanges::IndexRange::merge(const IndexRange &A) {
+ // Allow merging adjacent ranges.
+ assert(end() == A.start() || overlaps(A));
+ IndexType AS = A.start(), AE = A.end();
+ if (AS < start() || start() == IndexType::None)
+ setStart(AS);
+ if (end() < AE || end() == IndexType::None) {
+ setEnd(AE);
+ TiedEnd = A.TiedEnd;
+ } else {
+ if (end() == AE)
+ TiedEnd |= A.TiedEnd;
+ }
+ if (A.Fixed)
+ Fixed = true;
+}
+
+
+void HexagonBlockRanges::RangeList::include(const RangeList &RL) {
+ for (auto &R : RL)
+ if (std::find(begin(), end(), R) == end())
+ push_back(R);
+}
+
+
+// Merge all overlapping ranges in the list, so that all that remains
+// is a list of disjoint ranges.
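+// For example (illustrative): { [1:4] [3:6] [8:9] } becomes
+// { [1:6] [8:9] }; with MergeAdjacent set, { [1:4] [4:6] } also
+// collapses to { [1:6] }.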
+void HexagonBlockRanges::RangeList::unionize(bool MergeAdjacent) {
+ if (empty())
+ return;
+
+ std::sort(begin(), end());
+ iterator Iter = begin();
+
+ while (Iter != end()-1) {
+ iterator Next = std::next(Iter);
+ // If MergeAdjacent is true, merge ranges A and B, where A.end == B.start.
+ // This allows merging dead ranges, but is not valid for live ranges.
+ bool Merge = MergeAdjacent && (Iter->end() == Next->start());
+ if (Merge || Iter->overlaps(*Next)) {
+ Iter->merge(*Next);
+ erase(Next);
+ continue;
+ }
+ ++Iter;
+ }
+}
+
+
+// Compute a range A-B and add it to the list.
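+// For instance (illustrative): [2:9] - [4:6] produces the two pieces
+// [2:4] and [6:9], while [2:9] - [4:None] produces just [2:4].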
+void HexagonBlockRanges::RangeList::addsub(const IndexRange &A,
+ const IndexRange &B) {
+ // Exclusion of non-overlapping ranges makes some checks simpler
+ // later in this function.
+ if (!A.overlaps(B)) {
+ // A - B = A.
+ add(A);
+ return;
+ }
+
+ IndexType AS = A.start(), AE = A.end();
+ IndexType BS = B.start(), BE = B.end();
+
+ // If AE is None, then A is included in B, since A and B overlap.
+  // The result of the subtraction is then empty, so just return.
+ if (AE == IndexType::None)
+ return;
+
+ if (AS < BS) {
+ // A starts before B.
+ // AE cannot be None since A and B overlap.
+ assert(AE != IndexType::None);
+ // Add the part of A that extends on the "less" side of B.
+ add(AS, BS, A.Fixed, false);
+ }
+
+ if (BE < AE) {
+ // BE cannot be Exit here.
+ if (BE == IndexType::None)
+ add(BS, AE, A.Fixed, false);
+ else
+ add(BE, AE, A.Fixed, false);
+ }
+}
+
+
+// Subtract a given range from each element in the list.
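+// E.g. (illustrative): subtracting [4:6] from { [2:9] [10:12] } leaves
+// { [2:4] [6:9] [10:12] }.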
+void HexagonBlockRanges::RangeList::subtract(const IndexRange &Range) {
+  // Cannot assume that the list is unionized (i.e. that it contains
+  // only non-overlapping ranges).
+ RangeList T;
+ for (iterator Next, I = begin(); I != end(); I = Next) {
+ IndexRange &Rg = *I;
+ if (Rg.overlaps(Range)) {
+ T.addsub(Rg, Range);
+ Next = this->erase(I);
+ } else {
+ Next = std::next(I);
+ }
+ }
+ include(T);
+}
+
+
+HexagonBlockRanges::InstrIndexMap::InstrIndexMap(MachineBasicBlock &B)
+ : Block(B) {
+ IndexType Idx = IndexType::First;
+ First = Idx;
+ for (auto &In : B) {
+ if (In.isDebugValue())
+ continue;
+ assert(getIndex(&In) == IndexType::None && "Instruction already in map");
+ Map.insert(std::make_pair(Idx, &In));
+ ++Idx;
+ }
+ Last = B.empty() ? IndexType::None : unsigned(Idx)-1;
+}
+
+
+MachineInstr *HexagonBlockRanges::InstrIndexMap::getInstr(IndexType Idx) const {
+ auto F = Map.find(Idx);
+  return (F != Map.end()) ? F->second : nullptr;
+}
+
+
+HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getIndex(
+ MachineInstr *MI) const {
+ for (auto &I : Map)
+ if (I.second == MI)
+ return I.first;
+ return IndexType::None;
+}
+
+
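+// Note (illustrative): getPrevIndex(First) == Entry and
+// getPrevIndex(Exit) == Last; symmetrically, getNextIndex(Last) == None.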
+HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getPrevIndex(
+ IndexType Idx) const {
+ assert (Idx != IndexType::None);
+ if (Idx == IndexType::Entry)
+ return IndexType::None;
+ if (Idx == IndexType::Exit)
+ return Last;
+ if (Idx == First)
+ return IndexType::Entry;
+ return unsigned(Idx)-1;
+}
+
+
+HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getNextIndex(
+ IndexType Idx) const {
+ assert (Idx != IndexType::None);
+ if (Idx == IndexType::Entry)
+ return IndexType::First;
+ if (Idx == IndexType::Exit || Idx == Last)
+ return IndexType::None;
+ return unsigned(Idx)+1;
+}
+
+
+void HexagonBlockRanges::InstrIndexMap::replaceInstr(MachineInstr *OldMI,
+ MachineInstr *NewMI) {
+ for (auto &I : Map) {
+ if (I.second != OldMI)
+ continue;
+ if (NewMI != nullptr)
+ I.second = NewMI;
+ else
+ Map.erase(I.first);
+ break;
+ }
+}
+
+
+HexagonBlockRanges::HexagonBlockRanges(MachineFunction &mf)
+ : MF(mf), HST(mf.getSubtarget<HexagonSubtarget>()),
+ TII(*HST.getInstrInfo()), TRI(*HST.getRegisterInfo()),
+ Reserved(TRI.getReservedRegs(mf)) {
+ // Consider all non-allocatable registers as reserved.
+ for (auto I = TRI.regclass_begin(), E = TRI.regclass_end(); I != E; ++I) {
+ auto *RC = *I;
+ if (RC->isAllocatable())
+ continue;
+ for (unsigned R : *RC)
+ Reserved[R] = true;
+ }
+}
+
+
+HexagonBlockRanges::RegisterSet HexagonBlockRanges::getLiveIns(
+ const MachineBasicBlock &B) {
+ RegisterSet LiveIns;
+ for (auto I : B.liveins())
+ if (!Reserved[I.PhysReg])
+ LiveIns.insert({I.PhysReg, 0});
+ return LiveIns;
+}
+
+
+HexagonBlockRanges::RegisterSet HexagonBlockRanges::expandToSubRegs(
+ RegisterRef R, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) {
+ RegisterSet SRs;
+
+ if (R.Sub != 0) {
+ SRs.insert(R);
+ return SRs;
+ }
+
+ if (TargetRegisterInfo::isPhysicalRegister(R.Reg)) {
+ MCSubRegIterator I(R.Reg, &TRI);
+ if (!I.isValid())
+ SRs.insert({R.Reg, 0});
+ for (; I.isValid(); ++I)
+ SRs.insert({*I, 0});
+ } else {
+ assert(TargetRegisterInfo::isVirtualRegister(R.Reg));
+ auto &RC = *MRI.getRegClass(R.Reg);
+ unsigned PReg = *RC.begin();
+ MCSubRegIndexIterator I(PReg, &TRI);
+ if (!I.isValid())
+ SRs.insert({R.Reg, 0});
+ for (; I.isValid(); ++I)
+ SRs.insert({R.Reg, I.getSubRegIndex()});
+ }
+ return SRs;
+}
+
+
+void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
+ RegToRangeMap &LiveMap) {
+ std::map<RegisterRef,IndexType> LastDef, LastUse;
+ RegisterSet LiveOnEntry;
+ MachineBasicBlock &B = IndexMap.getBlock();
+ MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
+
+ for (auto R : getLiveIns(B))
+ for (auto S : expandToSubRegs(R, MRI, TRI))
+ LiveOnEntry.insert(S);
+
+ for (auto R : LiveOnEntry)
+ LastDef[R] = IndexType::Entry;
+
+ auto closeRange = [&LastUse,&LastDef,&LiveMap] (RegisterRef R) -> void {
+ auto LD = LastDef[R], LU = LastUse[R];
+ if (LD == IndexType::None)
+ LD = IndexType::Entry;
+ if (LU == IndexType::None)
+ LU = IndexType::Exit;
+ LiveMap[R].add(LD, LU, false, false);
+ LastUse[R] = LastDef[R] = IndexType::None;
+ };
+
+ for (auto &In : B) {
+ if (In.isDebugValue())
+ continue;
+ IndexType Index = IndexMap.getIndex(&In);
+ // Process uses first.
+ for (auto &Op : In.operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.isUndef())
+ continue;
+ RegisterRef R = { Op.getReg(), Op.getSubReg() };
+ if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg])
+ continue;
+ bool IsKill = Op.isKill();
+ for (auto S : expandToSubRegs(R, MRI, TRI)) {
+ LastUse[S] = Index;
+ if (IsKill)
+ closeRange(S);
+ }
+ }
+ // Process defs.
+ for (auto &Op : In.operands()) {
+ if (!Op.isReg() || !Op.isDef() || Op.isUndef())
+ continue;
+ RegisterRef R = { Op.getReg(), Op.getSubReg() };
+ if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg])
+ continue;
+ for (auto S : expandToSubRegs(R, MRI, TRI)) {
+ if (LastDef[S] != IndexType::None)
+ closeRange(S);
+ LastDef[S] = Index;
+ }
+ }
+ }
+
+ // Collect live-on-exit.
+ RegisterSet LiveOnExit;
+ for (auto *SB : B.successors())
+ for (auto R : getLiveIns(*SB))
+ for (auto S : expandToSubRegs(R, MRI, TRI))
+ LiveOnExit.insert(S);
+
+ for (auto R : LiveOnExit)
+ LastUse[R] = IndexType::Exit;
+
+ // Process remaining registers.
+ RegisterSet Left;
+ for (auto &I : LastUse)
+ if (I.second != IndexType::None)
+ Left.insert(I.first);
+ for (auto &I : LastDef)
+ if (I.second != IndexType::None)
+ Left.insert(I.first);
+ for (auto R : Left)
+ closeRange(R);
+
+ // Finalize the live ranges.
+ for (auto &P : LiveMap)
+ P.second.unionize();
+}
+
+
+HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeLiveMap(
+ InstrIndexMap &IndexMap) {
+ RegToRangeMap LiveMap;
+ DEBUG(dbgs() << __func__ << ": index map\n" << IndexMap << '\n');
+ computeInitialLiveRanges(IndexMap, LiveMap);
+ DEBUG(dbgs() << __func__ << ": live map\n"
+ << PrintRangeMap(LiveMap, TRI) << '\n');
+ return LiveMap;
+}
+
+
+HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap(
+ InstrIndexMap &IndexMap, RegToRangeMap &LiveMap) {
+ RegToRangeMap DeadMap;
+
+ auto addDeadRanges = [&IndexMap,&LiveMap,&DeadMap] (RegisterRef R) -> void {
+ auto F = LiveMap.find(R);
+ if (F == LiveMap.end() || F->second.empty()) {
+ DeadMap[R].add(IndexType::Entry, IndexType::Exit, false, false);
+ return;
+ }
+
+ RangeList &RL = F->second;
+ RangeList::iterator A = RL.begin(), Z = RL.end()-1;
+
+ // Try to create the initial range.
+ if (A->start() != IndexType::Entry) {
+ IndexType DE = IndexMap.getPrevIndex(A->start());
+ if (DE != IndexType::Entry)
+ DeadMap[R].add(IndexType::Entry, DE, false, false);
+ }
+
+ while (A != Z) {
+      // Create a dead range that follows A. Pay attention to empty
+      // ranges (i.e. those ending with "None").
+ IndexType AE = (A->end() == IndexType::None) ? A->start() : A->end();
+ IndexType DS = IndexMap.getNextIndex(AE);
+ ++A;
+ IndexType DE = IndexMap.getPrevIndex(A->start());
+ if (DS < DE)
+ DeadMap[R].add(DS, DE, false, false);
+ }
+
+ // Try to create the final range.
+ if (Z->end() != IndexType::Exit) {
+ IndexType ZE = (Z->end() == IndexType::None) ? Z->start() : Z->end();
+ IndexType DS = IndexMap.getNextIndex(ZE);
+ if (DS < IndexType::Exit)
+ DeadMap[R].add(DS, IndexType::Exit, false, false);
+ }
+ };
+
+ MachineFunction &MF = *IndexMap.getBlock().getParent();
+ auto &MRI = MF.getRegInfo();
+ unsigned NumRegs = TRI.getNumRegs();
+ BitVector Visited(NumRegs);
+ for (unsigned R = 1; R < NumRegs; ++R) {
+ for (auto S : expandToSubRegs({R,0}, MRI, TRI)) {
+ if (Reserved[S.Reg] || Visited[S.Reg])
+ continue;
+ addDeadRanges(S);
+ Visited[S.Reg] = true;
+ }
+ }
+ for (auto &P : LiveMap)
+ if (TargetRegisterInfo::isVirtualRegister(P.first.Reg))
+ addDeadRanges(P.first);
+
+ DEBUG(dbgs() << __func__ << ": dead map\n"
+ << PrintRangeMap(DeadMap, TRI) << '\n');
+ return DeadMap;
+}
+
+
+raw_ostream &operator<< (raw_ostream &OS, HexagonBlockRanges::IndexType Idx) {
+ if (Idx == HexagonBlockRanges::IndexType::None)
+ return OS << '-';
+ if (Idx == HexagonBlockRanges::IndexType::Entry)
+ return OS << 'n';
+ if (Idx == HexagonBlockRanges::IndexType::Exit)
+ return OS << 'x';
+ return OS << unsigned(Idx)-HexagonBlockRanges::IndexType::First+1;
+}
+
+// Print an index range as '[start:end]'; a '}' in place of ']' marks
+// a tied end, and a trailing '!' marks a fixed range.
+raw_ostream &operator<< (raw_ostream &OS,
+ const HexagonBlockRanges::IndexRange &IR) {
+ OS << '[' << IR.start() << ':' << IR.end() << (IR.TiedEnd ? '}' : ']');
+ if (IR.Fixed)
+ OS << '!';
+ return OS;
+}
+
+raw_ostream &operator<< (raw_ostream &OS,
+ const HexagonBlockRanges::RangeList &RL) {
+ for (auto &R : RL)
+ OS << R << " ";
+ return OS;
+}
+
+raw_ostream &operator<< (raw_ostream &OS,
+ const HexagonBlockRanges::InstrIndexMap &M) {
+ for (auto &In : M.Block) {
+ HexagonBlockRanges::IndexType Idx = M.getIndex(&In);
+ OS << Idx << (Idx == M.Last ? ". " : " ") << In;
+ }
+ return OS;
+}
+
+raw_ostream &operator<< (raw_ostream &OS,
+ const HexagonBlockRanges::PrintRangeMap &P) {
+ for (auto &I : P.Map) {
+ const HexagonBlockRanges::RangeList &RL = I.second;
+ OS << PrintReg(I.first.Reg, &P.TRI, I.first.Sub) << " -> " << RL << "\n";
+ }
+ return OS;
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonBlockRanges.h b/llvm/lib/Target/Hexagon/HexagonBlockRanges.h
new file mode 100644
index 00000000000..a7f8fd2c8dd
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonBlockRanges.h
@@ -0,0 +1,240 @@
+//===--- HexagonBlockRanges.h ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef HEXAGON_BLOCK_RANGES_H
+#define HEXAGON_BLOCK_RANGES_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCRegisterInfo.h" // For MCPhysReg.
+#include <map>
+#include <set>
+#include <vector>
+
+namespace llvm {
+ class Function;
+ class HexagonSubtarget;
+ class MachineBasicBlock;
+ class MachineFunction;
+ class MachineInstr;
+ class MCInstrDesc;
+ class raw_ostream;
+ class TargetInstrInfo;
+ class TargetRegisterClass;
+ class TargetRegisterInfo;
+ class Type;
+}
+
+using namespace llvm;
+
+struct HexagonBlockRanges {
+ HexagonBlockRanges(MachineFunction &MF);
+
+ struct RegisterRef {
+ unsigned Reg, Sub;
+ bool operator<(RegisterRef R) const {
+ return Reg < R.Reg || (Reg == R.Reg && Sub < R.Sub);
+ }
+ };
+ typedef std::set<RegisterRef> RegisterSet;
+
+  // An "index" is an abstraction of the position of an instruction
+  // within a basic block.
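+  // For a block with three instructions the indices are, in order:
+  // Entry, 11, 12, 13, Exit (illustrative; First == 11).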
+ class IndexType {
+ public:
+ enum : unsigned {
+ None = 0,
+ Entry = 1,
+ Exit = 2,
+      First = 11  // First instruction index (leaves a gap after the
+                  // special values above).
+ };
+ static bool isInstr(IndexType X) { return X.Index >= First; }
+
+ IndexType() : Index(None) {}
+ IndexType(unsigned Idx) : Index(Idx) {}
+ operator unsigned() const;
+ bool operator== (unsigned x) const;
+ bool operator== (IndexType Idx) const;
+ bool operator!= (unsigned x) const;
+ bool operator!= (IndexType Idx) const;
+ IndexType operator++ ();
+ bool operator< (unsigned Idx) const;
+ bool operator< (IndexType Idx) const;
+ bool operator<= (IndexType Idx) const;
+
+ private:
+ bool operator> (IndexType Idx) const;
+ bool operator>= (IndexType Idx) const;
+
+ unsigned Index;
+ };
+
+ // A range of indices, essentially a representation of a live range.
+ // This is also used to represent "dead ranges", i.e. ranges where a
+ // register is dead.
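+  // E.g. (illustrative): a register defined at index 12 and killed at
+  // index 17 gets the live range [12:17]; its dead ranges would then
+  // be [Entry:11] and [18:Exit].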
+ class IndexRange : public std::pair<IndexType,IndexType> {
+ public:
+ IndexRange() : Fixed(false), TiedEnd(false) {}
+ IndexRange(IndexType Start, IndexType End, bool F = false, bool T = false)
+ : std::pair<IndexType,IndexType>(Start, End), Fixed(F), TiedEnd(T) {}
+ IndexType start() const { return first; }
+ IndexType end() const { return second; }
+
+ bool operator< (const IndexRange &A) const {
+ return start() < A.start();
+ }
+ bool overlaps(const IndexRange &A) const;
+ bool contains(const IndexRange &A) const;
+ void merge(const IndexRange &A);
+
+    bool Fixed;    // Whether the register can be renamed: "Fixed" means
+                   // it cannot.
+ bool TiedEnd; // The end is not a use, but a dead def tied to a use.
+
+ private:
+ void setStart(const IndexType &S) { first = S; }
+ void setEnd(const IndexType &E) { second = E; }
+ };
+
+ // A list of index ranges. This represents liveness of a register
+ // in a basic block.
+ class RangeList : public std::vector<IndexRange> {
+ public:
+ void add(IndexType Start, IndexType End, bool Fixed, bool TiedEnd) {
+ push_back(IndexRange(Start, End, Fixed, TiedEnd));
+ }
+ void add(const IndexRange &Range) {
+ push_back(Range);
+ }
+ void include(const RangeList &RL);
+ void unionize(bool MergeAdjacent = false);
+ void subtract(const IndexRange &Range);
+
+ private:
+ void addsub(const IndexRange &A, const IndexRange &B);
+ };
+
+ class InstrIndexMap {
+ public:
+ InstrIndexMap(MachineBasicBlock &B);
+ MachineInstr *getInstr(IndexType Idx) const;
+ IndexType getIndex(MachineInstr *MI) const;
+ MachineBasicBlock &getBlock() const { return Block; }
+ IndexType getPrevIndex(IndexType Idx) const;
+ IndexType getNextIndex(IndexType Idx) const;
+ void replaceInstr(MachineInstr *OldMI, MachineInstr *NewMI);
+
+ friend raw_ostream &operator<< (raw_ostream &OS, const InstrIndexMap &Map);
+ IndexType First, Last;
+
+ private:
+ MachineBasicBlock &Block;
+ std::map<IndexType,MachineInstr*> Map;
+ };
+
+ typedef std::map<RegisterRef,RangeList> RegToRangeMap;
+ RegToRangeMap computeLiveMap(InstrIndexMap &IndexMap);
+ RegToRangeMap computeDeadMap(InstrIndexMap &IndexMap, RegToRangeMap &LiveMap);
+ static RegisterSet expandToSubRegs(RegisterRef R,
+ const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI);
+
+ struct PrintRangeMap {
+ PrintRangeMap(const RegToRangeMap &M, const TargetRegisterInfo &I)
+ : Map(M), TRI(I) {}
+
+ friend raw_ostream &operator<< (raw_ostream &OS, const PrintRangeMap &P);
+ private:
+ const RegToRangeMap &Map;
+ const TargetRegisterInfo &TRI;
+ };
+
+private:
+ RegisterSet getLiveIns(const MachineBasicBlock &B);
+
+ void computeInitialLiveRanges(InstrIndexMap &IndexMap,
+ RegToRangeMap &LiveMap);
+
+ MachineFunction &MF;
+ const HexagonSubtarget &HST;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ BitVector Reserved;
+};
+
+
+inline HexagonBlockRanges::IndexType::operator unsigned() const {
+ assert(Index >= First);
+ return Index;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator== (unsigned x) const {
+ return Index == x;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator== (IndexType Idx) const {
+ return Index == Idx.Index;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator!= (unsigned x) const {
+ return Index != x;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator!= (IndexType Idx) const {
+ return Index != Idx.Index;
+}
+
+inline
+HexagonBlockRanges::IndexType HexagonBlockRanges::IndexType::operator++ () {
+ assert(Index != None);
+ assert(Index != Exit);
+ if (Index == Entry)
+ Index = First;
+ else
+ ++Index;
+ return *this;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator< (unsigned Idx) const {
+ return operator< (IndexType(Idx));
+}
+
+inline bool HexagonBlockRanges::IndexType::operator< (IndexType Idx) const {
+ // !(x < x).
+ if (Index == Idx.Index)
+ return false;
+ // !(None < x) for all x.
+ // !(x < None) for all x.
+ if (Index == None || Idx.Index == None)
+ return false;
+ // !(Exit < x) for all x.
+ // !(x < Entry) for all x.
+ if (Index == Exit || Idx.Index == Entry)
+ return false;
+ // Entry < x for all x != Entry.
+ // x < Exit for all x != Exit.
+ if (Index == Entry || Idx.Index == Exit)
+ return true;
+
+ return Index < Idx.Index;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator<= (IndexType Idx) const {
+ return operator==(Idx) || operator<(Idx);
+}
+
+
+raw_ostream &operator<< (raw_ostream &OS, HexagonBlockRanges::IndexType Idx);
+raw_ostream &operator<< (raw_ostream &OS,
+ const HexagonBlockRanges::IndexRange &IR);
+raw_ostream &operator<< (raw_ostream &OS,
+ const HexagonBlockRanges::RangeList &RL);
+raw_ostream &operator<< (raw_ostream &OS,
+ const HexagonBlockRanges::InstrIndexMap &M);
+raw_ostream &operator<< (raw_ostream &OS,
+ const HexagonBlockRanges::PrintRangeMap &P);
+
+#endif
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index cf4d110911f..ac3242b7922 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -10,6 +10,7 @@
#define DEBUG_TYPE "hexagon-pei"
+#include "HexagonBlockRanges.h"
#include "HexagonFrameLowering.h"
#include "HexagonInstrInfo.h"
#include "HexagonMachineFunctionInfo.h"
@@ -147,6 +148,9 @@ static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX),
static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
cl::Hidden, cl::desc("Use allocframe more conservatively"));
+static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
+ cl::init(true), cl::desc("Optimize spill slots"));
+
namespace llvm {
void initializeHexagonCallFrameInformationPass(PassRegistry&);
@@ -1046,13 +1050,13 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
// Check for an unused caller-saved register.
for ( ; *CallerSavedRegs; ++CallerSavedRegs) {
MCPhysReg FreeReg = *CallerSavedRegs;
- if (!MRI.reg_nodbg_empty(FreeReg))
+ if (MRI.isPhysRegUsed(FreeReg))
continue;
// Check aliased register usage.
bool IsCurrentRegUsed = false;
for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI)
- if (!MRI.reg_nodbg_empty(*AI)) {
+ if (MRI.isPhysRegUsed(*AI)) {
IsCurrentRegUsed = true;
break;
}
@@ -1634,7 +1638,8 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Replace predicate register pseudo spill code.
SmallVector<unsigned,8> NewRegs;
expandSpillMacros(MF, NewRegs);
-
+ if (OptimizeSpillSlots)
+ optimizeSpillSlots(MF, NewRegs);
// We need to reserve a spill slot if scavenging could potentially require
// spilling a scavenged register.
@@ -1665,6 +1670,354 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
+unsigned HexagonFrameLowering::findPhysReg(MachineFunction &MF,
+ HexagonBlockRanges::IndexRange &FIR,
+ HexagonBlockRanges::InstrIndexMap &IndexMap,
+ HexagonBlockRanges::RegToRangeMap &DeadMap,
+ const TargetRegisterClass *RC) const {
+ auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ auto &MRI = MF.getRegInfo();
+
+ auto isDead = [&FIR,&DeadMap] (unsigned Reg) -> bool {
+ auto F = DeadMap.find({Reg,0});
+ if (F == DeadMap.end())
+ return false;
+ for (auto &DR : F->second)
+ if (DR.contains(FIR))
+ return true;
+ return false;
+ };
+
+ for (unsigned Reg : RC->getRawAllocationOrder(MF)) {
+ bool Dead = true;
+ for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) {
+ if (isDead(R.Reg))
+ continue;
+ Dead = false;
+ break;
+ }
+ if (Dead)
+ return Reg;
+ }
+ return 0;
+}
+
+void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
+ SmallVectorImpl<unsigned> &VRegs) const {
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HII = *HST.getInstrInfo();
+ auto &HRI = *HST.getRegisterInfo();
+ auto &MRI = MF.getRegInfo();
+ HexagonBlockRanges HBR(MF);
+
+ typedef std::map<MachineBasicBlock*,HexagonBlockRanges::InstrIndexMap>
+ BlockIndexMap;
+ typedef std::map<MachineBasicBlock*,HexagonBlockRanges::RangeList>
+ BlockRangeMap;
+ typedef HexagonBlockRanges::IndexType IndexType;
+
+ struct SlotInfo {
+ BlockRangeMap Map;
+ unsigned Size = 0;
+ const TargetRegisterClass *RC = nullptr;
+ };
+
+ BlockIndexMap BlockIndexes;
+ SmallSet<int,4> BadFIs;
+ std::map<int,SlotInfo> FIRangeMap;
+
+ auto getRegClass = [&MRI,&HRI] (HexagonBlockRanges::RegisterRef R)
+ -> const TargetRegisterClass* {
+ if (TargetRegisterInfo::isPhysicalRegister(R.Reg))
+ assert(R.Sub == 0);
+ if (TargetRegisterInfo::isVirtualRegister(R.Reg)) {
+ auto *RCR = MRI.getRegClass(R.Reg);
+ if (R.Sub == 0)
+ return RCR;
+ unsigned PR = *RCR->begin();
+ R.Reg = HRI.getSubReg(PR, R.Sub);
+ }
+ return HRI.getMinimalPhysRegClass(R.Reg);
+ };
+ // Accumulate register classes: get a common class for a pre-existing
+ // class HaveRC and a new class NewRC. Return nullptr if a common class
+ // cannot be found, otherwise return the resulting class. If HaveRC is
+ // nullptr, assume that it is still unset.
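+  // E.g. (illustrative): IntRegs vs. IntRegs yields IntRegs, while
+  // IntRegs vs. DoubleRegs yields nullptr, since neither class
+  // contains the other.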
+ auto getCommonRC = [&HRI] (const TargetRegisterClass *HaveRC,
+ const TargetRegisterClass *NewRC)
+ -> const TargetRegisterClass* {
+ if (HaveRC == nullptr || HaveRC == NewRC)
+ return NewRC;
+ // Different classes, both non-null. Pick the more general one.
+ if (HaveRC->hasSubClassEq(NewRC))
+ return HaveRC;
+ if (NewRC->hasSubClassEq(HaveRC))
+ return NewRC;
+ return nullptr;
+ };
+
+ // Scan all blocks in the function. Check all occurrences of frame indexes,
+ // and collect relevant information.
+ for (auto &B : MF) {
+ std::map<int,IndexType> LastStore, LastLoad;
+ auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B));
+ auto &IndexMap = P.first->second;
+ DEBUG(dbgs() << "Index map for BB#" << B.getNumber() << "\n"
+ << IndexMap << '\n');
+
+ for (auto &In : B) {
+ int LFI, SFI;
+ bool Load = HII.isLoadFromStackSlot(&In, LFI) && !HII.isPredicated(&In);
+ bool Store = HII.isStoreToStackSlot(&In, SFI) && !HII.isPredicated(&In);
+ if (Load && Store) {
+ // If it's both a load and a store, then we won't handle it.
+ BadFIs.insert(LFI);
+ BadFIs.insert(SFI);
+ continue;
+ }
+ // Check for register classes of the register used as the source for
+ // the store, and the register used as the destination for the load.
+ // Also, only accept base+imm_offset addressing modes. Other addressing
+ // modes can have side-effects (post-increments, etc.). For stack
+ // slots they are very unlikely, so there is not much loss due to
+ // this restriction.
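+      // E.g. (illustrative): "memw(r29+#0) = r1" is acceptable, while a
+      // post-increment store such as "memw(r0++#4) = r1" is not.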
+ if (Load || Store) {
+ int TFI = Load ? LFI : SFI;
+ unsigned AM = HII.getAddrMode(&In);
+ SlotInfo &SI = FIRangeMap[TFI];
+ bool Bad = (AM != HexagonII::BaseImmOffset);
+ if (!Bad) {
+ // If the addressing mode is ok, check the register class.
+ const TargetRegisterClass *RC = nullptr;
+ if (Load) {
+ MachineOperand &DataOp = In.getOperand(0);
+ RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()});
+ } else {
+ MachineOperand &DataOp = In.getOperand(2);
+ RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()});
+ }
+ RC = getCommonRC(SI.RC, RC);
+ if (RC == nullptr)
+ Bad = true;
+ else
+ SI.RC = RC;
+ }
+ if (!Bad) {
+ // Check sizes.
+ unsigned S = (1U << (HII.getMemAccessSize(&In) - 1));
+ if (SI.Size != 0 && SI.Size != S)
+ Bad = true;
+ else
+ SI.Size = S;
+ }
+ if (Bad)
+ BadFIs.insert(TFI);
+ }
+
+ // Locate uses of frame indices.
+ for (unsigned i = 0, n = In.getNumOperands(); i < n; ++i) {
+ const MachineOperand &Op = In.getOperand(i);
+ if (!Op.isFI())
+ continue;
+ int FI = Op.getIndex();
+ // Make sure that the following operand is an immediate and that
+ // it is 0. This is the offset in the stack object.
+ if (i+1 >= n || !In.getOperand(i+1).isImm() ||
+ In.getOperand(i+1).getImm() != 0)
+ BadFIs.insert(FI);
+ if (BadFIs.count(FI))
+ continue;
+
+ IndexType Index = IndexMap.getIndex(&In);
+ if (Load) {
+ if (LastStore[FI] == IndexType::None)
+ LastStore[FI] = IndexType::Entry;
+ LastLoad[FI] = Index;
+ } else if (Store) {
+ HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
+ if (LastStore[FI] != IndexType::None)
+ RL.add(LastStore[FI], LastLoad[FI], false, false);
+ else if (LastLoad[FI] != IndexType::None)
+ RL.add(IndexType::Entry, LastLoad[FI], false, false);
+ LastLoad[FI] = IndexType::None;
+ LastStore[FI] = Index;
+ } else {
+ BadFIs.insert(FI);
+ }
+ }
+ }
+
+ for (auto &I : LastLoad) {
+ IndexType LL = I.second;
+ if (LL == IndexType::None)
+ continue;
+ auto &RL = FIRangeMap[I.first].Map[&B];
+ IndexType &LS = LastStore[I.first];
+ if (LS != IndexType::None)
+ RL.add(LS, LL, false, false);
+ else
+ RL.add(IndexType::Entry, LL, false, false);
+ LS = IndexType::None;
+ }
+ for (auto &I : LastStore) {
+ IndexType LS = I.second;
+ if (LS == IndexType::None)
+ continue;
+ auto &RL = FIRangeMap[I.first].Map[&B];
+ RL.add(LS, IndexType::None, false, false);
+ }
+ }
+
+ DEBUG({
+ for (auto &P : FIRangeMap) {
+ dbgs() << "fi#" << P.first;
+ if (BadFIs.count(P.first))
+ dbgs() << " (bad)";
+ dbgs() << " RC: ";
+ if (P.second.RC != nullptr)
+ dbgs() << HRI.getRegClassName(P.second.RC) << '\n';
+ else
+ dbgs() << "<null>\n";
+ for (auto &R : P.second.Map)
+ dbgs() << " BB#" << R.first->getNumber() << " { " << R.second << "}\n";
+ }
+ });
+
+ // When a slot is loaded from in a block without being stored to in the
+ // same block, it is live-on-entry to this block. To avoid CFG analysis,
+ // consider this slot to be live-on-exit from all blocks.
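+  // E.g. (illustrative): if fi#1 is stored to only in BB#0 and loaded
+  // from in BB#1, fi#1 is treated as live-on-exit from all blocks.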
+ SmallSet<int,4> LoxFIs;
+
+ std::map<MachineBasicBlock*,std::vector<int>> BlockFIMap;
+
+ for (auto &P : FIRangeMap) {
+ // P = pair(FI, map: BB->RangeList)
+ if (BadFIs.count(P.first))
+ continue;
+ for (auto &B : MF) {
+ auto F = P.second.Map.find(&B);
+ // F = pair(BB, RangeList)
+ if (F == P.second.Map.end() || F->second.empty())
+ continue;
+ HexagonBlockRanges::IndexRange &IR = F->second.front();
+ if (IR.start() == IndexType::Entry)
+ LoxFIs.insert(P.first);
+ BlockFIMap[&B].push_back(P.first);
+ }
+ }
+
+ DEBUG({
+ dbgs() << "Block-to-FI map (* -- live-on-exit):\n";
+ for (auto &P : BlockFIMap) {
+ auto &FIs = P.second;
+ if (FIs.empty())
+ continue;
+ dbgs() << " BB#" << P.first->getNumber() << ": {";
+ for (auto I : FIs) {
+ dbgs() << " fi#" << I;
+ if (LoxFIs.count(I))
+ dbgs() << '*';
+ }
+ dbgs() << " }\n";
+ }
+ });
+
+  // Eliminate the loads first; once all loads from a slot have been
+  // eliminated, all stores to it can be eliminated as well.
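+  // Sketch of the intended rewrite on an example (illustrative only):
+  //   memw(fi#0) = r17       =>   r2 = r17     (copy-in, r2 free)
+  //   ...                         ...
+  //   r3 = memw(fi#0)        =>   r3 = r2      (copy-out)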
+ for (auto &B : MF) {
+ auto F = BlockIndexes.find(&B);
+ assert(F != BlockIndexes.end());
+ HexagonBlockRanges::InstrIndexMap &IM = F->second;
+ HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM);
+ HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM);
+ DEBUG(dbgs() << "BB#" << B.getNumber() << " dead map\n"
+ << HexagonBlockRanges::PrintRangeMap(DM, HRI));
+
+ for (auto FI : BlockFIMap[&B]) {
+ if (BadFIs.count(FI))
+ continue;
+ DEBUG(dbgs() << "Working on fi#" << FI << '\n');
+ HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
+ for (auto &Range : RL) {
+ DEBUG(dbgs() << "--Examining range:" << RL << '\n');
+ if (!IndexType::isInstr(Range.start()) ||
+ !IndexType::isInstr(Range.end()))
+ continue;
+ MachineInstr *SI = IM.getInstr(Range.start());
+ MachineInstr *EI = IM.getInstr(Range.end());
+ assert(SI->mayStore() && "Unexpected start instruction");
+ assert(EI->mayLoad() && "Unexpected end instruction");
+ MachineOperand &SrcOp = SI->getOperand(2);
+
+ HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),
+ SrcOp.getSubReg() };
+ auto *RC = getRegClass({SrcOp.getReg(), SrcOp.getSubReg()});
+ // The this-> is needed to unconfuse MSVC.
+ unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
+ DEBUG(dbgs() << "Replacement reg:" << PrintReg(FoundR, &HRI) << '\n');
+ if (FoundR == 0)
+ continue;
+
+ // Generate the copy-in: "FoundR = COPY SrcR" at the store location.
+ MachineBasicBlock::iterator StartIt = SI, NextIt;
+ MachineInstr *CopyIn = nullptr;
+ if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) {
+ DebugLoc DL = SI->getDebugLoc();
+ CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR)
+ .addOperand(SrcOp);
+ }
+
+ ++StartIt;
+ // Check if this is a last store and the FI is live-on-exit.
+ if (LoxFIs.count(FI) && (&Range == &RL.back())) {
+ // Update store's source register.
+ if (unsigned SR = SrcOp.getSubReg())
+ SrcOp.setReg(HRI.getSubReg(FoundR, SR));
+ else
+ SrcOp.setReg(FoundR);
+ SrcOp.setSubReg(0);
+ // We are keeping this register live.
+ SrcOp.setIsKill(false);
+ } else {
+          IM.replaceInstr(SI, CopyIn);
+          B.erase(SI);
+ }
+
+ auto EndIt = std::next(MachineBasicBlock::iterator(EI));
+ for (auto It = StartIt; It != EndIt; It = NextIt) {
+ MachineInstr *MI = &*It;
+ NextIt = std::next(It);
+ int TFI;
+ if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI)
+ continue;
+ unsigned DstR = MI->getOperand(0).getReg();
+ assert(MI->getOperand(0).getSubReg() == 0);
+ MachineInstr *CopyOut = nullptr;
+ if (DstR != FoundR) {
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned MemSize = (1U << (HII.getMemAccessSize(MI) - 1));
+ assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset);
+ unsigned CopyOpc = TargetOpcode::COPY;
+ if (HII.isSignExtendingLoad(MI))
+ CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth;
+ else if (HII.isZeroExtendingLoad(MI))
+ CopyOpc = (MemSize == 1) ? Hexagon::A2_zxtb : Hexagon::A2_zxth;
+ CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR)
+ .addReg(FoundR, getKillRegState(MI == EI));
+ }
+ IM.replaceInstr(MI, CopyOut);
+ B.erase(It);
+ }
+
+ // Update the dead map.
+ HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 };
+ for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI))
+ DM[RR].subtract(Range);
+ } // for Range in range list
+ }
+ }
+}
+
+
void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const {
MachineBasicBlock &MB = *AI->getParent();
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index 15a276354ef..c9cae04cb30 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -11,6 +11,7 @@
#define LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H
#include "Hexagon.h"
+#include "HexagonBlockRanges.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -124,6 +125,13 @@ private:
bool expandSpillMacros(MachineFunction &MF,
SmallVectorImpl<unsigned> &NewRegs) const;
+ unsigned findPhysReg(MachineFunction &MF, HexagonBlockRanges::IndexRange &FIR,
+ HexagonBlockRanges::InstrIndexMap &IndexMap,
+ HexagonBlockRanges::RegToRangeMap &DeadMap,
+ const TargetRegisterClass *RC) const;
+ void optimizeSpillSlots(MachineFunction &MF,
+ SmallVectorImpl<unsigned> &VRegs) const;
+
void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB,
MachineBasicBlock *&EpilogB) const;
diff --git a/llvm/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll b/llvm/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll
new file mode 100644
index 00000000000..561013b174d
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll
@@ -0,0 +1,49 @@
+; Check that a callee-saved register will be saved correctly if
+; the predicate-to-GPR spilling code uses it.
+;
+; RUN: llc -march=hexagon < %s | FileCheck %s
+;
+; We expect to spill p0 into a general-purpose register and keep it there,
+; without adding an extra spill of that register.
+;
+; CHECK: PredSpill:
+; CHECK: memd(r29{{.*}}) = r17:16
+; CHECK-DAG: r{{[0-9]+}} = p0
+; CHECK-DAG: p0 = r{{[0-9]+}}
+; CHECK-NOT: = memw(r29
+;
+
+define void @PredSpill() {
+entry:
+ br i1 undef, label %if.then, label %if.else.14
+
+if.then: ; preds = %entry
+ br i1 undef, label %if.end.57, label %if.else
+
+if.else: ; preds = %if.then
+ unreachable
+
+if.else.14: ; preds = %entry
+ br i1 undef, label %if.then.17, label %if.end.57
+
+if.then.17: ; preds = %if.else.14
+ br i1 undef, label %if.end.57, label %if.then.20
+
+if.then.20: ; preds = %if.then.17
+ %call21 = tail call i32 @myfun()
+ %tobool22 = icmp eq i32 %call21, 0
+ %0 = tail call i32 @myfun()
+ br i1 %tobool22, label %if.else.42, label %if.then.23
+
+if.then.23: ; preds = %if.then.20
+ unreachable
+
+if.else.42: ; preds = %if.then.20
+ ret void
+
+if.end.57: ; preds = %if.then.17, %if.else.14, %if.then
+ ret void
+}
+
+declare i32 @myfun()
+
diff --git a/llvm/test/CodeGen/Hexagon/avoid-predspill.ll b/llvm/test/CodeGen/Hexagon/avoid-predspill.ll
index 883c16a51a3..159c149c442 100644
--- a/llvm/test/CodeGen/Hexagon/avoid-predspill.ll
+++ b/llvm/test/CodeGen/Hexagon/avoid-predspill.ll
@@ -1,6 +1,3 @@
-; This functionality will be restored shortly.
-; XFAIL: *
-
; RUN: llc -march=hexagon -O2 < %s | FileCheck %s
;
; This checks that predicate registers are moved to GPRs instead of spilling
diff --git a/llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll b/llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll
new file mode 100644
index 00000000000..6fb0a3e2658
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll
@@ -0,0 +1,144 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv60 -enable-hexagon-hvx-double \
+; RUN: -hexagon-bit=0 < %s | FileCheck %s
+
+; This spill should be eliminated.
+; CHECK-NOT: vmem(r29+#6)
+
+define void @test(i8* noalias nocapture %key, i8* noalias nocapture %data1) #0 {
+entry:
+ %0 = bitcast i8* %key to <32 x i32>*
+ %1 = bitcast i8* %data1 to <32 x i32>*
+ br label %for.body
+
+for.body:
+ %pkey.0542 = phi <32 x i32>* [ %0, %entry ], [ null, %for.body ]
+ %pdata0.0541 = phi <32 x i32>* [ null, %entry ], [ %add.ptr48, %for.body ]
+ %pdata1.0540 = phi <32 x i32>* [ %1, %entry ], [ %add.ptr49, %for.body ]
+ %dAccum0.0539 = phi <64 x i32> [ undef, %entry ], [ %86, %for.body ]
+ %2 = load <32 x i32>, <32 x i32>* %pkey.0542, align 128
+ %3 = load <32 x i32>, <32 x i32>* %pdata0.0541, align 128
+ %4 = load <32 x i32>, <32 x i32>* undef, align 128
+ %arrayidx4 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 2
+ %5 = load <32 x i32>, <32 x i32>* %arrayidx4, align 128
+ %arrayidx5 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 2
+ %6 = load <32 x i32>, <32 x i32>* %arrayidx5, align 128
+ %7 = load <32 x i32>, <32 x i32>* null, align 128
+ %8 = load <32 x i32>, <32 x i32>* undef, align 128
+ %9 = load <32 x i32>, <32 x i32>* null, align 128
+ %arrayidx9 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 3
+ %arrayidx10 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 6
+ %10 = load <32 x i32>, <32 x i32>* %arrayidx10, align 128
+ %arrayidx12 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 4
+ %11 = load <32 x i32>, <32 x i32>* %arrayidx12, align 128
+ %arrayidx13 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 8
+ %arrayidx14 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 8
+ %12 = load <32 x i32>, <32 x i32>* %arrayidx14, align 128
+ %arrayidx15 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 5
+ %13 = load <32 x i32>, <32 x i32>* %arrayidx15, align 128
+ %arrayidx16 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 10
+ %arrayidx17 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 10
+ %14 = load <32 x i32>, <32 x i32>* %arrayidx17, align 128
+ %arrayidx18 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 6
+ %15 = load <32 x i32>, <32 x i32>* %arrayidx18, align 128
+ %arrayidx19 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 12
+ %16 = load <32 x i32>, <32 x i32>* %arrayidx19, align 128
+ %arrayidx20 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 12
+ %17 = load <32 x i32>, <32 x i32>* %arrayidx20, align 128
+ %arrayidx22 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 14
+ %18 = load <32 x i32>, <32 x i32>* %arrayidx22, align 128
+ %arrayidx23 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 14
+ %19 = load <32 x i32>, <32 x i32>* %arrayidx23, align 128
+ %20 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %2, <32 x i32> %11)
+ %21 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %11, <32 x i32> %2)
+ %22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %2, <32 x i32> %11)
+ %23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> undef, <32 x i32> %3)
+ %24 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %12, <32 x i32> undef)
+ %25 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %7, <32 x i32> %15)
+ %26 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %15, <32 x i32> %7)
+ %27 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %7, <32 x i32> %15)
+ %28 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %16, <32 x i32> %8)
+ %29 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %8, <32 x i32> %16)
+ %30 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %17, <32 x i32> %9)
+ %31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %9, <32 x i32> %17)
+ %32 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %4, <32 x i32> %13)
+ %33 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %13, <32 x i32> %4)
+ %34 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %4, <32 x i32> %13)
+ %35 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> undef, <32 x i32> %5)
+ %36 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %5, <32 x i32> undef)
+ %37 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %14, <32 x i32> %6)
+ %38 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %6, <32 x i32> %14)
+ %39 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)
+ %40 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> zeroinitializer)
+ %41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %18, <32 x i32> %10)
+ %42 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %10, <32 x i32> %18)
+ %43 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %19, <32 x i32> undef)
+ %44 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> %19)
+ %45 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %21, <32 x i32> %26)
+ %46 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %26, <32 x i32> %21)
+ %47 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %21, <32 x i32> %26)
+ %48 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %28, <32 x i32> %23)
+ %49 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %23, <32 x i32> %28)
+ %50 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %30, <32 x i32> %24)
+ %51 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %24, <32 x i32> %30)
+ %52 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %22, <32 x i32> %27)
+ %53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %27, <32 x i32> %22)
+ %54 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %22, <32 x i32> %27)
+ %55 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %29, <32 x i32> undef)
+ %56 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> undef, <32 x i32> %31)
+ %57 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %33, <32 x i32> %39)
+ %58 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %39, <32 x i32> %33)
+ %59 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %33, <32 x i32> %39)
+ %60 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %41, <32 x i32> %35)
+ %61 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %43, <32 x i32> %37)
+ %62 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %34, <32 x i32> %40)
+ %63 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %42, <32 x i32> %36)
+ %64 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %38, <32 x i32> %44)
+ %65 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %46, <32 x i32> %58)
+ %66 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %58, <32 x i32> %46)
+ %67 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %60, <32 x i32> %48)
+ %68 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %61, <32 x i32> %50)
+ %69 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %47, <32 x i32> %59)
+ %70 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %69, <32 x i32> %51, <32 x i32> zeroinitializer)
+ %71 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %53, <32 x i32> zeroinitializer)
+ %72 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %71, <32 x i32> %63, <32 x i32> %55)
+ %73 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %54, <32 x i32> undef)
+ %74 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %73, <32 x i32> %56, <32 x i32> %64)
+ %75 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %68, <32 x i32> %67)
+ %76 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %70, <32 x i32> undef)
+ %77 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> zeroinitializer, <32 x i32> %72)
+ %78 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %74, <32 x i32> zeroinitializer)
+ %79 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %dAccum0.0539, <32 x i32> %75, i32 65537)
+ %80 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %79, <32 x i32> zeroinitializer, i32 65537)
+ %81 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %80, <32 x i32> zeroinitializer, i32 65537)
+ %82 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %81, <32 x i32> %76, i32 65537)
+ %83 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %82, <32 x i32> %77, i32 65537)
+ %84 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %83, <32 x i32> zeroinitializer, i32 65537)
+ %85 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %84, <32 x i32> undef, i32 65537)
+ %86 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %85, <32 x i32> %78, i32 65537)
+ store <32 x i32> %66, <32 x i32>* %pkey.0542, align 128
+ store <32 x i32> %75, <32 x i32>* %pdata0.0541, align 128
+ store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx4, align 128
+ store <32 x i32> zeroinitializer, <32 x i32>* undef, align 128
+ store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx20, align 128
+ store <32 x i32> zeroinitializer, <32 x i32>* null, align 128
+ %add.ptr48 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 16
+ %add.ptr49 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 16
+ br i1 false, label %for.end, label %for.body
+
+for.end:
+ %87 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %86)
+ ret void
+}
+
+declare <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>) #1
+
+declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
+
+declare <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32>, <32 x i32>) #1
+
+declare <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32>, <32 x i32>, i32) #1
+
+declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll b/llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll
new file mode 100644
index 00000000000..db9ed55d2da
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll
@@ -0,0 +1,80 @@
+; RUN: llc -O0 -march=hexagon -mcpu=hexagonv60 < %s | FileCheck %s
+
+; CHECK: vmem
+
+target triple = "hexagon"
+
+@vecpreds = external global [15 x <16 x i32>], align 64
+@vectors = external global [15 x <16 x i32>], align 64
+@vector_pairs = external global [15 x <32 x i32>], align 128
+@.str1 = external hidden unnamed_addr constant [20 x i8], align 1
+@.str2 = external hidden unnamed_addr constant [43 x i8], align 1
+@Q6VecPredResult = external global <16 x i32>, align 64
+@.str52 = external hidden unnamed_addr constant [57 x i8], align 1
+@.str54 = external hidden unnamed_addr constant [59 x i8], align 1
+@VectorResult = external global <16 x i32>, align 64
+@.str243 = external hidden unnamed_addr constant [60 x i8], align 1
+@.str251 = external hidden unnamed_addr constant [77 x i8], align 1
+@.str290 = external hidden unnamed_addr constant [65 x i8], align 1
+@VectorPairResult = external global <32 x i32>, align 128
+
+; Function Attrs: nounwind
+declare void @print_vector(i32, i8*) #0
+
+; Function Attrs: nounwind
+declare i32 @printf(i8*, ...) #0
+
+; Function Attrs: nounwind
+declare void @print_vecpred(i32, i8*) #0
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+
+; Function Attrs: nounwind
+declare void @init_vectors() #0
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
+
+; Function Attrs: nounwind
+declare void @init_addresses() #0
+
+; Function Attrs: nounwind
+declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+ %0 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %1 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ call void @print_vecpred(i32 64, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*))
+ %2 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %call50 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([57 x i8], [57 x i8]* @.str52, i32 0, i32 0)) #3
+ %3 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %call52 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([59 x i8], [59 x i8]* @.str54, i32 0, i32 0)) #3
+ %4 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %call300 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str290, i32 0, i32 0)) #3
+ %5 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %6 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %call1373 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @.str243, i32 0, i32 0)) #3
+ %7 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
+ %call1381 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([77 x i8], [77 x i8]* @.str251, i32 0, i32 0)) #3
+ %8 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
+ %9 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 16843009)
+ call void @print_vector(i32 64, i8* bitcast (<16 x i32>* @VectorResult to i8*))
+ %10 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
+ %11 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %10, i32 16843009)
+ %12 = bitcast <512 x i1> %11 to <16 x i32>
+ %13 = bitcast <16 x i32> %12 to <512 x i1>
+ %14 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %13, <16 x i32> undef, <16 x i32> undef)
+ store <16 x i32> %14, <16 x i32>* @VectorResult, align 64
+ ret i32 0
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
diff --git a/llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll b/llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll
new file mode 100644
index 00000000000..d120295fa52
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll
@@ -0,0 +1,80 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv60 -O2 -enable-hexagon-hvx < %s | FileCheck %s
+
+; CHECK: vmem(r{{[0-9]+}}+#3) = v{{[0-9]+}}
+; CHECK: call puts
+; CHECK: call print_vecpred
+; CHECK: v{{[0-9]+}}{{ *}}={{ *}}vmem(r{{[0-9]+}}+#3)
+
+target triple = "hexagon"
+
+@K = global i64 0, align 8
+@src = global i32 -1, align 4
+@Q6VecPredResult = common global <16 x i32> zeroinitializer, align 64
+@dst_addresses = common global [15 x i64] zeroinitializer, align 8
+@ptr_addresses = common global [15 x i8*] zeroinitializer, align 8
+@src_addresses = common global [15 x i8*] zeroinitializer, align 8
+@ptr = common global [32768 x i32] zeroinitializer, align 8
+@vecpreds = common global [15 x <16 x i32>] zeroinitializer, align 64
+@VectorResult = common global <16 x i32> zeroinitializer, align 64
+@vectors = common global [15 x <16 x i32>] zeroinitializer, align 64
+@VectorPairResult = common global <32 x i32> zeroinitializer, align 128
+@vector_pairs = common global [15 x <32 x i32>] zeroinitializer, align 128
+@str = private unnamed_addr constant [106 x i8] c"Q6VecPred4 : Q6_Q_vandor_QVR(Q6_Q_vand_VR(Q6_V_vsplat_R(1+1),(0x01010101)),Q6_V_vsplat_R(0+1),INT32_MIN)\00"
+@str3 = private unnamed_addr constant [99 x i8] c"Q6VecPred4 : Q6_Q_vandor_QVR(Q6_Q_vand_VR(Q6_V_vsplat_R(1+1),(0x01010101)),Q6_V_vsplat_R(0+1),-1)\00"
+@str4 = private unnamed_addr constant [98 x i8] c"Q6VecPred4 : Q6_Q_vandor_QVR(Q6_Q_vand_VR(Q6_V_vsplat_R(1+1),(0x01010101)),Q6_V_vsplat_R(0+1),0)\00"
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+ %call = tail call i32 bitcast (i32 (...)* @init_addresses to i32 ()*)() #3
+ %call1 = tail call i32 @acquire_vector_unit(i8 zeroext 0) #3
+ tail call void @init_vectors() #3
+ %0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 2)
+ %1 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 16843009)
+ %2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
+ %3 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 -2147483648)
+ %4 = bitcast <512 x i1> %3 to <16 x i32>
+ store <16 x i32> %4, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([106 x i8], [106 x i8]* @str, i32 0, i32 0))
+ tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
+ %5 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 -1)
+ %6 = bitcast <512 x i1> %5 to <16 x i32>
+ store <16 x i32> %6, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
+ %puts5 = tail call i32 @puts(i8* getelementptr inbounds ([99 x i8], [99 x i8]* @str3, i32 0, i32 0))
+ tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
+ %7 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 0)
+ %8 = bitcast <512 x i1> %7 to <16 x i32>
+ store <16 x i32> %8, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
+ %puts6 = tail call i32 @puts(i8* getelementptr inbounds ([98 x i8], [98 x i8]* @str4, i32 0, i32 0))
+ tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
+ ret i32 0
+}
+
+declare i32 @init_addresses(...) #1
+
+declare i32 @acquire_vector_unit(i8 zeroext) #1
+
+declare void @init_vectors() #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #2
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #2
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #2
+
+declare void @print_vecpred(i32, i8*) #1
+
+; Function Attrs: nounwind
+declare i32 @puts(i8* nocapture readonly) #3
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}