-rw-r--r--   llvm/lib/Target/Hexagon/CMakeLists.txt                    |   1
-rw-r--r--   llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp            | 484
-rw-r--r--   llvm/lib/Target/Hexagon/HexagonBlockRanges.h              | 240
-rw-r--r--   llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp          | 359
-rw-r--r--   llvm/lib/Target/Hexagon/HexagonFrameLowering.h            |   8
-rw-r--r--   llvm/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll  |  49
-rw-r--r--   llvm/test/CodeGen/Hexagon/avoid-predspill.ll              |   3
-rw-r--r--   llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll         | 144
-rw-r--r--   llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll            |  80
-rw-r--r--   llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll              |  80
10 files changed, 1442 insertions, 6 deletions
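The patch adds a per-basic-block live/dead range analysis (HexagonBlockRanges) and uses it in the frame lowering to keep spilled values in registers that are provably dead over a spill slot's store-to-load span, instead of going through memory. The analysis is, at heart, interval bookkeeping over instruction indices. As orientation for the code below, here is a minimal, self-contained model of the merge step that RangeList::unionize() performs; it uses plain inclusive integer ranges and omits the patch's Entry/Exit sentinels, TiedEnd/Fixed flags, and sub-register expansion, so it is a sketch of the idea, not the patch's implementation:

    // Simplified model of RangeList::unionize(): sort, then sweep once,
    // folding each range into the previous one when they overlap (or touch,
    // if MergeAdjacent is set -- per the patch's comment, valid for dead
    // ranges but not for live ranges).
    #include <algorithm>
    #include <cstdio>
    #include <utility>
    #include <vector>

    using Range = std::pair<unsigned, unsigned>; // inclusive [start, end]

    static void unionize(std::vector<Range> &RL, bool MergeAdjacent = false) {
      if (RL.empty())
        return;
      std::sort(RL.begin(), RL.end());
      std::vector<Range> Out{RL.front()};
      for (size_t I = 1; I != RL.size(); ++I) {
        Range &Last = Out.back();
        bool Overlap = RL[I].first <= Last.second;
        bool Adjacent = MergeAdjacent && RL[I].first == Last.second + 1;
        if (Overlap || Adjacent)
          Last.second = std::max(Last.second, RL[I].second);
        else
          Out.push_back(RL[I]);
      }
      RL = std::move(Out);
    }

    int main() {
      std::vector<Range> RL = {{5, 9}, {1, 3}, {8, 12}, {13, 14}};
      unionize(RL, /*MergeAdjacent=*/true);
      for (auto &R : RL)
        std::printf("[%u:%u] ", R.first, R.second); // prints: [1:3] [5:14]
      std::printf("\n");
    }

The real implementation merges in place and defines "adjacent" as sharing an endpoint index rather than differing by one; the shape of the sweep is the same.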
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt index f7b7990d2b3..b329948ee7d 100644 --- a/llvm/lib/Target/Hexagon/CMakeLists.txt +++ b/llvm/lib/Target/Hexagon/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_target(HexagonCodeGen HexagonAsmPrinter.cpp HexagonBitSimplify.cpp HexagonBitTracker.cpp + HexagonBlockRanges.cpp HexagonCFGOptimizer.cpp HexagonCommonGEP.cpp HexagonCopyToCombine.cpp diff --git a/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp b/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp new file mode 100644 index 00000000000..ac5449afdc6 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp @@ -0,0 +1,484 @@ +//===--- HexagonBlockRanges.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hbr" + +#include "HexagonBlockRanges.h" +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <map> +#include <vector> + +using namespace llvm; + +bool HexagonBlockRanges::IndexRange::overlaps(const IndexRange &A) const { + // If A contains start(), or "this" contains A.start(), then overlap. + IndexType S = start(), E = end(), AS = A.start(), AE = A.end(); + if (AS == S) + return true; + bool SbAE = (S < AE) || (S == AE && A.TiedEnd); // S-before-AE. + bool ASbE = (AS < E) || (AS == E && TiedEnd); // AS-before-E. + if ((AS < S && SbAE) || (S < AS && ASbE)) + return true; + // Otherwise no overlap. + return false; +} + + +bool HexagonBlockRanges::IndexRange::contains(const IndexRange &A) const { + if (start() <= A.start()) { + // Treat "None" in the range end as equal to the range start. + IndexType E = (end() != IndexType::None) ? end() : start(); + IndexType AE = (A.end() != IndexType::None) ? A.end() : A.start(); + if (AE <= E) + return true; + } + return false; +} + + +void HexagonBlockRanges::IndexRange::merge(const IndexRange &A) { + // Allow merging adjacent ranges. + assert(end() == A.start() || overlaps(A)); + IndexType AS = A.start(), AE = A.end(); + if (AS < start() || start() == IndexType::None) + setStart(AS); + if (end() < AE || end() == IndexType::None) { + setEnd(AE); + TiedEnd = A.TiedEnd; + } else { + if (end() == AE) + TiedEnd |= A.TiedEnd; + } + if (A.Fixed) + Fixed = true; +} + + +void HexagonBlockRanges::RangeList::include(const RangeList &RL) { + for (auto &R : RL) + if (std::find(begin(), end(), R) == end()) + push_back(R); +} + + +// Merge all overlapping ranges in the list, so that all that remains +// is a list of disjoint ranges. +void HexagonBlockRanges::RangeList::unionize(bool MergeAdjacent) { + if (empty()) + return; + + std::sort(begin(), end()); + iterator Iter = begin(); + + while (Iter != end()-1) { + iterator Next = std::next(Iter); + // If MergeAdjacent is true, merge ranges A and B, where A.end == B.start. + // This allows merging dead ranges, but is not valid for live ranges. 
+      bool Merge = MergeAdjacent && (Iter->end() == Next->start());
+      if (Merge || Iter->overlaps(*Next)) {
+        Iter->merge(*Next);
+        erase(Next);
+        continue;
+      }
+      ++Iter;
+    }
+}
+
+
+// Compute a range A-B and add it to the list.
+void HexagonBlockRanges::RangeList::addsub(const IndexRange &A,
+      const IndexRange &B) {
+  // Exclusion of non-overlapping ranges makes some checks simpler
+  // later in this function.
+  if (!A.overlaps(B)) {
+    // A - B = A.
+    add(A);
+    return;
+  }
+
+  IndexType AS = A.start(), AE = A.end();
+  IndexType BS = B.start(), BE = B.end();
+
+  // If AE is None, then A is included in B, since A and B overlap.
+  // The result of the subtraction is empty, so just return.
+  if (AE == IndexType::None)
+    return;
+
+  if (AS < BS) {
+    // A starts before B.
+    // AE cannot be None since A and B overlap.
+    assert(AE != IndexType::None);
+    // Add the part of A that extends on the "less" side of B.
+    add(AS, BS, A.Fixed, false);
+  }
+
+  if (BE < AE) {
+    // BE cannot be Exit here.
+    if (BE == IndexType::None)
+      add(BS, AE, A.Fixed, false);
+    else
+      add(BE, AE, A.Fixed, false);
+  }
+}
+
+
+// Subtract a given range from each element in the list.
+void HexagonBlockRanges::RangeList::subtract(const IndexRange &Range) {
+  // Cannot assume that the list is unionized (i.e. contains only non-
+  // overlapping ranges).
+  RangeList T;
+  for (iterator Next, I = begin(); I != end(); I = Next) {
+    IndexRange &Rg = *I;
+    if (Rg.overlaps(Range)) {
+      T.addsub(Rg, Range);
+      Next = this->erase(I);
+    } else {
+      Next = std::next(I);
+    }
+  }
+  include(T);
+}
+
+
+HexagonBlockRanges::InstrIndexMap::InstrIndexMap(MachineBasicBlock &B)
+    : Block(B) {
+  IndexType Idx = IndexType::First;
+  First = Idx;
+  for (auto &In : B) {
+    if (In.isDebugValue())
+      continue;
+    assert(getIndex(&In) == IndexType::None && "Instruction already in map");
+    Map.insert(std::make_pair(Idx, &In));
+    ++Idx;
+  }
+  Last = B.empty() ? IndexType::None : unsigned(Idx)-1;
+}
+
+
+MachineInstr *HexagonBlockRanges::InstrIndexMap::getInstr(IndexType Idx) const {
+  auto F = Map.find(Idx);
+  return (F != Map.end()) ? F->second : 0;
+}
+
+
+HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getIndex(
+      MachineInstr *MI) const {
+  for (auto &I : Map)
+    if (I.second == MI)
+      return I.first;
+  return IndexType::None;
+}
+
+
+HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getPrevIndex(
+      IndexType Idx) const {
+  assert (Idx != IndexType::None);
+  if (Idx == IndexType::Entry)
+    return IndexType::None;
+  if (Idx == IndexType::Exit)
+    return Last;
+  if (Idx == First)
+    return IndexType::Entry;
+  return unsigned(Idx)-1;
+}
+
+
+HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getNextIndex(
+      IndexType Idx) const {
+  assert (Idx != IndexType::None);
+  if (Idx == IndexType::Entry)
+    return IndexType::First;
+  if (Idx == IndexType::Exit || Idx == Last)
+    return IndexType::None;
+  return unsigned(Idx)+1;
+}
+
+
+void HexagonBlockRanges::InstrIndexMap::replaceInstr(MachineInstr *OldMI,
+      MachineInstr *NewMI) {
+  for (auto &I : Map) {
+    if (I.second != OldMI)
+      continue;
+    if (NewMI != nullptr)
+      I.second = NewMI;
+    else
+      Map.erase(I.first);
+    break;
+  }
+}
+
+
+HexagonBlockRanges::HexagonBlockRanges(MachineFunction &mf)
+    : MF(mf), HST(mf.getSubtarget<HexagonSubtarget>()),
+      TII(*HST.getInstrInfo()), TRI(*HST.getRegisterInfo()),
+      Reserved(TRI.getReservedRegs(mf)) {
+  // Consider all non-allocatable registers as reserved.
+ for (auto I = TRI.regclass_begin(), E = TRI.regclass_end(); I != E; ++I) { + auto *RC = *I; + if (RC->isAllocatable()) + continue; + for (unsigned R : *RC) + Reserved[R] = true; + } +} + + +HexagonBlockRanges::RegisterSet HexagonBlockRanges::getLiveIns( + const MachineBasicBlock &B) { + RegisterSet LiveIns; + for (auto I : B.liveins()) + if (!Reserved[I.PhysReg]) + LiveIns.insert({I.PhysReg, 0}); + return LiveIns; +} + + +HexagonBlockRanges::RegisterSet HexagonBlockRanges::expandToSubRegs( + RegisterRef R, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) { + RegisterSet SRs; + + if (R.Sub != 0) { + SRs.insert(R); + return SRs; + } + + if (TargetRegisterInfo::isPhysicalRegister(R.Reg)) { + MCSubRegIterator I(R.Reg, &TRI); + if (!I.isValid()) + SRs.insert({R.Reg, 0}); + for (; I.isValid(); ++I) + SRs.insert({*I, 0}); + } else { + assert(TargetRegisterInfo::isVirtualRegister(R.Reg)); + auto &RC = *MRI.getRegClass(R.Reg); + unsigned PReg = *RC.begin(); + MCSubRegIndexIterator I(PReg, &TRI); + if (!I.isValid()) + SRs.insert({R.Reg, 0}); + for (; I.isValid(); ++I) + SRs.insert({R.Reg, I.getSubRegIndex()}); + } + return SRs; +} + + +void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, + RegToRangeMap &LiveMap) { + std::map<RegisterRef,IndexType> LastDef, LastUse; + RegisterSet LiveOnEntry; + MachineBasicBlock &B = IndexMap.getBlock(); + MachineRegisterInfo &MRI = B.getParent()->getRegInfo(); + + for (auto R : getLiveIns(B)) + for (auto S : expandToSubRegs(R, MRI, TRI)) + LiveOnEntry.insert(S); + + for (auto R : LiveOnEntry) + LastDef[R] = IndexType::Entry; + + auto closeRange = [&LastUse,&LastDef,&LiveMap] (RegisterRef R) -> void { + auto LD = LastDef[R], LU = LastUse[R]; + if (LD == IndexType::None) + LD = IndexType::Entry; + if (LU == IndexType::None) + LU = IndexType::Exit; + LiveMap[R].add(LD, LU, false, false); + LastUse[R] = LastDef[R] = IndexType::None; + }; + + for (auto &In : B) { + if (In.isDebugValue()) + continue; + IndexType Index = IndexMap.getIndex(&In); + // Process uses first. + for (auto &Op : In.operands()) { + if (!Op.isReg() || !Op.isUse() || Op.isUndef()) + continue; + RegisterRef R = { Op.getReg(), Op.getSubReg() }; + if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg]) + continue; + bool IsKill = Op.isKill(); + for (auto S : expandToSubRegs(R, MRI, TRI)) { + LastUse[S] = Index; + if (IsKill) + closeRange(S); + } + } + // Process defs. + for (auto &Op : In.operands()) { + if (!Op.isReg() || !Op.isDef() || Op.isUndef()) + continue; + RegisterRef R = { Op.getReg(), Op.getSubReg() }; + if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg]) + continue; + for (auto S : expandToSubRegs(R, MRI, TRI)) { + if (LastDef[S] != IndexType::None) + closeRange(S); + LastDef[S] = Index; + } + } + } + + // Collect live-on-exit. + RegisterSet LiveOnExit; + for (auto *SB : B.successors()) + for (auto R : getLiveIns(*SB)) + for (auto S : expandToSubRegs(R, MRI, TRI)) + LiveOnExit.insert(S); + + for (auto R : LiveOnExit) + LastUse[R] = IndexType::Exit; + + // Process remaining registers. + RegisterSet Left; + for (auto &I : LastUse) + if (I.second != IndexType::None) + Left.insert(I.first); + for (auto &I : LastDef) + if (I.second != IndexType::None) + Left.insert(I.first); + for (auto R : Left) + closeRange(R); + + // Finalize the live ranges. 
+ for (auto &P : LiveMap) + P.second.unionize(); +} + + +HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeLiveMap( + InstrIndexMap &IndexMap) { + RegToRangeMap LiveMap; + DEBUG(dbgs() << __func__ << ": index map\n" << IndexMap << '\n'); + computeInitialLiveRanges(IndexMap, LiveMap); + DEBUG(dbgs() << __func__ << ": live map\n" + << PrintRangeMap(LiveMap, TRI) << '\n'); + return LiveMap; +} + + +HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap( + InstrIndexMap &IndexMap, RegToRangeMap &LiveMap) { + RegToRangeMap DeadMap; + + auto addDeadRanges = [&IndexMap,&LiveMap,&DeadMap] (RegisterRef R) -> void { + auto F = LiveMap.find(R); + if (F == LiveMap.end() || F->second.empty()) { + DeadMap[R].add(IndexType::Entry, IndexType::Exit, false, false); + return; + } + + RangeList &RL = F->second; + RangeList::iterator A = RL.begin(), Z = RL.end()-1; + + // Try to create the initial range. + if (A->start() != IndexType::Entry) { + IndexType DE = IndexMap.getPrevIndex(A->start()); + if (DE != IndexType::Entry) + DeadMap[R].add(IndexType::Entry, DE, false, false); + } + + while (A != Z) { + // Creating a dead range that follows A. Pay attention to empty + // ranges (i.e. those ending with "None"). + IndexType AE = (A->end() == IndexType::None) ? A->start() : A->end(); + IndexType DS = IndexMap.getNextIndex(AE); + ++A; + IndexType DE = IndexMap.getPrevIndex(A->start()); + if (DS < DE) + DeadMap[R].add(DS, DE, false, false); + } + + // Try to create the final range. + if (Z->end() != IndexType::Exit) { + IndexType ZE = (Z->end() == IndexType::None) ? Z->start() : Z->end(); + IndexType DS = IndexMap.getNextIndex(ZE); + if (DS < IndexType::Exit) + DeadMap[R].add(DS, IndexType::Exit, false, false); + } + }; + + MachineFunction &MF = *IndexMap.getBlock().getParent(); + auto &MRI = MF.getRegInfo(); + unsigned NumRegs = TRI.getNumRegs(); + BitVector Visited(NumRegs); + for (unsigned R = 1; R < NumRegs; ++R) { + for (auto S : expandToSubRegs({R,0}, MRI, TRI)) { + if (Reserved[S.Reg] || Visited[S.Reg]) + continue; + addDeadRanges(S); + Visited[S.Reg] = true; + } + } + for (auto &P : LiveMap) + if (TargetRegisterInfo::isVirtualRegister(P.first.Reg)) + addDeadRanges(P.first); + + DEBUG(dbgs() << __func__ << ": dead map\n" + << PrintRangeMap(DeadMap, TRI) << '\n'); + return DeadMap; +} + + +raw_ostream &operator<< (raw_ostream &OS, HexagonBlockRanges::IndexType Idx) { + if (Idx == HexagonBlockRanges::IndexType::None) + return OS << '-'; + if (Idx == HexagonBlockRanges::IndexType::Entry) + return OS << 'n'; + if (Idx == HexagonBlockRanges::IndexType::Exit) + return OS << 'x'; + return OS << unsigned(Idx)-HexagonBlockRanges::IndexType::First+1; +} + +// A mapping to translate between instructions and their indices. +raw_ostream &operator<< (raw_ostream &OS, + const HexagonBlockRanges::IndexRange &IR) { + OS << '[' << IR.start() << ':' << IR.end() << (IR.TiedEnd ? '}' : ']'); + if (IR.Fixed) + OS << '!'; + return OS; +} + +raw_ostream &operator<< (raw_ostream &OS, + const HexagonBlockRanges::RangeList &RL) { + for (auto &R : RL) + OS << R << " "; + return OS; +} + +raw_ostream &operator<< (raw_ostream &OS, + const HexagonBlockRanges::InstrIndexMap &M) { + for (auto &In : M.Block) { + HexagonBlockRanges::IndexType Idx = M.getIndex(&In); + OS << Idx << (Idx == M.Last ? ". 
" : " ") << In; + } + return OS; +} + +raw_ostream &operator<< (raw_ostream &OS, + const HexagonBlockRanges::PrintRangeMap &P) { + for (auto &I : P.Map) { + const HexagonBlockRanges::RangeList &RL = I.second; + OS << PrintReg(I.first.Reg, &P.TRI, I.first.Sub) << " -> " << RL << "\n"; + } + return OS; +} diff --git a/llvm/lib/Target/Hexagon/HexagonBlockRanges.h b/llvm/lib/Target/Hexagon/HexagonBlockRanges.h new file mode 100644 index 00000000000..a7f8fd2c8dd --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonBlockRanges.h @@ -0,0 +1,240 @@ +//===--- HexagonBlockRanges.h ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef HEXAGON_BLOCK_RANGES_H +#define HEXAGON_BLOCK_RANGES_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/MC/MCRegisterInfo.h" // For MCPhysReg. +#include <map> +#include <set> +#include <vector> + +namespace llvm { + class Function; + class HexagonSubtarget; + class MachineBasicBlock; + class MachineFunction; + class MachineInstr; + class MCInstrDesc; + class raw_ostream; + class TargetInstrInfo; + class TargetRegisterClass; + class TargetRegisterInfo; + class Type; +} + +using namespace llvm; + +struct HexagonBlockRanges { + HexagonBlockRanges(MachineFunction &MF); + + struct RegisterRef { + unsigned Reg, Sub; + bool operator<(RegisterRef R) const { + return Reg < R.Reg || (Reg == R.Reg && Sub < R.Sub); + } + }; + typedef std::set<RegisterRef> RegisterSet; + + // This is to represent an "index", which is an abstraction of a position + // of an instruction within a basic block. + class IndexType { + public: + enum : unsigned { + None = 0, + Entry = 1, + Exit = 2, + First = 11 // 10th + 1st + }; + static bool isInstr(IndexType X) { return X.Index >= First; } + + IndexType() : Index(None) {} + IndexType(unsigned Idx) : Index(Idx) {} + operator unsigned() const; + bool operator== (unsigned x) const; + bool operator== (IndexType Idx) const; + bool operator!= (unsigned x) const; + bool operator!= (IndexType Idx) const; + IndexType operator++ (); + bool operator< (unsigned Idx) const; + bool operator< (IndexType Idx) const; + bool operator<= (IndexType Idx) const; + + private: + bool operator> (IndexType Idx) const; + bool operator>= (IndexType Idx) const; + + unsigned Index; + }; + + // A range of indices, essentially a representation of a live range. + // This is also used to represent "dead ranges", i.e. ranges where a + // register is dead. + class IndexRange : public std::pair<IndexType,IndexType> { + public: + IndexRange() : Fixed(false), TiedEnd(false) {} + IndexRange(IndexType Start, IndexType End, bool F = false, bool T = false) + : std::pair<IndexType,IndexType>(Start, End), Fixed(F), TiedEnd(T) {} + IndexType start() const { return first; } + IndexType end() const { return second; } + + bool operator< (const IndexRange &A) const { + return start() < A.start(); + } + bool overlaps(const IndexRange &A) const; + bool contains(const IndexRange &A) const; + void merge(const IndexRange &A); + + bool Fixed; // Can be renamed? "Fixed" means "no". + bool TiedEnd; // The end is not a use, but a dead def tied to a use. + + private: + void setStart(const IndexType &S) { first = S; } + void setEnd(const IndexType &E) { second = E; } + }; + + // A list of index ranges. 
This represents liveness of a register
+  // in a basic block.
+  class RangeList : public std::vector<IndexRange> {
+  public:
+    void add(IndexType Start, IndexType End, bool Fixed, bool TiedEnd) {
+      push_back(IndexRange(Start, End, Fixed, TiedEnd));
+    }
+    void add(const IndexRange &Range) {
+      push_back(Range);
+    }
+    void include(const RangeList &RL);
+    void unionize(bool MergeAdjacent = false);
+    void subtract(const IndexRange &Range);
+
+  private:
+    void addsub(const IndexRange &A, const IndexRange &B);
+  };
+
+  class InstrIndexMap {
+  public:
+    InstrIndexMap(MachineBasicBlock &B);
+    MachineInstr *getInstr(IndexType Idx) const;
+    IndexType getIndex(MachineInstr *MI) const;
+    MachineBasicBlock &getBlock() const { return Block; }
+    IndexType getPrevIndex(IndexType Idx) const;
+    IndexType getNextIndex(IndexType Idx) const;
+    void replaceInstr(MachineInstr *OldMI, MachineInstr *NewMI);
+
+    friend raw_ostream &operator<< (raw_ostream &OS, const InstrIndexMap &Map);
+    IndexType First, Last;
+
+  private:
+    MachineBasicBlock &Block;
+    std::map<IndexType,MachineInstr*> Map;
+  };
+
+  typedef std::map<RegisterRef,RangeList> RegToRangeMap;
+  RegToRangeMap computeLiveMap(InstrIndexMap &IndexMap);
+  RegToRangeMap computeDeadMap(InstrIndexMap &IndexMap, RegToRangeMap &LiveMap);
+  static RegisterSet expandToSubRegs(RegisterRef R,
+      const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI);
+
+  struct PrintRangeMap {
+    PrintRangeMap(const RegToRangeMap &M, const TargetRegisterInfo &I)
+        : Map(M), TRI(I) {}
+
+    friend raw_ostream &operator<< (raw_ostream &OS, const PrintRangeMap &P);
+  private:
+    const RegToRangeMap &Map;
+    const TargetRegisterInfo &TRI;
+  };
+
+private:
+  RegisterSet getLiveIns(const MachineBasicBlock &B);
+
+  void computeInitialLiveRanges(InstrIndexMap &IndexMap,
+      RegToRangeMap &LiveMap);
+
+  MachineFunction &MF;
+  const HexagonSubtarget &HST;
+  const TargetInstrInfo &TII;
+  const TargetRegisterInfo &TRI;
+  BitVector Reserved;
+};
+
+
+inline HexagonBlockRanges::IndexType::operator unsigned() const {
+  assert(Index >= First);
+  return Index;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator== (unsigned x) const {
+  return Index == x;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator== (IndexType Idx) const {
+  return Index == Idx.Index;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator!= (unsigned x) const {
+  return Index != x;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator!= (IndexType Idx) const {
+  return Index != Idx.Index;
+}
+
+inline
+HexagonBlockRanges::IndexType HexagonBlockRanges::IndexType::operator++ () {
+  assert(Index != None);
+  assert(Index != Exit);
+  if (Index == Entry)
+    Index = First;
+  else
+    ++Index;
+  return *this;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator< (unsigned Idx) const {
+  return operator< (IndexType(Idx));
+}
+
+inline bool HexagonBlockRanges::IndexType::operator< (IndexType Idx) const {
+  // !(x < x).
+  if (Index == Idx.Index)
+    return false;
+  // !(None < x) for all x.
+  // !(x < None) for all x.
+  if (Index == None || Idx.Index == None)
+    return false;
+  // !(Exit < x) for all x.
+  // !(x < Entry) for all x.
+  if (Index == Exit || Idx.Index == Entry)
+    return false;
+  // Entry < x for all x != Entry.
+  // x < Exit for all x != Exit.
+  if (Index == Entry || Idx.Index == Exit)
+    return true;
+
+  return Index < Idx.Index;
+}
+
+inline bool HexagonBlockRanges::IndexType::operator<= (IndexType Idx) const {
+  return operator==(Idx) || operator<(Idx);
+}
+
+
+raw_ostream &operator<< (raw_ostream &OS, HexagonBlockRanges::IndexType Idx);
+raw_ostream &operator<< (raw_ostream &OS,
+      const HexagonBlockRanges::IndexRange &IR);
+raw_ostream &operator<< (raw_ostream &OS,
+      const HexagonBlockRanges::RangeList &RL);
+raw_ostream &operator<< (raw_ostream &OS,
+      const HexagonBlockRanges::InstrIndexMap &M);
+raw_ostream &operator<< (raw_ostream &OS,
+      const HexagonBlockRanges::PrintRangeMap &P);
+
+#endif
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index cf4d110911f..ac3242b7922 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -10,6 +10,7 @@
 
 #define DEBUG_TYPE "hexagon-pei"
 
+#include "HexagonBlockRanges.h"
 #include "HexagonFrameLowering.h"
 #include "HexagonInstrInfo.h"
 #include "HexagonMachineFunctionInfo.h"
@@ -147,6 +148,9 @@ static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX),
 static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
     cl::Hidden, cl::desc("Use allocframe more conservatively"));
 
+static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
+    cl::init(true), cl::desc("Optimize spill slots"));
+
 namespace llvm {
   void initializeHexagonCallFrameInformationPass(PassRegistry&);
 
@@ -1046,13 +1050,13 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
   // Check for an unused caller-saved register.
   for ( ; *CallerSavedRegs; ++CallerSavedRegs) {
     MCPhysReg FreeReg = *CallerSavedRegs;
-    if (!MRI.reg_nodbg_empty(FreeReg))
+    if (MRI.isPhysRegUsed(FreeReg))
       continue;
 
     // Check aliased register usage.
     bool IsCurrentRegUsed = false;
     for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI)
-      if (!MRI.reg_nodbg_empty(*AI)) {
+      if (MRI.isPhysRegUsed(*AI)) {
         IsCurrentRegUsed = true;
         break;
       }
@@ -1634,7 +1638,8 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
   // Replace predicate register pseudo spill code.
   SmallVector<unsigned,8> NewRegs;
   expandSpillMacros(MF, NewRegs);
-
+  if (OptimizeSpillSlots)
+    optimizeSpillSlots(MF, NewRegs);
   // We need to reserve a spill slot if scavenging could potentially require
   // spilling a scavenged register.
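The optimizeSpillSlots() body added in the next hunk pairs each stack slot's store-to-load ranges with a per-block dead-register map: findPhysReg() hands a slot's range to a register only when every sub-register of the candidate has a dead range containing it. Those dead ranges are essentially the complement of the live ranges that computeLiveMap() produces. A hedged sketch of that complement, with plain integers standing in for the patch's IndexType and its Entry/Exit endpoints (the helper name deadGaps is illustrative, not from the patch):

    // Given a register's disjoint, sorted live ranges over block positions
    // 0..LastIdx, return the dead gaps between them -- the core of what
    // computeDeadMap() derives from the live map, minus the sentinel
    // endpoints and sub-register expansion.
    #include <cstdio>
    #include <utility>
    #include <vector>

    using Range = std::pair<unsigned, unsigned>; // inclusive [start, end]

    static std::vector<Range> deadGaps(const std::vector<Range> &Live,
                                       unsigned LastIdx) {
      std::vector<Range> Dead;
      unsigned Pos = 0; // next position not yet covered by a live range
      for (const Range &L : Live) {
        if (L.first > Pos)
          Dead.push_back({Pos, L.first - 1});
        Pos = L.second + 1;
      }
      if (Pos <= LastIdx)
        Dead.push_back({Pos, LastIdx});
      return Dead;
    }

    int main() {
      // Live at [2:4] and [7:8] in a block with positions 0..10:
      for (const Range &R : deadGaps({{2, 4}, {7, 8}}, 10))
        std::printf("[%u:%u] ", R.first, R.second); // prints: [0:1] [5:6] [9:10]
      std::printf("\n");
    }

Once a dead register is found, the rewrite below replaces the store with a copy into that register and each reload with a copy (or sign/zero extension) out of it, then subtracts the consumed range from the dead map so later slots cannot reuse it.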
@@ -1665,6 +1670,354 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, } +unsigned HexagonFrameLowering::findPhysReg(MachineFunction &MF, + HexagonBlockRanges::IndexRange &FIR, + HexagonBlockRanges::InstrIndexMap &IndexMap, + HexagonBlockRanges::RegToRangeMap &DeadMap, + const TargetRegisterClass *RC) const { + auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + auto &MRI = MF.getRegInfo(); + + auto isDead = [&FIR,&DeadMap] (unsigned Reg) -> bool { + auto F = DeadMap.find({Reg,0}); + if (F == DeadMap.end()) + return false; + for (auto &DR : F->second) + if (DR.contains(FIR)) + return true; + return false; + }; + + for (unsigned Reg : RC->getRawAllocationOrder(MF)) { + bool Dead = true; + for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) { + if (isDead(R.Reg)) + continue; + Dead = false; + break; + } + if (Dead) + return Reg; + } + return 0; +} + +void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, + SmallVectorImpl<unsigned> &VRegs) const { + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + auto &MRI = MF.getRegInfo(); + HexagonBlockRanges HBR(MF); + + typedef std::map<MachineBasicBlock*,HexagonBlockRanges::InstrIndexMap> + BlockIndexMap; + typedef std::map<MachineBasicBlock*,HexagonBlockRanges::RangeList> + BlockRangeMap; + typedef HexagonBlockRanges::IndexType IndexType; + + struct SlotInfo { + BlockRangeMap Map; + unsigned Size = 0; + const TargetRegisterClass *RC = nullptr; + }; + + BlockIndexMap BlockIndexes; + SmallSet<int,4> BadFIs; + std::map<int,SlotInfo> FIRangeMap; + + auto getRegClass = [&MRI,&HRI] (HexagonBlockRanges::RegisterRef R) + -> const TargetRegisterClass* { + if (TargetRegisterInfo::isPhysicalRegister(R.Reg)) + assert(R.Sub == 0); + if (TargetRegisterInfo::isVirtualRegister(R.Reg)) { + auto *RCR = MRI.getRegClass(R.Reg); + if (R.Sub == 0) + return RCR; + unsigned PR = *RCR->begin(); + R.Reg = HRI.getSubReg(PR, R.Sub); + } + return HRI.getMinimalPhysRegClass(R.Reg); + }; + // Accumulate register classes: get a common class for a pre-existing + // class HaveRC and a new class NewRC. Return nullptr if a common class + // cannot be found, otherwise return the resulting class. If HaveRC is + // nullptr, assume that it is still unset. + auto getCommonRC = [&HRI] (const TargetRegisterClass *HaveRC, + const TargetRegisterClass *NewRC) + -> const TargetRegisterClass* { + if (HaveRC == nullptr || HaveRC == NewRC) + return NewRC; + // Different classes, both non-null. Pick the more general one. + if (HaveRC->hasSubClassEq(NewRC)) + return HaveRC; + if (NewRC->hasSubClassEq(HaveRC)) + return NewRC; + return nullptr; + }; + + // Scan all blocks in the function. Check all occurrences of frame indexes, + // and collect relevant information. + for (auto &B : MF) { + std::map<int,IndexType> LastStore, LastLoad; + auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B)); + auto &IndexMap = P.first->second; + DEBUG(dbgs() << "Index map for BB#" << B.getNumber() << "\n" + << IndexMap << '\n'); + + for (auto &In : B) { + int LFI, SFI; + bool Load = HII.isLoadFromStackSlot(&In, LFI) && !HII.isPredicated(&In); + bool Store = HII.isStoreToStackSlot(&In, SFI) && !HII.isPredicated(&In); + if (Load && Store) { + // If it's both a load and a store, then we won't handle it. 
+ BadFIs.insert(LFI); + BadFIs.insert(SFI); + continue; + } + // Check for register classes of the register used as the source for + // the store, and the register used as the destination for the load. + // Also, only accept base+imm_offset addressing modes. Other addressing + // modes can have side-effects (post-increments, etc.). For stack + // slots they are very unlikely, so there is not much loss due to + // this restriction. + if (Load || Store) { + int TFI = Load ? LFI : SFI; + unsigned AM = HII.getAddrMode(&In); + SlotInfo &SI = FIRangeMap[TFI]; + bool Bad = (AM != HexagonII::BaseImmOffset); + if (!Bad) { + // If the addressing mode is ok, check the register class. + const TargetRegisterClass *RC = nullptr; + if (Load) { + MachineOperand &DataOp = In.getOperand(0); + RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()}); + } else { + MachineOperand &DataOp = In.getOperand(2); + RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()}); + } + RC = getCommonRC(SI.RC, RC); + if (RC == nullptr) + Bad = true; + else + SI.RC = RC; + } + if (!Bad) { + // Check sizes. + unsigned S = (1U << (HII.getMemAccessSize(&In) - 1)); + if (SI.Size != 0 && SI.Size != S) + Bad = true; + else + SI.Size = S; + } + if (Bad) + BadFIs.insert(TFI); + } + + // Locate uses of frame indices. + for (unsigned i = 0, n = In.getNumOperands(); i < n; ++i) { + const MachineOperand &Op = In.getOperand(i); + if (!Op.isFI()) + continue; + int FI = Op.getIndex(); + // Make sure that the following operand is an immediate and that + // it is 0. This is the offset in the stack object. + if (i+1 >= n || !In.getOperand(i+1).isImm() || + In.getOperand(i+1).getImm() != 0) + BadFIs.insert(FI); + if (BadFIs.count(FI)) + continue; + + IndexType Index = IndexMap.getIndex(&In); + if (Load) { + if (LastStore[FI] == IndexType::None) + LastStore[FI] = IndexType::Entry; + LastLoad[FI] = Index; + } else if (Store) { + HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B]; + if (LastStore[FI] != IndexType::None) + RL.add(LastStore[FI], LastLoad[FI], false, false); + else if (LastLoad[FI] != IndexType::None) + RL.add(IndexType::Entry, LastLoad[FI], false, false); + LastLoad[FI] = IndexType::None; + LastStore[FI] = Index; + } else { + BadFIs.insert(FI); + } + } + } + + for (auto &I : LastLoad) { + IndexType LL = I.second; + if (LL == IndexType::None) + continue; + auto &RL = FIRangeMap[I.first].Map[&B]; + IndexType &LS = LastStore[I.first]; + if (LS != IndexType::None) + RL.add(LS, LL, false, false); + else + RL.add(IndexType::Entry, LL, false, false); + LS = IndexType::None; + } + for (auto &I : LastStore) { + IndexType LS = I.second; + if (LS == IndexType::None) + continue; + auto &RL = FIRangeMap[I.first].Map[&B]; + RL.add(LS, IndexType::None, false, false); + } + } + + DEBUG({ + for (auto &P : FIRangeMap) { + dbgs() << "fi#" << P.first; + if (BadFIs.count(P.first)) + dbgs() << " (bad)"; + dbgs() << " RC: "; + if (P.second.RC != nullptr) + dbgs() << HRI.getRegClassName(P.second.RC) << '\n'; + else + dbgs() << "<null>\n"; + for (auto &R : P.second.Map) + dbgs() << " BB#" << R.first->getNumber() << " { " << R.second << "}\n"; + } + }); + + // When a slot is loaded from in a block without being stored to in the + // same block, it is live-on-entry to this block. To avoid CFG analysis, + // consider this slot to be live-on-exit from all blocks. 
+ SmallSet<int,4> LoxFIs; + + std::map<MachineBasicBlock*,std::vector<int>> BlockFIMap; + + for (auto &P : FIRangeMap) { + // P = pair(FI, map: BB->RangeList) + if (BadFIs.count(P.first)) + continue; + for (auto &B : MF) { + auto F = P.second.Map.find(&B); + // F = pair(BB, RangeList) + if (F == P.second.Map.end() || F->second.empty()) + continue; + HexagonBlockRanges::IndexRange &IR = F->second.front(); + if (IR.start() == IndexType::Entry) + LoxFIs.insert(P.first); + BlockFIMap[&B].push_back(P.first); + } + } + + DEBUG({ + dbgs() << "Block-to-FI map (* -- live-on-exit):\n"; + for (auto &P : BlockFIMap) { + auto &FIs = P.second; + if (FIs.empty()) + continue; + dbgs() << " BB#" << P.first->getNumber() << ": {"; + for (auto I : FIs) { + dbgs() << " fi#" << I; + if (LoxFIs.count(I)) + dbgs() << '*'; + } + dbgs() << " }\n"; + } + }); + + // eliminate loads, when all loads eliminated, eliminate all stores. + for (auto &B : MF) { + auto F = BlockIndexes.find(&B); + assert(F != BlockIndexes.end()); + HexagonBlockRanges::InstrIndexMap &IM = F->second; + HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM); + HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM); + DEBUG(dbgs() << "BB#" << B.getNumber() << " dead map\n" + << HexagonBlockRanges::PrintRangeMap(DM, HRI)); + + for (auto FI : BlockFIMap[&B]) { + if (BadFIs.count(FI)) + continue; + DEBUG(dbgs() << "Working on fi#" << FI << '\n'); + HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B]; + for (auto &Range : RL) { + DEBUG(dbgs() << "--Examining range:" << RL << '\n'); + if (!IndexType::isInstr(Range.start()) || + !IndexType::isInstr(Range.end())) + continue; + MachineInstr *SI = IM.getInstr(Range.start()); + MachineInstr *EI = IM.getInstr(Range.end()); + assert(SI->mayStore() && "Unexpected start instruction"); + assert(EI->mayLoad() && "Unexpected end instruction"); + MachineOperand &SrcOp = SI->getOperand(2); + + HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(), + SrcOp.getSubReg() }; + auto *RC = getRegClass({SrcOp.getReg(), SrcOp.getSubReg()}); + // The this-> is needed to unconfuse MSVC. + unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC); + DEBUG(dbgs() << "Replacement reg:" << PrintReg(FoundR, &HRI) << '\n'); + if (FoundR == 0) + continue; + + // Generate the copy-in: "FoundR = COPY SrcR" at the store location. + MachineBasicBlock::iterator StartIt = SI, NextIt; + MachineInstr *CopyIn = nullptr; + if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) { + DebugLoc DL = SI->getDebugLoc(); + CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR) + .addOperand(SrcOp); + } + + ++StartIt; + // Check if this is a last store and the FI is live-on-exit. + if (LoxFIs.count(FI) && (&Range == &RL.back())) { + // Update store's source register. + if (unsigned SR = SrcOp.getSubReg()) + SrcOp.setReg(HRI.getSubReg(FoundR, SR)); + else + SrcOp.setReg(FoundR); + SrcOp.setSubReg(0); + // We are keeping this register live. 
+ SrcOp.setIsKill(false); + } else { + B.erase(SI); + IM.replaceInstr(SI, CopyIn); + } + + auto EndIt = std::next(MachineBasicBlock::iterator(EI)); + for (auto It = StartIt; It != EndIt; It = NextIt) { + MachineInstr *MI = &*It; + NextIt = std::next(It); + int TFI; + if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI) + continue; + unsigned DstR = MI->getOperand(0).getReg(); + assert(MI->getOperand(0).getSubReg() == 0); + MachineInstr *CopyOut = nullptr; + if (DstR != FoundR) { + DebugLoc DL = MI->getDebugLoc(); + unsigned MemSize = (1U << (HII.getMemAccessSize(MI) - 1)); + assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset); + unsigned CopyOpc = TargetOpcode::COPY; + if (HII.isSignExtendingLoad(MI)) + CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth; + else if (HII.isZeroExtendingLoad(MI)) + CopyOpc = (MemSize == 1) ? Hexagon::A2_zxtb : Hexagon::A2_zxth; + CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR) + .addReg(FoundR, getKillRegState(MI == EI)); + } + IM.replaceInstr(MI, CopyOut); + B.erase(It); + } + + // Update the dead map. + HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 }; + for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI)) + DM[RR].subtract(Range); + } // for Range in range list + } + } +} + + void HexagonFrameLowering::expandAlloca(MachineInstr *AI, const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const { MachineBasicBlock &MB = *AI->getParent(); diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h index 15a276354ef..c9cae04cb30 100644 --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h @@ -11,6 +11,7 @@ #define LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H #include "Hexagon.h" +#include "HexagonBlockRanges.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -124,6 +125,13 @@ private: bool expandSpillMacros(MachineFunction &MF, SmallVectorImpl<unsigned> &NewRegs) const; + unsigned findPhysReg(MachineFunction &MF, HexagonBlockRanges::IndexRange &FIR, + HexagonBlockRanges::InstrIndexMap &IndexMap, + HexagonBlockRanges::RegToRangeMap &DeadMap, + const TargetRegisterClass *RC) const; + void optimizeSpillSlots(MachineFunction &MF, + SmallVectorImpl<unsigned> &VRegs) const; + void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const; diff --git a/llvm/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll b/llvm/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll new file mode 100644 index 00000000000..561013b174d --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/avoid-predspill-calleesaved.ll @@ -0,0 +1,49 @@ +; Check that a callee-saved register will be saved correctly if +; the predicate-to-GPR spilling code uses it. +; +; RUN: llc -march=hexagon < %s | FileCheck %s +; +; We expect to spill p0 into a general-purpose register and keep it there, +; without adding an extra spill of that register. 
+; +; CHECK: PredSpill: +; CHECK: memd(r29{{.*}}) = r17:16 +; CHECK-DAG: r{{[0-9]+}} = p0 +; CHECK-DAG: p0 = r{{[0-9]+}} +; CHECK-NOT: = memw(r29 +; + +define void @PredSpill() { +entry: + br i1 undef, label %if.then, label %if.else.14 + +if.then: ; preds = %entry + br i1 undef, label %if.end.57, label %if.else + +if.else: ; preds = %if.then + unreachable + +if.else.14: ; preds = %entry + br i1 undef, label %if.then.17, label %if.end.57 + +if.then.17: ; preds = %if.else.14 + br i1 undef, label %if.end.57, label %if.then.20 + +if.then.20: ; preds = %if.then.17 + %call21 = tail call i32 @myfun() + %tobool22 = icmp eq i32 %call21, 0 + %0 = tail call i32 @myfun() + br i1 %tobool22, label %if.else.42, label %if.then.23 + +if.then.23: ; preds = %if.then.20 + unreachable + +if.else.42: ; preds = %if.then.20 + ret void + +if.end.57: ; preds = %if.then.17, %if.else.14, %if.then + ret void +} + +declare i32 @myfun() + diff --git a/llvm/test/CodeGen/Hexagon/avoid-predspill.ll b/llvm/test/CodeGen/Hexagon/avoid-predspill.ll index 883c16a51a3..159c149c442 100644 --- a/llvm/test/CodeGen/Hexagon/avoid-predspill.ll +++ b/llvm/test/CodeGen/Hexagon/avoid-predspill.ll @@ -1,6 +1,3 @@ -; This functionality will be restored shortly. -; XFAIL: * - ; RUN: llc -march=hexagon -O2 < %s | FileCheck %s ; ; This checks that predicate registers are moved to GPRs instead of spilling diff --git a/llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll b/llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll new file mode 100644 index 00000000000..6fb0a3e2658 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/eliminate-pred-spill.ll @@ -0,0 +1,144 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv60 -enable-hexagon-hvx-double \ +; RUN: -hexagon-bit=0 < %s | FileCheck %s + +; This spill should be eliminated. 
+; CHECK-NOT: vmem(r29+#6) + +define void @test(i8* noalias nocapture %key, i8* noalias nocapture %data1) #0 { +entry: + %0 = bitcast i8* %key to <32 x i32>* + %1 = bitcast i8* %data1 to <32 x i32>* + br label %for.body + +for.body: + %pkey.0542 = phi <32 x i32>* [ %0, %entry ], [ null, %for.body ] + %pdata0.0541 = phi <32 x i32>* [ null, %entry ], [ %add.ptr48, %for.body ] + %pdata1.0540 = phi <32 x i32>* [ %1, %entry ], [ %add.ptr49, %for.body ] + %dAccum0.0539 = phi <64 x i32> [ undef, %entry ], [ %86, %for.body ] + %2 = load <32 x i32>, <32 x i32>* %pkey.0542, align 128 + %3 = load <32 x i32>, <32 x i32>* %pdata0.0541, align 128 + %4 = load <32 x i32>, <32 x i32>* undef, align 128 + %arrayidx4 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 2 + %5 = load <32 x i32>, <32 x i32>* %arrayidx4, align 128 + %arrayidx5 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 2 + %6 = load <32 x i32>, <32 x i32>* %arrayidx5, align 128 + %7 = load <32 x i32>, <32 x i32>* null, align 128 + %8 = load <32 x i32>, <32 x i32>* undef, align 128 + %9 = load <32 x i32>, <32 x i32>* null, align 128 + %arrayidx9 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 3 + %arrayidx10 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 6 + %10 = load <32 x i32>, <32 x i32>* %arrayidx10, align 128 + %arrayidx12 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 4 + %11 = load <32 x i32>, <32 x i32>* %arrayidx12, align 128 + %arrayidx13 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 8 + %arrayidx14 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 8 + %12 = load <32 x i32>, <32 x i32>* %arrayidx14, align 128 + %arrayidx15 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 5 + %13 = load <32 x i32>, <32 x i32>* %arrayidx15, align 128 + %arrayidx16 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 10 + %arrayidx17 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 10 + %14 = load <32 x i32>, <32 x i32>* %arrayidx17, align 128 + %arrayidx18 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 6 + %15 = load <32 x i32>, <32 x i32>* %arrayidx18, align 128 + %arrayidx19 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 12 + %16 = load <32 x i32>, <32 x i32>* %arrayidx19, align 128 + %arrayidx20 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 12 + %17 = load <32 x i32>, <32 x i32>* %arrayidx20, align 128 + %arrayidx22 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 14 + %18 = load <32 x i32>, <32 x i32>* %arrayidx22, align 128 + %arrayidx23 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 14 + %19 = load <32 x i32>, <32 x i32>* %arrayidx23, align 128 + %20 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %2, <32 x i32> %11) + %21 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %11, <32 x i32> %2) + %22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %2, <32 x i32> %11) + %23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> undef, <32 x i32> %3) + %24 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %12, <32 x i32> undef) + %25 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %7, <32 x i32> %15) + %26 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %15, <32 x i32> %7) + %27 = tail call <32 x i32> 
@llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %7, <32 x i32> %15) + %28 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %16, <32 x i32> %8) + %29 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %8, <32 x i32> %16) + %30 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %17, <32 x i32> %9) + %31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %9, <32 x i32> %17) + %32 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %4, <32 x i32> %13) + %33 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %13, <32 x i32> %4) + %34 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %4, <32 x i32> %13) + %35 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> undef, <32 x i32> %5) + %36 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %5, <32 x i32> undef) + %37 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %14, <32 x i32> %6) + %38 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %6, <32 x i32> %14) + %39 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef) + %40 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> zeroinitializer) + %41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %18, <32 x i32> %10) + %42 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %10, <32 x i32> %18) + %43 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %19, <32 x i32> undef) + %44 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> %19) + %45 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %21, <32 x i32> %26) + %46 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %26, <32 x i32> %21) + %47 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %21, <32 x i32> %26) + %48 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %28, <32 x i32> %23) + %49 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %23, <32 x i32> %28) + %50 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %30, <32 x i32> %24) + %51 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %24, <32 x i32> %30) + %52 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %22, <32 x i32> %27) + %53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %27, <32 x i32> %22) + %54 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %22, <32 x i32> %27) + %55 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %29, <32 x i32> undef) + %56 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> undef, <32 x i32> %31) + %57 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %33, <32 x i32> %39) + %58 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %39, <32 x i32> %33) + %59 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %33, <32 x i32> %39) + %60 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x 
i32> %41, <32 x i32> %35) + %61 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %43, <32 x i32> %37) + %62 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %34, <32 x i32> %40) + %63 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %42, <32 x i32> %36) + %64 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %38, <32 x i32> %44) + %65 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %46, <32 x i32> %58) + %66 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %58, <32 x i32> %46) + %67 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %60, <32 x i32> %48) + %68 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %61, <32 x i32> %50) + %69 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %47, <32 x i32> %59) + %70 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %69, <32 x i32> %51, <32 x i32> zeroinitializer) + %71 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %53, <32 x i32> zeroinitializer) + %72 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %71, <32 x i32> %63, <32 x i32> %55) + %73 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %54, <32 x i32> undef) + %74 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %73, <32 x i32> %56, <32 x i32> %64) + %75 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %68, <32 x i32> %67) + %76 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %70, <32 x i32> undef) + %77 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> zeroinitializer, <32 x i32> %72) + %78 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %74, <32 x i32> zeroinitializer) + %79 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %dAccum0.0539, <32 x i32> %75, i32 65537) + %80 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %79, <32 x i32> zeroinitializer, i32 65537) + %81 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %80, <32 x i32> zeroinitializer, i32 65537) + %82 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %81, <32 x i32> %76, i32 65537) + %83 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %82, <32 x i32> %77, i32 65537) + %84 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %83, <32 x i32> zeroinitializer, i32 65537) + %85 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %84, <32 x i32> undef, i32 65537) + %86 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %85, <32 x i32> %78, i32 65537) + store <32 x i32> %66, <32 x i32>* %pkey.0542, align 128 + store <32 x i32> %75, <32 x i32>* %pdata0.0541, align 128 + store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx4, align 128 + store <32 x i32> zeroinitializer, <32 x i32>* undef, align 128 + store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx20, align 128 + store <32 x i32> zeroinitializer, <32 x i32>* null, align 128 + %add.ptr48 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 16 + %add.ptr49 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 16 + br i1 false, label %for.end, label %for.body + +for.end: + %87 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %86) + ret void +} + +declare <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>) #1 + +declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x 
i1>, <32 x i32>, <32 x i32>) #1 + +declare <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32>, <32 x i32>) #1 + +declare <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32>, <32 x i32>, i32) #1 + +declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll b/llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll new file mode 100644 index 00000000000..db9ed55d2da --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/reg-scavengebug-3.ll @@ -0,0 +1,80 @@ +; RUN: llc -O0 -march=hexagon -mcpu=hexagonv60 < %s | FileCheck %s + +; CHECK: vmem + +target triple = "hexagon" + +@vecpreds = external global [15 x <16 x i32>], align 64 +@vectors = external global [15 x <16 x i32>], align 64 +@vector_pairs = external global [15 x <32 x i32>], align 128 +@.str1 = external hidden unnamed_addr constant [20 x i8], align 1 +@.str2 = external hidden unnamed_addr constant [43 x i8], align 1 +@Q6VecPredResult = external global <16 x i32>, align 64 +@.str52 = external hidden unnamed_addr constant [57 x i8], align 1 +@.str54 = external hidden unnamed_addr constant [59 x i8], align 1 +@VectorResult = external global <16 x i32>, align 64 +@.str243 = external hidden unnamed_addr constant [60 x i8], align 1 +@.str251 = external hidden unnamed_addr constant [77 x i8], align 1 +@.str290 = external hidden unnamed_addr constant [65 x i8], align 1 +@VectorPairResult = external global <32 x i32>, align 128 + +; Function Attrs: nounwind +declare void @print_vector(i32, i8*) #0 + +; Function Attrs: nounwind +declare i32 @printf(i8*, ...) #0 + +; Function Attrs: nounwind +declare void @print_vecpred(i32, i8*) #0 + +; Function Attrs: nounwind readnone +declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1 + +; Function Attrs: nounwind +declare void @init_vectors() #0 + +; Function Attrs: nounwind readnone +declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1 + +; Function Attrs: nounwind readnone +declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1 + +; Function Attrs: nounwind +declare void @init_addresses() #0 + +; Function Attrs: nounwind +declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1 + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %0 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64 + %1 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64 + call void @print_vecpred(i32 64, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) + %2 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64 + %call50 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([57 x i8], [57 x i8]* @.str52, i32 0, i32 0)) #3 + %3 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64 + %call52 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([59 x i8], [59 x i8]* @.str54, i32 0, i32 0)) #3 + %4 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64 + %call300 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str290, i32 0, i32 0)) #3 + %5 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64 + %6 = load <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64 + %call1373 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @.str243, i32 0, i32 0)) #3 + %7 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1) + %call1381 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([77 x i8], [77 x i8]* @.str251, i32 0, i32 0)) #3 + %8 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1) + %9 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 16843009) + call void @print_vector(i32 64, i8* bitcast (<16 x i32>* @VectorResult to i8*)) + %10 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1) + %11 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %10, i32 16843009) + %12 = bitcast <512 x i1> %11 to <16 x i32> + %13 = bitcast <16 x i32> %12 to <512 x i1> + %14 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %13, <16 x i32> undef, <16 x i32> undef) + store <16 x i32> %14, <16 x i32>* @VectorResult, align 64 + ret i32 0 +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } diff --git a/llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll b/llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll new file mode 100644 index 00000000000..d120295fa52 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vec-pred-spill1.ll @@ -0,0 +1,80 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv60 -O2 -enable-hexagon-hvx < %s | FileCheck %s + +; CHECK: vmem(r{{[0-9]+}}+#3) = v{{[0-9]+}} +; CHECK: call puts +; CHECK: call print_vecpred +; CHECK: v{{[0-9]+}}{{ *}}={{ *}}vmem(r{{[0-9]+}}+#3) + +target triple = "hexagon" + +@K = global i64 0, align 8 +@src = global i32 -1, align 4 +@Q6VecPredResult = common global <16 x i32> zeroinitializer, align 64 +@dst_addresses = common global [15 x i64] zeroinitializer, align 8 +@ptr_addresses = common global [15 x i8*] zeroinitializer, align 8 +@src_addresses = common global [15 x i8*] zeroinitializer, align 8 +@ptr = common global [32768 x i32] zeroinitializer, align 8 +@vecpreds = common global [15 x <16 x i32>] zeroinitializer, align 64 +@VectorResult = common global <16 x i32> zeroinitializer, align 64 +@vectors = common global [15 x <16 x i32>] zeroinitializer, align 64 +@VectorPairResult = common global <32 x i32> 
zeroinitializer, align 128 +@vector_pairs = common global [15 x <32 x i32>] zeroinitializer, align 128 +@str = private unnamed_addr constant [106 x i8] c"Q6VecPred4 : Q6_Q_vandor_QVR(Q6_Q_vand_VR(Q6_V_vsplat_R(1+1),(0x01010101)),Q6_V_vsplat_R(0+1),INT32_MIN)\00" +@str3 = private unnamed_addr constant [99 x i8] c"Q6VecPred4 : Q6_Q_vandor_QVR(Q6_Q_vand_VR(Q6_V_vsplat_R(1+1),(0x01010101)),Q6_V_vsplat_R(0+1),-1)\00" +@str4 = private unnamed_addr constant [98 x i8] c"Q6VecPred4 : Q6_Q_vandor_QVR(Q6_Q_vand_VR(Q6_V_vsplat_R(1+1),(0x01010101)),Q6_V_vsplat_R(0+1),0)\00" + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %call = tail call i32 bitcast (i32 (...)* @init_addresses to i32 ()*)() #3 + %call1 = tail call i32 @acquire_vector_unit(i8 zeroext 0) #3 + tail call void @init_vectors() #3 + %0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 2) + %1 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 16843009) + %2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1) + %3 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 -2147483648) + %4 = bitcast <512 x i1> %3 to <16 x i32> + store <16 x i32> %4, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1 + %puts = tail call i32 @puts(i8* getelementptr inbounds ([106 x i8], [106 x i8]* @str, i32 0, i32 0)) + tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3 + %5 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 -1) + %6 = bitcast <512 x i1> %5 to <16 x i32> + store <16 x i32> %6, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1 + %puts5 = tail call i32 @puts(i8* getelementptr inbounds ([99 x i8], [99 x i8]* @str3, i32 0, i32 0)) + tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3 + %7 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 0) + %8 = bitcast <512 x i1> %7 to <16 x i32> + store <16 x i32> %8, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1 + %puts6 = tail call i32 @puts(i8* getelementptr inbounds ([98 x i8], [98 x i8]* @str4, i32 0, i32 0)) + tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3 + ret i32 0 +} + +declare i32 @init_addresses(...) #1 + +declare i32 @acquire_vector_unit(i8 zeroext) #1 + +declare void @init_vectors() #1 + +; Function Attrs: nounwind readnone +declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #2 + +; Function Attrs: nounwind readnone +declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #2 + +; Function Attrs: nounwind readnone +declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #2 + +declare void @print_vecpred(i32, i8*) #1 + +; Function Attrs: nounwind +declare i32 @puts(i8* nocapture readonly) #3 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind } + +!1 = !{!2, !2, i64 0} +!2 = !{!"omnipotent char", !3, i64 0} +!3 = !{!"Simple C/C++ TBAA"} |
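For reference when triaging: the rewrite is on by default via the cl::opt added above, so its effect can be isolated by flipping the flag on otherwise identical invocations (mirroring the RUN lines in the tests; foo.ll is a placeholder input):

    llc -march=hexagon -O2 foo.ll                           # spill-slot rewrite enabled (default)
    llc -march=hexagon -O2 -hexagon-opt-spill=false foo.ll  # rewrite disabled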

