summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/R600/R600InstrInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/R600/R600InstrInfo.cpp')
-rw-r--r-- llvm/lib/Target/R600/R600InstrInfo.cpp 185
1 files changed, 146 insertions, 39 deletions
diff --git a/llvm/lib/Target/R600/R600InstrInfo.cpp b/llvm/lib/Target/R600/R600InstrInfo.cpp
index 354f039c1e8..974a5794505 100644
--- a/llvm/lib/Target/R600/R600InstrInfo.cpp
+++ b/llvm/lib/Target/R600/R600InstrInfo.cpp
@@ -250,8 +250,9 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const {
std::vector<std::pair<int, unsigned> >
R600InstrInfo::ExtractSrcs(MachineInstr *MI,
- const DenseMap<unsigned, unsigned> &PV)
- const {
+ const DenseMap<unsigned, unsigned> &PV,
+ unsigned &ConstCount) const {
+ ConstCount = 0;
const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
const std::pair<int, unsigned> DummyPair(-1, 0);
std::vector<std::pair<int, unsigned> > Result;
@@ -259,18 +260,20 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI,
for (unsigned n = Srcs.size(); i < n; ++i) {
unsigned Reg = Srcs[i].first->getReg();
unsigned Index = RI.getEncodingValue(Reg) & 0xff;
- unsigned Chan = RI.getHWRegChan(Reg);
if (Reg == AMDGPU::OQAP) {
Result.push_back(std::pair<int, unsigned>(Index, 0));
}
- if (Index > 127) {
- Result.push_back(DummyPair);
+ if (PV.find(Reg) != PV.end()) {
+ // 255 is used to tell that it is a PS/PV reg
+ Result.push_back(std::pair<int, unsigned>(255, 0));
continue;
}
- if (PV.find(Reg) != PV.end()) {
+ if (Index > 127) {
+ ConstCount++;
Result.push_back(DummyPair);
continue;
}
+ unsigned Chan = RI.getHWRegChan(Reg);
Result.push_back(std::pair<int, unsigned>(Index, Chan));
}
for (; i < 3; ++i)
@@ -305,23 +308,51 @@ Swizzle(std::vector<std::pair<int, unsigned> > Src,
return Src;
}
-bool
-R600InstrInfo::isLegal(
- const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
- const std::vector<R600InstrInfo::BankSwizzle> &Swz,
- unsigned CheckedSize) const {
+static unsigned
+getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
+ switch (Swz) {
+ case R600InstrInfo::ALU_VEC_012_SCL_210: {
+ unsigned Cycles[3] = { 2, 1, 0};
+ return Cycles[Op];
+ }
+ case R600InstrInfo::ALU_VEC_021_SCL_122: {
+ unsigned Cycles[3] = { 1, 2, 2};
+ return Cycles[Op];
+ }
+ case R600InstrInfo::ALU_VEC_120_SCL_212: {
+ unsigned Cycles[3] = { 2, 1, 2};
+ return Cycles[Op];
+ }
+ case R600InstrInfo::ALU_VEC_102_SCL_221: {
+ unsigned Cycles[3] = { 2, 2, 1};
+ return Cycles[Op];
+ }
+ default:
+ llvm_unreachable("Wrong Swizzle for Trans Slot");
+ return 0;
+ }
+}
+
+/// Returns how many MIs (whose inputs are represented by IGSrcs) can be packed
+/// in the same Instruction Group while meeting read port limitations, given
+/// the swizzle sequence Swz.
+unsigned R600InstrInfo::isLegalUpTo(
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ const std::vector<R600InstrInfo::BankSwizzle> &Swz,
+ const std::vector<std::pair<int, unsigned> > &TransSrcs,
+ R600InstrInfo::BankSwizzle TransSwz) const {
int Vector[4][3];
memset(Vector, -1, sizeof(Vector));
- for (unsigned i = 0; i < CheckedSize; i++) {
+ for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
const std::vector<std::pair<int, unsigned> > &Srcs =
Swizzle(IGSrcs[i], Swz[i]);
for (unsigned j = 0; j < 3; j++) {
const std::pair<int, unsigned> &Src = Srcs[j];
- if (Src.first < 0)
+ if (Src.first < 0 || Src.first == 255)
continue;
if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
- if (Swz[i] != R600InstrInfo::ALU_VEC_012 &&
- Swz[i] != R600InstrInfo::ALU_VEC_021) {
+ if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
+ Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
// The value from output queue A (denoted by register OQAP) can
// only be fetched during the first cycle.
return false;
@@ -332,51 +363,126 @@ R600InstrInfo::isLegal(
if (Vector[Src.second][j] < 0)
Vector[Src.second][j] = Src.first;
if (Vector[Src.second][j] != Src.first)
- return false;
+ return i;
}
}
- return true;
+ // Now check the Trans ALU
+ for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
+ const std::pair<int, unsigned> &Src = TransSrcs[i];
+ unsigned Cycle = getTransSwizzle(TransSwz, i);
+ if (Src.first < 0)
+ continue;
+ if (Src.first == 255)
+ continue;
+ if (Vector[Src.second][Cycle] < 0)
+ Vector[Src.second][Cycle] = Src.first;
+ if (Vector[Src.second][Cycle] != Src.first)
+ return IGSrcs.size() - 1;
+ }
+ return IGSrcs.size();
}
-bool
-R600InstrInfo::recursiveFitsFPLimitation(
- const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
- std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
- unsigned Depth) const {
- if (!isLegal(IGSrcs, SwzCandidate, Depth))
+/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
+/// (in lexicographic order) swizzle sequence assuming that all swizzles after
+/// Idx can be skipped.
+static bool
+NextPossibleSolution(
+ std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+ unsigned Idx) {
+ assert(Idx < SwzCandidate.size());
+ int ResetIdx = Idx;
+ while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
+ ResetIdx --;
+ for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
+ SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
+ }
+ if (ResetIdx == -1)
return false;
- if (IGSrcs.size() == Depth)
- return true;
- unsigned i = SwzCandidate[Depth];
- for (; i < 6; i++) {
- SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;
- if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
+ SwzCandidate[ResetIdx]++;
+ return true;
+}
+
+/// Enumerate all possible swizzle sequences to find one that can meet all
+/// read port requirements.
+bool R600InstrInfo::FindSwizzleForVectorSlot(
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+ const std::vector<std::pair<int, unsigned> > &TransSrcs,
+ R600InstrInfo::BankSwizzle TransSwz) const {
+ unsigned ValidUpTo = 0;
+ do {
+ ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
+ if (ValidUpTo == IGSrcs.size())
return true;
- }
- SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
+ } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
return false;
}
+/// Instructions in the Trans slot can't read a GPR at cycle 0 if they also read
+/// a const, and can't read a GPR at cycle 1 if they read 2 consts.
+static bool
+isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
+ const std::vector<std::pair<int, unsigned> > &TransOps,
+ unsigned ConstCount) {
+ for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
+ const std::pair<int, unsigned> &Src = TransOps[i];
+ unsigned Cycle = getTransSwizzle(TransSwz, i);
+ if (Src.first < 0)
+ continue;
+ if (ConstCount > 0 && Cycle == 0)
+ return false;
+ if (ConstCount > 1 && Cycle == 1)
+ return false;
+ }
+ return true;
+}
+
bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
- const DenseMap<unsigned, unsigned> &PV,
- std::vector<BankSwizzle> &ValidSwizzle)
+ const DenseMap<unsigned, unsigned> &PV,
+ std::vector<BankSwizzle> &ValidSwizzle,
+ bool isLastAluTrans)
const {
//Todo : support shared src0 - src1 operand
std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
ValidSwizzle.clear();
+ unsigned ConstCount;
+ BankSwizzle TransBS;
for (unsigned i = 0, e = IG.size(); i < e; ++i) {
- IGSrcs.push_back(ExtractSrcs(IG[i], PV));
+ IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
unsigned Op = getOperandIdx(IG[i]->getOpcode(),
AMDGPU::OpName::bank_swizzle);
ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
IG[i]->getOperand(Op).getImm());
}
- bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);
- if (!Result)
- return false;
- return true;
+ std::vector<std::pair<int, unsigned> > TransOps;
+ if (!isLastAluTrans)
+ return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
+
+ TransOps = IGSrcs.back();
+ IGSrcs.pop_back();
+ ValidSwizzle.pop_back();
+
+ static const R600InstrInfo::BankSwizzle TransSwz[] = {
+ ALU_VEC_012_SCL_210,
+ ALU_VEC_021_SCL_122,
+ ALU_VEC_120_SCL_212,
+ ALU_VEC_102_SCL_221
+ };
+ for (unsigned i = 0; i < 4; i++) {
+ TransBS = TransSwz[i];
+ if (!isConstCompatible(TransBS, TransOps, ConstCount))
+ continue;
+ bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
+ TransBS);
+ if (Result) {
+ ValidSwizzle.push_back(TransBS);
+ return true;
+ }
+ }
+
+ return false;
}
@@ -406,7 +512,8 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
}
bool
-R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
+R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
+ const {
std::vector<unsigned> Consts;
for (unsigned i = 0, n = MIs.size(); i < n; i++) {
MachineInstr *MI = MIs[i];
OpenPOWER on IntegriCloud