summaryrefslogtreecommitdiffstats
path: root/platforms/astbmc
diff options
context:
space:
mode:
author Russell Currey <ruscur@russell.cc> 2016-07-05 15:05:35 +1000
committer Stewart Smith <stewart@linux.vnet.ibm.com> 2016-07-20 18:14:47 +1000
commit 62bf371705955b4e32da9f3d18897333a0a34869 (patch)
tree bfddb5ef1a780c6a8787db4b154e8d3ac1825bac /platforms/astbmc
parent 8126e4c69407c1f9957ab2313015f46a30e1f1ec (diff)
download blackbird-skiboot-62bf371705955b4e32da9f3d18897333a0a34869.tar.gz
blackbird-skiboot-62bf371705955b4e32da9f3d18897333a0a34869.zip
nvlink: Associate and allocate NPUs using slots
Allocating BDFNs to NPU devices and associating NPU devices with PCI devices of GPUs both rely on comparing PBCQ handles. This will fail if a system has multiple sets of GPUs behind a single PHB. Rework this to instead use slot locations.

The following changes are introduced:

- Groups of NPU links that connect to the same GPU are presented in the slot table entries as st_npu_slot, using ST_LOC_NPU_GROUP.
- NPU links are created with the ibm,npu-group-id property replacing the ibm,npu-pbcq property; the group id is what is now used in BDFN allocation and GPU association.
- Slot comparison is handled slightly differently for NPU devices: the function part of the BDFN is ignored, since the device number represents the physical GPU the link is connected to.
- BDFN allocation for NPU devices is now derived from the groups in the slot table. For Garrison, the same BDFNs are generated as before.
- Association with GPU PCI devices is performed by comparing the slot label. This means that, on future machines with NPUs, slot labels are compulsory for NVLink functionality to work.

Signed-off-by: Russell Currey <ruscur@russell.cc>
Reviewed-By: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Diffstat (limited to 'platforms/astbmc')
-rw-r--r-- platforms/astbmc/astbmc.h 8
-rw-r--r-- platforms/astbmc/garrison.c 59
-rw-r--r-- platforms/astbmc/slots.c 10
3 files changed, 34 insertions, 43 deletions
diff --git a/platforms/astbmc/astbmc.h b/platforms/astbmc/astbmc.h
index 23c31c7c..322282ef 100644
--- a/platforms/astbmc/astbmc.h
+++ b/platforms/astbmc/astbmc.h
@@ -20,6 +20,13 @@
#define ST_LOC_PHB(chip_id, phb_idx) ((chip_id) << 16 | (phb_idx))
#define ST_LOC_DEVFN(dev, fn) ((dev) << 3 | (fn))
+/*
+ * NPU groups are used to allocate device numbers. There is a 1 to 1
+ * correlation between a NPU group and a physical GPU. Links within a group
+ * are allocated as functions within a device, so groups must be numbered
+ * sequentially starting at 0. The group id occupies the device bits.
+ */
+#define ST_LOC_NPU_GROUP(group_id) ((group_id) << 3)
struct slot_table_entry {
enum slot_table_etype {
@@ -27,6 +34,7 @@ struct slot_table_entry {
st_phb,
st_pluggable_slot,
st_builtin_dev,
+ st_npu_slot
} etype;
uint32_t location;
const char *name;
diff --git a/platforms/astbmc/garrison.c b/platforms/astbmc/garrison.c
index 3ff84a3d..f400a51f 100644
--- a/platforms/astbmc/garrison.c
+++ b/platforms/astbmc/garrison.c
@@ -63,23 +63,13 @@ static const struct slot_table_entry garrison_phb0_3_slot[] = {
static const struct slot_table_entry garrison_npu0_slots[] = {
{
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(0,0),
- .name = "GPU2",
- },
- {
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(0,1),
+ .etype = st_npu_slot,
+ .location = ST_LOC_NPU_GROUP(0),
.name = "GPU2",
},
{
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(1,0),
- .name = "GPU1",
- },
- {
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(1,1),
+ .etype = st_npu_slot,
+ .location = ST_LOC_NPU_GROUP(1),
.name = "GPU1",
},
{ .etype = st_end },
@@ -152,23 +142,13 @@ static const struct slot_table_entry garrison_phb1_3_slot[] = {
static const struct slot_table_entry garrison_npu1_slots[] = {
{
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(0,0),
- .name = "GPU4",
- },
- {
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(0,1),
+ .etype = st_npu_slot,
+ .location = ST_LOC_NPU_GROUP(0),
.name = "GPU4",
},
{
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(1,0),
- .name = "GPU3",
- },
- {
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(1,1),
+ .etype = st_npu_slot,
+ .location = ST_LOC_NPU_GROUP(1),
.name = "GPU3",
},
{ .etype = st_end },
@@ -233,7 +213,7 @@ static const struct slot_table_entry garrison_phb_table[] = {
#define NPU_INDIRECT0 0x8000000008010c3f
#define NPU_INDIRECT1 0x8000000008010c7f
-static void create_link(struct dt_node *npu, struct dt_node *pbcq, int index)
+static void create_link(struct dt_node *npu, int group, int index)
{
struct dt_node *link;
uint32_t lane_mask;
@@ -255,12 +235,12 @@ static void create_link(struct dt_node *npu, struct dt_node *pbcq, int index)
}
dt_add_property_u64s(link, "ibm,npu-phy", phy);
dt_add_property_cells(link, "ibm,npu-lane-mask", lane_mask);
- dt_add_property_cells(link, "ibm,npu-pbcq", pbcq->phandle);
+ dt_add_property_cells(link, "ibm,npu-group-id", group);
}
static void dt_create_npu(void)
{
- struct dt_node *xscom, *npu, *pbcq;
+ struct dt_node *xscom, *npu;
char namebuf[32];
dt_for_each_compatible(dt_root, xscom, "ibm,xscom") {
@@ -275,17 +255,12 @@ static void dt_create_npu(void)
dt_add_property_cells(npu, "ibm,npu-index", 0);
dt_add_property_cells(npu, "ibm,npu-links", 4);
- /* On Garrison we have 2 links per GPU device. The
- * first 2 links go to the GPU connected via
- * pbcq@2012c00 the second two via pbcq@2012800. */
- pbcq = dt_find_by_name(xscom, "pbcq@2012c00");
- assert(pbcq);
- create_link(npu, pbcq, 0);
- create_link(npu, pbcq, 1);
- pbcq = dt_find_by_name(xscom, "pbcq@2012800");
- assert(pbcq);
- create_link(npu, pbcq, 4);
- create_link(npu, pbcq, 5);
+ /* On Garrison we have 2 links per GPU device. These are
+ * grouped together as per the slot tables above. */
+ create_link(npu, 0, 0);
+ create_link(npu, 0, 1);
+ create_link(npu, 1, 4);
+ create_link(npu, 1, 5);
}
}
diff --git a/platforms/astbmc/slots.c b/platforms/astbmc/slots.c
index 36547e11..678a3cc2 100644
--- a/platforms/astbmc/slots.c
+++ b/platforms/astbmc/slots.c
@@ -54,6 +54,7 @@ static const struct slot_table_entry *match_slot_dev_entry(struct phb *phb,
struct pci_device *pd)
{
const struct slot_table_entry *parent, *ent;
+ uint32_t bdfn;
/* Find a parent recursively */
if (pd->parent)
@@ -70,7 +71,14 @@ static const struct slot_table_entry *match_slot_dev_entry(struct phb *phb,
prerror("SLOT: Bad PHB entry type in table !\n");
continue;
}
- if (ent->location == (pd->bdfn & 0xff))
+
+ /* NPU slots match on device, not function */
+ if (ent->etype == st_npu_slot)
+ bdfn = pd->bdfn & 0xf8;
+ else
+ bdfn = pd->bdfn & 0xff;
+
+ if (ent->location == bdfn)
return ent;
}
return NULL;
OpenPOWER on IntegriCloud