summaryrefslogtreecommitdiffstats
path: root/include/npu2.h
blob: d58aab47bb30c558905d196c665de4c006e5423b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
/* Copyright 2013-2016 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NPU2_H
#define __NPU2_H

#include <pci.h>
#include <phys-map.h>
#include <npu2-regs.h>

/* Debugging options */
#define NPU2DBG(p, fmt, a...)	prlog(PR_DEBUG, "NPU%d: " fmt, \
				      (p)->phb_nvlink.opal_id, ##a)
#define NPU2INF(p, fmt, a...)	prlog(PR_INFO,  "NPU%d: " fmt, \
				      (p)->phb_nvlink.opal_id, ##a)
#define NPU2ERR(p, fmt, a...)	prlog(PR_ERR,   "NPU%d: " fmt, \
				      (p)->phb_nvlink.opal_id, ##a)

#define NPU2DEVLOG(l, p, fmt, a...)	prlog(l, "NPU%d:%d:%d.%d " fmt, \
					      (p)->npu->phb_nvlink.opal_id, \
					      ((p)->bdfn >> 8) & 0xff, \
					      ((p)->bdfn >> 3) & 0x1f, \
					      (p)->bdfn & 0x7, ##a)
#define NPU2DEVDBG(p, fmt, a...)	NPU2DEVLOG(PR_DEBUG, p, fmt, ##a)
#define NPU2DEVINF(p, fmt, a...)	NPU2DEVLOG(PR_INFO, p, fmt, ##a)
#define NPU2DEVERR(p, fmt, a...)	NPU2DEVLOG(PR_ERR, p, fmt, ##a)

#define OCAPIDBG(dev, fmt, a...)    prlog(PR_DEBUG, "OCAPI[%d:%d]: " fmt, \
					  dev->npu->chip_id, dev->brick_index, ## a)
#define OCAPIINF(dev, fmt, a...)    prlog(PR_INFO, "OCAPI[%d:%d]: " fmt, \
					  dev->npu->chip_id, dev->brick_index, ## a)
#define OCAPIERR(dev, fmt, a...)    prlog(PR_ERR, "OCAPI[%d:%d]: " fmt, \
					  dev->npu->chip_id, dev->brick_index, ## a)


/*
 * Number of PEs supported
 *
 * The NPU supports PE numbers from 0-15. At present, we only assign a maximum
 * of 1 PE per brick.
 *
 * NVLink devices are currently exposed to Linux underneath a single virtual
 * PHB. Therefore, we give NVLink half the available PEs, which is enough for
 * 6 bricks plus 1 reserved PE.
 *
 * For OpenCAPI, the BDF-to-PE registers are used exclusively for mapping
 * bricks to System Interrupt Log registers (the BDF component of those
 * registers is ignored). Currently, we allocate a fixed PE based on the brick
 * index in the upper half of the PE namespace.
 */
#define NPU2_MAX_PE_NUM		8
#define NPU2_RESERVED_PE_NUM	7
#define NPU2_OCAPI_PE(ndev) ((ndev)->brick_index + NPU2_MAX_PE_NUM)

#define NPU2_LINKS_PER_CHIP 6

/* Link flags */
#define NPU2_DEV_PCI_LINKED	0x1
#define NPU2_DEV_DL_RESET	0x2

/* Return the stack (0-2) of a device */
#define NPU2DEV_STACK(ndev) ((ndev)->brick_index / 2)

/* Return the brick number (0-1) within a stack */
#define NPU2DEV_BRICK(ndev) ((ndev)->brick_index % 2)

/* This represents the state of the actual hardware BARs not the
 * emulated PCIe BARs. The is a subtle difference between the two as
 * not all BARs are exposed outside of skiboot. */
struct npu2_bar {
	enum phys_map_type	type;
	int			index;
#define NPU2_BAR_FLAG_ENABLED	0x0010

/* Generation ID's are a single space in the hardware but we split
 * them in two for the emulated PCIe devices so we need to keep track
 * of which one has been enabled/disabled. */
#define NPU2_BAR_FLAG_ENABLED0	0x0080
#define NPU2_BAR_FLAG_ENABLED1  0x0100
	uint32_t		flags;
	uint64_t		base;
	uint64_t		size;
	uint64_t		reg;
};

/* Rpresents a BAR that is exposed via the PCIe emulated
 * devices */
struct npu2_pcie_bar {
#define NPU2_PCIE_BAR_FLAG_SIZE_HI	0x0020
#define NPU2_PCIE_BAR_FLAG_TRAPPED	0x0040
	uint32_t		flags;
	struct npu2_bar		npu2_bar;
};

enum npu2_dev_type {
	NPU2_DEV_TYPE_UNKNOWN,
	NPU2_DEV_TYPE_NVLINK,
	NPU2_DEV_TYPE_OPENCAPI,
};

struct npu2;

struct npu2_dev_nvlink {
	/* For NVLink, device and function numbers are allocated based
	 * on GPU association. Links to connected to the same GPU will
	 * be exposed as different functions of the same
	 * bus/device. */
	uint32_t		gpu_bdfn;

	/* PCI virtual device and the associated GPU device */
	struct pci_virt_device	*pvd;
	struct phb		*phb;
	struct pci_device	*pd;

	uint8_t			link_flags;

	/* Used to associate the NPU device with GPU PCI devices */
	const char		*slot_label;
};

struct npu2_dev {
	enum npu2_dev_type	type;
	uint32_t		link_index;
	uint32_t		brick_index;
	uint64_t		pl_xscom_base;
	struct dt_node		*dt_node;
	struct npu2_pcie_bar	bars[2];
	struct npu2		*npu;

	uint32_t		bdfn;

	/* Which PHY lanes this device is associated with */
	uint32_t		lane_mask;
	uint64_t		link_speed; /* not used for NVLink */

	/* Track currently running procedure and step number */
	uint16_t		procedure_number;
	uint16_t		procedure_step;
	unsigned long		procedure_tb;
	uint32_t		procedure_status;

	/* NVLink */
	struct npu2_dev_nvlink	nvlink;

	/* OpenCAPI */
	struct phb		phb_ocapi;
	uint64_t		linux_pe;
	bool			train_need_fence;
	bool			train_fenced;
};

struct npu2 {
	uint32_t	index;
	struct dt_node	*dt_node;
	uint32_t	chip_id;
	uint64_t	xscom_base;
	void		*regs;
	uint64_t	mm_base;
	uint64_t	mm_size;
	uint32_t	base_lsi;
	uint32_t	total_devices;
	struct npu2_dev	*devices;
	enum phys_map_type gpu_map_type;
	int		ctx_ref[NPU2_XTS_BDF_MAP_SIZE];

	/* IODA cache */
	uint64_t	tve_cache[16];
	bool		tx_zcal_complete[2];

	/* Used to protect global MMIO space, in particular the XTS
	 * tables. */
	struct lock	lock;

	/* NVLink */
	struct phb	phb_nvlink;
	uint32_t	phb_index;

	/* OCAPI */
	uint64_t	i2c_port_id_ocapi;
	struct lock	i2c_lock;
	uint8_t		i2c_pin_mode;
	uint8_t		i2c_pin_wr_state;
};

static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
{
	assert(phb->phb_type == phb_type_npu_v2);
	return container_of(phb, struct npu2, phb_nvlink);
}

static inline struct npu2_dev *phb_to_npu2_dev_ocapi(struct phb *phb)
{
	assert(phb->phb_type == phb_type_npu_v2_opencapi);
	return container_of(phb, struct npu2_dev, phb_ocapi);
}

static inline struct phb *npu2_dev_to_phb(struct npu2_dev *ndev)
{
	switch (ndev->type) {
	case NPU2_DEV_TYPE_NVLINK:
		return &ndev->npu->phb_nvlink;
	case NPU2_DEV_TYPE_OPENCAPI:
		return &ndev->phb_ocapi;
	default:
		assert(false);
	}
}

void npu2_i2c_presence_detect(struct npu2 *npu);
int npu2_opencapi_init_npu(struct npu2 *npu);
int npu2_nvlink_init_npu(struct npu2 *npu);
void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn);

void npu2_write_4b(struct npu2 *p, uint64_t reg, uint32_t val);
uint32_t npu2_read_4b(struct npu2 *p, uint64_t reg);
void npu2_write(struct npu2 *p, uint64_t reg, uint64_t val);
uint64_t npu2_read(struct npu2 *p, uint64_t reg);
void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask);
void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mask);
int64_t npu2_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf,
			   uint32_t offset, uint32_t len, uint32_t *data,
			   bool write);
void npu2_dev_procedure_reset(struct npu2_dev *dev);

void npu2_set_link_flag(struct npu2_dev *ndev, uint8_t flag);
void npu2_clear_link_flag(struct npu2_dev *ndev, uint8_t flag);
uint32_t reset_ntl(struct npu2_dev *ndev);
extern int nv_zcal_nominal;
void npu2_opencapi_phy_init(struct npu2_dev *dev);
void npu2_opencapi_phy_reset(struct npu2_dev *dev);
void npu2_opencapi_phy_prbs31(struct npu2_dev *dev);
void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev);
int64_t npu2_freeze_status(struct phb *phb __unused,
			   uint64_t pe_number __unused,
			   uint8_t *freeze_state,
			   uint16_t *pci_error_type __unused,
			   uint16_t *severity __unused);
void npu2_dump_scoms(int chip_id);
#endif /* __NPU2_H */
OpenPOWER on IntegriCloud