/* Copyright 2013-2015 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *	http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __NPU_H
#define __NPU_H

#include

/* Number of PEs supported */
#define NPU_NUM_OF_PES		4

/* Each brick has at most 2 MMIO BARs. BAR0 is always used to
 * map the 128KB TL/DL registers. BAR1 is used to map either the PL or
 * the AT registers, which are not exposed to the OS.
 */
#define NPU_BRICK_NUM_OF_BARS	2
#define NPU_BRICK_TL_BAR_SIZE	0x20000
#define NPU_BRICK_PL_BAR_SIZE	0x200000

/* Bytes of the emulated NPU PCI device config space. We are
 * emulating a PCI Express device, not a legacy one.
 */
#define NPU_DEV_CFG_SIZE	0x100

/* Interrupt mapping
 *
 * The NPU PHB doesn't support MSI interrupts. It only supports
 * 8 LSI interrupts: [0, 3] for the bricks' DL blocks, [4, 5]
 * for reporting errors from the DL blocks, and [6, 7] for reporting
 * errors from the TL blocks, NPCQs and AT.
 */
#define NPU_LSI_IRQ_COUNT	8
#define NPU_LSI_INT_DL0		0
#define NPU_LSI_INT_DL1		1
#define NPU_LSI_INT_DL2		2
#define NPU_LSI_INT_DL3		3
#define NPU_LSI_IRQ_MIN		0x7F0
#define NPU_LSI_IRQ_MAX		(NPU_LSI_IRQ_MIN + NPU_LSI_IRQ_COUNT - 1)
#define NPU_LSI_IRQ_BASE(chip, phb)	(P8_CHIP_IRQ_PHB_BASE(chip, phb) | NPU_LSI_IRQ_MIN)
#define NPU_IRQ_NUM(irq)		(irq & 0x7FF)
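/* Worked example (illustration only, not part of the original header):
 * NPU_LSI_IRQ_BASE() ORs the PHB-local LSI range (0x7F0..0x7F7) into the
 * per-chip/per-PHB interrupt block returned by P8_CHIP_IRQ_PHB_BASE(),
 * which is assumed to have its low 11 bits clear, and NPU_IRQ_NUM() masks
 * a global interrupt number back down to the PHB-local part. The helper
 * below is hypothetical:
 *
 *	static uint32_t npu_dl0_girq(uint32_t chip, uint32_t phb_index)
 *	{
 *		uint32_t girq = NPU_LSI_IRQ_BASE(chip, phb_index) +
 *				NPU_LSI_INT_DL0;
 *
 *		assert(NPU_IRQ_NUM(girq) ==
 *		       NPU_LSI_IRQ_MIN + NPU_LSI_INT_DL0);
 *		return girq;
 *	}
 */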
/* NPU device capability descriptor. All PCI capabilities are
 * organized as a linked list. Each PCI capability has a specific
 * hook that populates it when initializing the NPU device.
 */
struct npu_dev;
struct npu_dev_cap {
	uint16_t		id;
	uint16_t		start;
	uint16_t		end;
	struct npu_dev		*dev;
	void			(*populate)(struct npu_dev_cap *cap);
	struct list_node	link;
};

struct npu_dev_bar {
	uint32_t		flags;
	uint32_t		xscom;
	uint64_t		base;
	uint64_t		size;
	uint32_t		bar_sz;
	bool			trapped;
};

/* Each device contains 2 links. The device is exposed as a standard
 * PCIe device and its config space is emulated by skiboot.
 */
struct npu_dev {
	uint32_t		flags;
	uint32_t		index;
	uint64_t		xscom;
	void			*pl_base;
	uint64_t		pl_xscom_base;
	struct npu_dev_bar	bar;
	struct phb		*phb;

	/* The link@x node */
	struct dt_node		*dt_node;

	/* PCI virtual device and the associated GPU device */
	struct pci_virt_device	*pvd;
	struct pci_device	*pd;

	struct npu		*npu;
	struct list_head	capabilities;

	/* Which PHY lanes this device is associated with */
	uint16_t		lane_mask;

	/* Used to store the currently running procedure number for
	 * this device. */
	uint16_t		procedure_number;

	/* Used to store the step within a procedure that we are up
	 * to. */
	uint16_t		procedure_step;

	/* Arbitrary data used by each procedure to track status. */
	uint64_t		procedure_data;

	/* Used to timeout long running procedures. */
	unsigned long		procedure_tb;

	uint32_t		procedure_status;

	uint64_t		pe_number;

	/* Used to associate the NPU device with GPU PCI devices */
	const char		*slot_label;
};

/* NPU PHB descriptor */
struct npu {
	uint32_t		flags;
	uint32_t		index;
	uint32_t		chip_id;
	uint64_t		xscom_base;
	uint64_t		at_xscom;
	void			*at_regs;
	uint32_t		base_lsi;
	uint64_t		mm_base;
	uint64_t		mm_size;
	uint32_t		total_devices;
	struct npu_dev		*devices;

	/* IODA cache */
	uint64_t		lxive_cache[8];
	uint64_t		pce_cache[6];
	uint64_t		tve_cache[NPU_NUM_OF_PES];

	bool			tx_zcal_complete[2];

	bool			fenced;

	struct phb		phb;
};

static inline struct npu *phb_to_npu(struct phb *phb)
{
	return container_of(phb, struct npu, phb);
}

/* Select the IODA table (and starting entry) that subsequent accesses to
 * the IODA data register will hit, optionally auto-incrementing the entry
 * index on each access.
 */
static inline void npu_ioda_sel(struct npu *p, uint32_t table,
				uint32_t addr, bool autoinc)
{
	out_be64(p->at_regs + NPU_IODA_ADDR,
		 (autoinc ? NPU_IODA_AD_AUTOINC : 0)	|
		 SETFIELD(NPU_IODA_AD_TSEL, 0ul, table)	|
		 SETFIELD(NPU_IODA_AD_TADR, 0ul, addr));
}

void npu_scom_init(struct npu_dev *dev);

int64_t npu_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf,
			  uint32_t offset, uint32_t len, uint32_t *data,
			  bool write);

void npu_set_fence_state(struct npu *p, bool fence);
void npu_dev_procedure_reset(struct npu_dev *dev);

#define NPUDBG(p, fmt, a...)	prlog(PR_DEBUG, "NPU%d: " fmt, \
				      (p)->phb.opal_id, ##a)
#define NPUINF(p, fmt, a...)	prlog(PR_INFO, "NPU%d: " fmt, \
				      (p)->phb.opal_id, ##a)

#define NPUDEVDBG(p, fmt, a...)	NPUDBG((p)->npu, fmt, ##a)
#define NPUDEVINF(p, fmt, a...)	NPUINF((p)->npu, fmt, ##a)

#endif /* __NPU_H */
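/* Usage sketch for npu_ioda_sel() above (illustration only, not part of
 * this header): with auto-increment enabled, each access to the IODA data
 * register steps to the next table entry, so a cached table can be written
 * back in a single pass. The register/table names NPU_IODA_DATA0 and
 * NPU_IODA_TBL_TVT are assumed to come from the NPU register definitions;
 * the helper below is hypothetical.
 *
 *	static void npu_restore_tve(struct npu *p)
 *	{
 *		unsigned int i;
 *
 *		npu_ioda_sel(p, NPU_IODA_TBL_TVT, 0, true);
 *		for (i = 0; i < NPU_NUM_OF_PES; i++)
 *			out_be64(p->at_regs + NPU_IODA_DATA0,
 *				 p->tve_cache[i]);
 *	}
 */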