diff options
Diffstat (limited to 'drivers/misc')
121 files changed, 5593 insertions, 6033 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 16900357afc2..7f0d48f406e3 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -8,7 +8,6 @@ menu "Misc devices" config SENSORS_LIS3LV02D tristate depends on INPUT - select INPUT_POLLDEV config AD525X_DPOT tristate "Analog Devices Digital Potentiometers" @@ -126,18 +125,6 @@ config INTEL_MID_PTI an Intel Atom (non-netbook) mobile device containing a MIPI P1149.7 standard implementation. -config SGI_IOC4 - tristate "SGI IOC4 Base IO support" - depends on PCI - ---help--- - This option enables basic support for the IOC4 chip on certain - SGI IO controller cards (IO9, IO10, and PCI-RT). This option - does not enable any specific functions on such a card, but provides - necessary infrastructure for other drivers to utilize. - - If you have an SGI Altix with an IOC4-based card say Y. - Otherwise say N. - config TIFM_CORE tristate "TI Flash Media interface support" depends on PCI @@ -200,9 +187,8 @@ config ENCLOSURE_SERVICES config SGI_XP tristate "Support communication between SGI SSIs" depends on NET - depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP - select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 - select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 + depends on (IA64_SGI_UV || X86_UV) && SMP + depends on X86_64 || BROKEN select SGI_GRU if X86_64 && SMP ---help--- An SGI machine can be divided into multiple Single System @@ -339,14 +325,14 @@ config SENSORS_TSL2550 will be called tsl2550. config SENSORS_BH1770 - tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor" - depends on I2C - ---help--- - Say Y here if you want to build a driver for BH1770GLC (ROHM) or + tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor" + depends on I2C + ---help--- + Say Y here if you want to build a driver for BH1770GLC (ROHM) or SFH7770 (Osram) combined ambient light and proximity sensor chip. - To compile this driver as a module, choose M here: the - module will be called bh1770glc. If unsure, say N here. + To compile this driver as a module, choose M here: the + module will be called bh1770glc. If unsure, say N here. config SENSORS_APDS990X tristate "APDS990X combined als and proximity sensors" @@ -375,15 +361,6 @@ config DS1682 This driver can also be built as a module. If so, the module will be called ds1682. -config SPEAR13XX_PCIE_GADGET - bool "PCIe gadget support for SPEAr13XX platform" - depends on ARCH_SPEAR13XX && BROKEN - help - This option enables gadget support for PCIe controller. If - board file defines any controller as PCIe endpoint then a sysfs - entry will be created for that controller. User can use these - sysfs node to configure PCIe EP as per his requirements. - config VMWARE_BALLOON tristate "VMware Balloon Driver" depends on VMWARE_VMCI && X86 && HYPERVISOR_GUEST @@ -460,8 +437,8 @@ config PCI_ENDPOINT_TEST select CRC32 tristate "PCI Endpoint Test driver" ---help--- - Enable this configuration option to enable the host side test driver - for PCI Endpoint. + Enable this configuration option to enable the host side test driver + for PCI Endpoint. config XILINX_SDFEC tristate "Xilinx SDFEC 16" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index abd8ae249746..c1860d35dc7e 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -21,7 +21,6 @@ obj-$(CONFIG_QCOM_COINCELL) += qcom-coincell.o obj-$(CONFIG_QCOM_FASTRPC) += fastrpc.o obj-$(CONFIG_SENSORS_BH1770) += bh1770glc.o obj-$(CONFIG_SENSORS_APDS990X) += apds990x.o -obj-$(CONFIG_SGI_IOC4) += ioc4.o obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o obj-$(CONFIG_KGDB_TESTS) += kgdbts.o obj-$(CONFIG_SGI_XP) += sgi-xp/ @@ -37,7 +36,6 @@ obj-$(CONFIG_C2PORT) += c2port/ obj-$(CONFIG_HMC6352) += hmc6352.o obj-y += eeprom/ obj-y += cb710/ -obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o obj-$(CONFIG_PCH_PHUB) += pch_phub.o obj-y += ti-st/ diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c index 08b5b639d77f..7de7840f613c 100644 --- a/drivers/misc/atmel_tclib.c +++ b/drivers/misc/atmel_tclib.c @@ -109,7 +109,6 @@ static int __init tc_probe(struct platform_device *pdev) struct atmel_tc *tc; struct clk *clk; int irq; - struct resource *r; unsigned int i; if (of_get_child_count(pdev->dev.of_node)) @@ -133,8 +132,7 @@ static int __init tc_probe(struct platform_device *pdev) if (IS_ERR(tc->slow_clk)) return PTR_ERR(tc->slow_clk); - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - tc->regs = devm_ioremap_resource(&pdev->dev, r); + tc->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(tc->regs)) return PTR_ERR(tc->regs); diff --git a/drivers/misc/cardreader/Makefile b/drivers/misc/cardreader/Makefile index d9bff5a2217e..1f56267ed2f4 100644 --- a/drivers/misc/cardreader/Makefile +++ b/drivers/misc/cardreader/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_MISC_ALCOR_PCI) += alcor_pci.o obj-$(CONFIG_MISC_RTSX_PCI) += rtsx_pci.o -rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o +rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o rts5261.o obj-$(CONFIG_MISC_RTSX_USB) += rtsx_usb.o diff --git a/drivers/misc/cardreader/alcor_pci.c b/drivers/misc/cardreader/alcor_pci.c index bcb10fa4bc3a..cd402c89189e 100644 --- a/drivers/misc/cardreader/alcor_pci.c +++ b/drivers/misc/cardreader/alcor_pci.c @@ -38,12 +38,18 @@ static const struct alcor_dev_cfg au6621_cfg = { .dma = 1, }; +static const struct alcor_dev_cfg au6625_cfg = { + .dma = 0, +}; + static const struct pci_device_id pci_ids[] = { { PCI_DEVICE(PCI_ID_ALCOR_MICRO, PCI_ID_AU6601), .driver_data = (kernel_ulong_t)&alcor_cfg }, { PCI_DEVICE(PCI_ID_ALCOR_MICRO, PCI_ID_AU6621), .driver_data = (kernel_ulong_t)&au6621_cfg }, - { }, + { PCI_DEVICE(PCI_ID_ALCOR_MICRO, PCI_ID_AU6625), + .driver_data = (kernel_ulong_t)&au6625_cfg }, + {}, }; MODULE_DEVICE_TABLE(pci, pci_ids); @@ -334,8 +340,7 @@ static void alcor_pci_remove(struct pci_dev *pdev) #ifdef CONFIG_PM_SLEEP static int alcor_suspend(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct alcor_pci_priv *priv = pci_get_drvdata(pdev); + struct alcor_pci_priv *priv = dev_get_drvdata(dev); alcor_pci_aspm_ctrl(priv, 1); return 0; @@ -344,8 +349,7 @@ static int alcor_suspend(struct device *dev) static int alcor_resume(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct alcor_pci_priv *priv = pci_get_drvdata(pdev); + struct alcor_pci_priv *priv = dev_get_drvdata(dev); alcor_pci_aspm_ctrl(priv, 0); return 0; diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c index 40a6d199f2ea..4214f02a17fd 100644 --- a/drivers/misc/cardreader/rts5260.c +++ b/drivers/misc/cardreader/rts5260.c @@ -191,7 +191,6 @@ static int sd_set_sample_push_timing_sd30(struct rtsx_pcr *pcr) static int rts5260_card_power_on(struct rtsx_pcr *pcr, int card) { - int err = 0; struct rtsx_cr_option *option = &pcr->option; if (option->ocp_en) @@ -231,7 +230,7 @@ static int rts5260_card_power_on(struct rtsx_pcr *pcr, int card) rtsx_pci_write_register(pcr, REG_PRE_RW_MODE, EN_INFINITE_MODE, 0); - return err; + return 0; } static int rts5260_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c new file mode 100644 index 000000000000..bc4967a6efa1 --- /dev/null +++ b/drivers/misc/cardreader/rts5261.c @@ -0,0 +1,793 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2018-2019 Realtek Semiconductor Corp. All rights reserved. + * + * Author: + * Rui FENG <rui_feng@realsil.com.cn> + * Wei WANG <wei_wang@realsil.com.cn> + */ + +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/rtsx_pci.h> + +#include "rts5261.h" +#include "rtsx_pcr.h" + +static u8 rts5261_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & IC_VERSION_MASK; +} + +static void rts5261_fill_driving(struct rtsx_pcr *pcr, u8 voltage) +{ + u8 driving_3v3[4][3] = { + {0x13, 0x13, 0x13}, + {0x96, 0x96, 0x96}, + {0x7F, 0x7F, 0x7F}, + {0x96, 0x96, 0x96}, + }; + u8 driving_1v8[4][3] = { + {0x99, 0x99, 0x99}, + {0x3A, 0x3A, 0x3A}, + {0xE6, 0xE6, 0xE6}, + {0xB3, 0xB3, 0xB3}, + }; + u8 (*driving)[3], drive_sel; + + if (voltage == OUTPUT_3V3) { + driving = driving_3v3; + drive_sel = pcr->sd30_drive_sel_3v3; + } else { + driving = driving_1v8; + drive_sel = pcr->sd30_drive_sel_1v8; + } + + rtsx_pci_write_register(pcr, SD30_CLK_DRIVE_SEL, + 0xFF, driving[drive_sel][0]); + + rtsx_pci_write_register(pcr, SD30_CMD_DRIVE_SEL, + 0xFF, driving[drive_sel][1]); + + rtsx_pci_write_register(pcr, SD30_DAT_DRIVE_SEL, + 0xFF, driving[drive_sel][2]); +} + +static void rtsx5261_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg; + /* 0x814~0x817 */ + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + + if (!rts5261_vendor_setting_valid(reg)) { + pcr_dbg(pcr, "skip fetch vendor setting\n"); + return; + } + + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rts5261_reg_to_card_drive_sel(reg); + + if (rts5261_reg_check_reverse_socket(reg)) + pcr->flags |= PCR_REVERSE_SOCKET; + + /* 0x724~0x727 */ + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + pcr->aspm_en = rts5261_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = rts5261_reg_to_sd30_drive_sel_1v8(reg); + pcr->sd30_drive_sel_3v3 = rts5261_reg_to_sd30_drive_sel_3v3(reg); +} + +static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + /* Set relink_time to 0 */ + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, + RELINK_TIME_MASK, 0); + + if (pm_state == HOST_ENTER_S3) + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, + D3_DELINK_MODE_EN, D3_DELINK_MODE_EN); + + rtsx_pci_write_register(pcr, RTS5261_REG_FPDCTL, + SSC_POWER_DOWN, SSC_POWER_DOWN); +} + +static int rts5261_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_EN); +} + +static int rts5261_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_DISABLE); +} + +static int rts5261_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, + 0x02, 0x02); +} + +static int rts5261_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, + 0x02, 0x00); +} + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5261_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5261_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +static int rts5261_sd_set_sample_push_timing_sd30(struct rtsx_pcr *pcr) +{ + rtsx_pci_write_register(pcr, SD_CFG1, SD_MODE_SELECT_MASK + | SD_ASYNC_FIFO_NOT_RST, SD_30_MODE | SD_ASYNC_FIFO_NOT_RST); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_write_register(pcr, CARD_CLK_SOURCE, 0xFF, + CRC_VAR_CLK0 | SD30_FIX_CLK | SAMPLE_VAR_CLK1); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + + return 0; +} + +static int rts5261_card_power_on(struct rtsx_pcr *pcr, int card) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) + rtsx_pci_enable_ocp(pcr); + + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG1, + RTS5261_LDO1_TUNE_MASK, RTS5261_LDO1_33); + rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL, + RTS5261_LDO1_POWERON, RTS5261_LDO1_POWERON); + + rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL, + RTS5261_LDO3318_POWERON, RTS5261_LDO3318_POWERON); + + msleep(20); + + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, SD_OUTPUT_EN); + + /* Initialize SD_CFG1 register */ + rtsx_pci_write_register(pcr, SD_CFG1, 0xFF, + SD_CLK_DIVIDE_128 | SD_20_MODE | SD_BUS_WIDTH_1BIT); + + rtsx_pci_write_register(pcr, SD_SAMPLE_POINT_CTL, + 0xFF, SD20_RX_POS_EDGE); + rtsx_pci_write_register(pcr, SD_PUSH_POINT_CTL, 0xFF, 0); + rtsx_pci_write_register(pcr, CARD_STOP, SD_STOP | SD_CLR_ERR, + SD_STOP | SD_CLR_ERR); + + /* Reset SD_CFG3 register */ + rtsx_pci_write_register(pcr, SD_CFG3, SD30_CLK_END_EN, 0); + rtsx_pci_write_register(pcr, REG_SD_STOP_SDCLK_CFG, + SD30_CLK_STOP_CFG_EN | SD30_CLK_STOP_CFG1 | + SD30_CLK_STOP_CFG0, 0); + + if (pcr->extra_caps & EXTRA_CAPS_SD_SDR50 || + pcr->extra_caps & EXTRA_CAPS_SD_SDR104) + rts5261_sd_set_sample_push_timing_sd30(pcr); + + return 0; +} + +static int rts5261_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + u16 val = 0; + + rtsx_pci_write_register(pcr, RTS5261_CARD_PWR_CTL, + RTS5261_PUPDC, RTS5261_PUPDC); + + switch (voltage) { + case OUTPUT_3V3: + rtsx_pci_read_phy_register(pcr, PHY_TUNE, &val); + val |= PHY_TUNE_SDBUS_33; + err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, val); + if (err < 0) + return err; + + rtsx_pci_write_register(pcr, RTS5261_DV3318_CFG, + RTS5261_DV3318_TUNE_MASK, RTS5261_DV3318_33); + rtsx_pci_write_register(pcr, SD_PAD_CTL, + SD_IO_USING_1V8, 0); + break; + case OUTPUT_1V8: + rtsx_pci_read_phy_register(pcr, PHY_TUNE, &val); + val &= ~PHY_TUNE_SDBUS_33; + err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, val); + if (err < 0) + return err; + + rtsx_pci_write_register(pcr, RTS5261_DV3318_CFG, + RTS5261_DV3318_TUNE_MASK, RTS5261_DV3318_18); + rtsx_pci_write_register(pcr, SD_PAD_CTL, + SD_IO_USING_1V8, SD_IO_USING_1V8); + break; + default: + return -EINVAL; + } + + /* set pad drive */ + rts5261_fill_driving(pcr, voltage); + + return 0; +} + +static void rts5261_stop_cmd(struct rtsx_pcr *pcr) +{ + rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA); + rtsx_pci_write_register(pcr, RTS5260_DMA_RST_CTL_0, + RTS5260_DMA_RST | RTS5260_ADMA3_RST, + RTS5260_DMA_RST | RTS5260_ADMA3_RST); + rtsx_pci_write_register(pcr, RBCTL, RB_FLUSH, RB_FLUSH); +} + +static void rts5261_card_before_power_off(struct rtsx_pcr *pcr) +{ + rts5261_stop_cmd(pcr); + rts5261_switch_output_voltage(pcr, OUTPUT_3V3); + +} + +static void rts5261_enable_ocp(struct rtsx_pcr *pcr) +{ + u8 val = 0; + + val = SD_OCP_INT_EN | SD_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, 0xFF, val); + +} + +static void rts5261_disable_ocp(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + + mask = SD_OCP_INT_EN | SD_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, 0); + +} + +static int rts5261_card_power_off(struct rtsx_pcr *pcr, int card) +{ + int err = 0; + + rts5261_card_before_power_off(pcr); + err = rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL, + RTS5261_LDO_POWERON_MASK, 0); + + if (pcr->option.ocp_en) + rtsx_pci_disable_ocp(pcr); + + return err; +} + +static void rts5261_init_ocp(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) { + u8 mask, val; + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN); + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_THD_MASK, option->sd_800mA_ocp_thd); + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_LMT_THD_MASK, + RTS5261_LDO1_LMT_THD_2000); + + mask = SD_OCP_GLITCH_MASK; + val = pcr->hw_param.ocp_glitch; + rtsx_pci_write_register(pcr, REG_OCPGLITCH, mask, val); + + rts5261_enable_ocp(pcr); + } else { + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, 0); + } +} + +static void rts5261_clear_ocpstat(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + u8 val = 0; + + mask = SD_OCP_INT_CLR | SD_OC_CLR; + val = SD_OCP_INT_CLR | SD_OC_CLR; + + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, val); + + udelay(10); + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + +} + +static void rts5261_process_ocp(struct rtsx_pcr *pcr) +{ + if (!pcr->option.ocp_en) + return; + + rtsx_pci_get_ocpstat(pcr, &pcr->ocp_stat); + + if (pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER)) { + rts5261_card_power_off(pcr, RTSX_SD_CARD); + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0); + rts5261_clear_ocpstat(pcr); + pcr->ocp_stat = 0; + } + +} + +static int rts5261_init_from_hw(struct rtsx_pcr *pcr) +{ + int retval; + u32 lval, i; + u8 valid, efuse_valid, tmp; + + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + REG_EFUSE_POR | REG_EFUSE_POWER_MASK, + REG_EFUSE_POR | REG_EFUSE_POWERON); + udelay(1); + rtsx_pci_write_register(pcr, RTS5261_EFUSE_ADDR, + RTS5261_EFUSE_ADDR_MASK, 0x00); + rtsx_pci_write_register(pcr, RTS5261_EFUSE_CTL, + RTS5261_EFUSE_ENABLE | RTS5261_EFUSE_MODE_MASK, + RTS5261_EFUSE_ENABLE); + + /* Wait transfer end */ + for (i = 0; i < MAX_RW_REG_CNT; i++) { + rtsx_pci_read_register(pcr, RTS5261_EFUSE_CTL, &tmp); + if ((tmp & 0x80) == 0) + break; + } + rtsx_pci_read_register(pcr, RTS5261_EFUSE_READ_DATA, &tmp); + efuse_valid = ((tmp & 0x0C) >> 2); + pcr_dbg(pcr, "Load efuse valid: 0x%x\n", efuse_valid); + + if (efuse_valid == 0) { + retval = rtsx_pci_read_config_dword(pcr, + PCR_SETTING_REG2, &lval); + if (retval != 0) + pcr_dbg(pcr, "read 0x814 DW fail\n"); + pcr_dbg(pcr, "DW from 0x814: 0x%x\n", lval); + /* 0x816 */ + valid = (u8)((lval >> 16) & 0x03); + pcr_dbg(pcr, "0x816: %d\n", valid); + } + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + REG_EFUSE_POR, 0); + pcr_dbg(pcr, "Disable efuse por!\n"); + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &lval); + lval = lval & 0x00FFFFFF; + retval = rtsx_pci_write_config_dword(pcr, PCR_SETTING_REG2, lval); + if (retval != 0) + pcr_dbg(pcr, "write config fail\n"); + + return retval; +} + +static void rts5261_init_from_cfg(struct rtsx_pcr *pcr) +{ + u32 lval; + struct rtsx_cr_option *option = &pcr->option; + + rtsx_pci_read_config_dword(pcr, PCR_ASPM_SETTING_REG1, &lval); + + if (lval & ASPM_L1_1_EN_MASK) + rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); + else + rtsx_clear_dev_flag(pcr, ASPM_L1_1_EN); + + if (lval & ASPM_L1_2_EN_MASK) + rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); + else + rtsx_clear_dev_flag(pcr, ASPM_L1_2_EN); + + if (lval & PM_L1_1_EN_MASK) + rtsx_set_dev_flag(pcr, PM_L1_1_EN); + else + rtsx_clear_dev_flag(pcr, PM_L1_1_EN); + + if (lval & PM_L1_2_EN_MASK) + rtsx_set_dev_flag(pcr, PM_L1_2_EN); + else + rtsx_clear_dev_flag(pcr, PM_L1_2_EN); + + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0xFF, 0); + if (option->ltr_en) { + u16 val; + + pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val); + if (val & PCI_EXP_DEVCTL2_LTR_EN) { + option->ltr_enabled = true; + option->ltr_active = true; + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + } else { + option->ltr_enabled = false; + } + } + + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) + option->force_clkreq_0 = false; + else + option->force_clkreq_0 = true; +} + +static int rts5261_extra_init_hw(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1, + CD_RESUME_EN_MASK, CD_RESUME_EN_MASK); + + rts5261_init_from_cfg(pcr); + rts5261_init_from_hw(pcr); + + /* power off efuse */ + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + REG_EFUSE_POWER_MASK, REG_EFUSE_POWEROFF); + rtsx_pci_write_register(pcr, L1SUB_CONFIG1, + AUX_CLK_ACTIVE_SEL_MASK, MAC_CKSW_DONE); + rtsx_pci_write_register(pcr, L1SUB_CONFIG3, 0xFF, 0); + + rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG4, + RTS5261_AUX_CLK_16M_EN, 0); + + /* Release PRSNT# */ + rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG4, + RTS5261_FORCE_PRSNT_LOW, 0); + rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, + FUNC_FORCE_UPME_XMT_DBG, FUNC_FORCE_UPME_XMT_DBG); + + rtsx_pci_write_register(pcr, PCLK_CTL, + PCLK_MODE_SEL, PCLK_MODE_SEL); + + rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0); + rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, CLK_PM_EN, CLK_PM_EN); + + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x0F, 0x02); + + /* Configure driving */ + rts5261_fill_driving(pcr, OUTPUT_3V3); + + /* + * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced + * to drive low, and we forcibly request clock. + */ + if (option->force_clkreq_0) + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00); + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL); + + /* Clear Enter RTD3_cold Information*/ + rtsx_pci_write_register(pcr, RTS5261_FW_CTL, + RTS5261_INFORM_RTD3_COLD, 0); + + return 0; +} + +static void rts5261_enable_aspm(struct rtsx_pcr *pcr, bool enable) +{ + struct rtsx_cr_option *option = &pcr->option; + u8 val = 0; + + if (pcr->aspm_enabled == enable) + return; + + if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) { + val = pcr->aspm_en; + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + } else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) { + u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0; + + val = FORCE_ASPM_CTL0; + val |= (pcr->aspm_en & 0x02); + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + val = pcr->aspm_en; + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + } + pcr->aspm_enabled = enable; + +} + +static void rts5261_disable_aspm(struct rtsx_pcr *pcr, bool enable) +{ + struct rtsx_cr_option *option = &pcr->option; + u8 val = 0; + + if (pcr->aspm_enabled == enable) + return; + + if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) { + val = 0; + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + } else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) { + u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0; + + val = 0; + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + val = FORCE_ASPM_CTL0; + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + } + rtsx_pci_write_register(pcr, SD_CFG1, SD_ASYNC_FIFO_NOT_RST, 0); + udelay(10); + pcr->aspm_enabled = enable; +} + +static void rts5261_set_aspm(struct rtsx_pcr *pcr, bool enable) +{ + if (enable) + rts5261_enable_aspm(pcr, true); + else + rts5261_disable_aspm(pcr, false); +} + +static void rts5261_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active) +{ + struct rtsx_cr_option *option = &pcr->option; + int aspm_L1_1, aspm_L1_2; + u8 val = 0; + + aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN); + aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN); + + if (active) { + /* run, latency: 60us */ + if (aspm_L1_1) + val = option->ltr_l1off_snooze_sspwrgate; + } else { + /* l1off, latency: 300us */ + if (aspm_L1_2) + val = option->ltr_l1off_sspwrgate; + } + + rtsx_set_l1off_sub(pcr, val); +} + +static const struct pcr_ops rts5261_pcr_ops = { + .fetch_vendor_settings = rtsx5261_fetch_vendor_settings, + .turn_on_led = rts5261_turn_on_led, + .turn_off_led = rts5261_turn_off_led, + .extra_init_hw = rts5261_extra_init_hw, + .enable_auto_blink = rts5261_enable_auto_blink, + .disable_auto_blink = rts5261_disable_auto_blink, + .card_power_on = rts5261_card_power_on, + .card_power_off = rts5261_card_power_off, + .switch_output_voltage = rts5261_switch_output_voltage, + .force_power_down = rts5261_force_power_down, + .stop_cmd = rts5261_stop_cmd, + .set_aspm = rts5261_set_aspm, + .set_l1off_cfg_sub_d0 = rts5261_set_l1off_cfg_sub_d0, + .enable_ocp = rts5261_enable_ocp, + .disable_ocp = rts5261_disable_ocp, + .init_ocp = rts5261_init_ocp, + .process_ocp = rts5261_process_ocp, + .clear_ocpstat = rts5261_clear_ocpstat, +}; + +static inline u8 double_ssc_depth(u8 depth) +{ + return ((depth > 1) ? (depth - 1) : depth); +} + +int rts5261_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) +{ + int err, clk; + u16 n; + u8 clk_divider, mcu_cnt, div; + static const u8 depth[] = { + [RTSX_SSC_DEPTH_4M] = RTS5261_SSC_DEPTH_4M, + [RTSX_SSC_DEPTH_2M] = RTS5261_SSC_DEPTH_2M, + [RTSX_SSC_DEPTH_1M] = RTS5261_SSC_DEPTH_1M, + [RTSX_SSC_DEPTH_500K] = RTS5261_SSC_DEPTH_512K, + }; + + if (initial_mode) { + /* We use 250k(around) here, in initial stage */ + clk_divider = SD_CLK_DIVIDE_128; + card_clock = 30000000; + } else { + clk_divider = SD_CLK_DIVIDE_0; + } + err = rtsx_pci_write_register(pcr, SD_CFG1, + SD_CLK_DIVIDE_MASK, clk_divider); + if (err < 0) + return err; + + card_clock /= 1000000; + pcr_dbg(pcr, "Switch card clock to %dMHz\n", card_clock); + + clk = card_clock; + if (!initial_mode && double_clk) + clk = card_clock * 2; + pcr_dbg(pcr, "Internal SSC clock: %dMHz (cur_clock = %d)\n", + clk, pcr->cur_clock); + + if (clk == pcr->cur_clock) + return 0; + + if (pcr->ops->conv_clk_and_div_n) + n = pcr->ops->conv_clk_and_div_n(clk, CLK_TO_DIV_N); + else + n = clk - 4; + if ((clk <= 4) || (n > 396)) + return -EINVAL; + + mcu_cnt = 125/clk + 3; + if (mcu_cnt > 15) + mcu_cnt = 15; + + div = CLK_DIV_1; + while ((n < MIN_DIV_N_PCR - 4) && (div < CLK_DIV_8)) { + if (pcr->ops->conv_clk_and_div_n) { + int dbl_clk = pcr->ops->conv_clk_and_div_n(n, + DIV_N_TO_CLK) * 2; + n = pcr->ops->conv_clk_and_div_n(dbl_clk, + CLK_TO_DIV_N); + } else { + n = (n + 4) * 2 - 4; + } + div++; + } + + n = (n / 2); + pcr_dbg(pcr, "n = %d, div = %d\n", n, div); + + ssc_depth = depth[ssc_depth]; + if (double_clk) + ssc_depth = double_ssc_depth(ssc_depth); + + if (ssc_depth) { + if (div == CLK_DIV_2) { + if (ssc_depth > 1) + ssc_depth -= 1; + else + ssc_depth = RTS5261_SSC_DEPTH_8M; + } else if (div == CLK_DIV_4) { + if (ssc_depth > 2) + ssc_depth -= 2; + else + ssc_depth = RTS5261_SSC_DEPTH_8M; + } else if (div == CLK_DIV_8) { + if (ssc_depth > 3) + ssc_depth -= 3; + else + ssc_depth = RTS5261_SSC_DEPTH_8M; + } + } else { + ssc_depth = 0; + } + pcr_dbg(pcr, "ssc_depth = %d\n", ssc_depth); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL, + CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, + 0xFF, (div << 4) | mcu_cnt); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, + SSC_DEPTH_MASK, ssc_depth); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); + if (vpclk) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK1_CTL, + PHASE_NOT_RESET, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK1_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + } + + err = rtsx_pci_send_cmd(pcr, 2000); + if (err < 0) + return err; + + /* Wait SSC clock stable */ + udelay(SSC_CLOCK_STABLE_WAIT); + err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + if (err < 0) + return err; + + pcr->cur_clock = clk; + return 0; + +} + +void rts5261_init_params(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + struct rtsx_hw_param *hw_param = &pcr->hw_param; + + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 1; + pcr->ops = &rts5261_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 27, 16); + pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); + + pcr->ic_version = rts5261_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5261_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5261_sd_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = RTS5261_AUTOLOAD_CFG3; + + option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN + | LTR_L1SS_PWR_GATE_EN); + option->ltr_en = true; + + /* init latency of active, idle, L1OFF to 60us, 300us, 3ms */ + option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF; + option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF; + option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF; + option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF; + option->ltr_l1off_sspwrgate = 0x7F; + option->ltr_l1off_snooze_sspwrgate = 0x78; + option->dev_aspm_mode = DEV_ASPM_DYNAMIC; + + option->ocp_en = 1; + hw_param->interrupt_en |= SD_OC_INT_EN; + hw_param->ocp_glitch = SD_OCP_GLITCH_800U; + option->sd_800mA_ocp_thd = RTS5261_LDO1_OCP_THD_1040; +} diff --git a/drivers/misc/cardreader/rts5261.h b/drivers/misc/cardreader/rts5261.h new file mode 100644 index 000000000000..ebfdd236a553 --- /dev/null +++ b/drivers/misc/cardreader/rts5261.h @@ -0,0 +1,233 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2018-2019 Realtek Semiconductor Corp. All rights reserved. + * + * Author: + * Rui FENG <rui_feng@realsil.com.cn> + * Wei WANG <wei_wang@realsil.com.cn> + */ +#ifndef RTS5261_H +#define RTS5261_H + +/*New add*/ +#define rts5261_vendor_setting_valid(reg) ((reg) & 0x010000) +#define rts5261_reg_to_aspm(reg) (((reg) >> 28) ^ 0x03) +#define rts5261_reg_check_reverse_socket(reg) ((reg) & 0x04) +#define rts5261_reg_to_card_drive_sel(reg) ((((reg) >> 6) & 0x01) << 6) +#define rts5261_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 22) ^ 0x03) +#define rts5261_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 16) ^ 0x03) + + +#define RTS5261_AUTOLOAD_CFG0 0xFF7B +#define RTS5261_AUTOLOAD_CFG1 0xFF7C +#define RTS5261_AUTOLOAD_CFG2 0xFF7D +#define RTS5261_AUTOLOAD_CFG3 0xFF7E +#define RTS5261_AUTOLOAD_CFG4 0xFF7F +#define RTS5261_FORCE_PRSNT_LOW (1 << 6) +#define RTS5261_AUX_CLK_16M_EN (1 << 5) + +#define RTS5261_REG_VREF 0xFE97 +#define RTS5261_PWD_SUSPND_EN (1 << 4) + +#define RTS5261_PAD_H3L1 0xFF79 +#define PAD_GPIO_H3L1 (1 << 3) + +/* SSC_CTL2 0xFC12 */ +#define RTS5261_SSC_DEPTH_MASK 0x07 +#define RTS5261_SSC_DEPTH_DISALBE 0x00 +#define RTS5261_SSC_DEPTH_8M 0x01 +#define RTS5261_SSC_DEPTH_4M 0x02 +#define RTS5261_SSC_DEPTH_2M 0x03 +#define RTS5261_SSC_DEPTH_1M 0x04 +#define RTS5261_SSC_DEPTH_512K 0x05 +#define RTS5261_SSC_DEPTH_256K 0x06 +#define RTS5261_SSC_DEPTH_128K 0x07 + +/* efuse control register*/ +#define RTS5261_EFUSE_CTL 0xFC30 +#define RTS5261_EFUSE_ENABLE 0x80 +/* EFUSE_MODE: 0=READ 1=PROGRAM */ +#define RTS5261_EFUSE_MODE_MASK 0x40 +#define RTS5261_EFUSE_PROGRAM 0x40 + +#define RTS5261_EFUSE_ADDR 0xFC31 +#define RTS5261_EFUSE_ADDR_MASK 0x3F + +#define RTS5261_EFUSE_WRITE_DATA 0xFC32 +#define RTS5261_EFUSE_READ_DATA 0xFC34 + +/* DMACTL 0xFE2C */ +#define RTS5261_DMA_PACK_SIZE_MASK 0xF0 + +/* FW config info register */ +#define RTS5261_FW_CFG_INFO0 0xFF50 +#define RTS5261_FW_EXPRESS_TEST_MASK (0x01<<0) +#define RTS5261_FW_EA_MODE_MASK (0x01<<5) + +/* FW config register */ +#define RTS5261_FW_CFG0 0xFF54 +#define RTS5261_FW_ENTER_EXPRESS (0x01<<0) + +#define RTS5261_FW_CFG1 0xFF55 +#define RTS5261_SYS_CLK_SEL_MCU_CLK (0x01<<7) +#define RTS5261_CRC_CLK_SEL_MCU_CLK (0x01<<6) +#define RTS5261_FAKE_MCU_CLOCK_GATING (0x01<<5) +/*MCU_bus_mode_sel: 0=real 8051 1=fake mcu*/ +#define RTS5261_MCU_BUS_SEL_MASK (0x01<<4) +/*MCU_clock_sel:VerA 00=aux16M 01=aux400K 1x=REFCLK100M*/ +/*MCU_clock_sel:VerB 00=aux400K 01=aux16M 10=REFCLK100M*/ +#define RTS5261_MCU_CLOCK_SEL_MASK (0x03<<2) +#define RTS5261_MCU_CLOCK_SEL_16M (0x01<<2) +#define RTS5261_MCU_CLOCK_GATING (0x01<<1) +#define RTS5261_DRIVER_ENABLE_FW (0x01<<0) + +/* FW status register */ +#define RTS5261_FW_STATUS 0xFF56 +#define RTS5261_EXPRESS_LINK_FAIL_MASK (0x01<<7) + +/* FW control register */ +#define RTS5261_FW_CTL 0xFF5F +#define RTS5261_INFORM_RTD3_COLD (0x01<<5) + +#define RTS5261_REG_FPDCTL 0xFF60 + +#define RTS5261_REG_LDO12_CFG 0xFF6E +#define RTS5261_LDO12_VO_TUNE_MASK (0x07<<1) +#define RTS5261_LDO12_115 (0x03<<1) +#define RTS5261_LDO12_120 (0x04<<1) +#define RTS5261_LDO12_125 (0x05<<1) +#define RTS5261_LDO12_130 (0x06<<1) +#define RTS5261_LDO12_135 (0x07<<1) + +/* LDO control register */ +#define RTS5261_CARD_PWR_CTL 0xFD50 +#define RTS5261_SD_CLK_ISO (0x01<<7) +#define RTS5261_PAD_SD_DAT_FW_CTRL (0x01<<6) +#define RTS5261_PUPDC (0x01<<5) +#define RTS5261_SD_CMD_ISO (0x01<<4) +#define RTS5261_SD_DAT_ISO_MASK (0x0F<<0) + +#define RTS5261_LDO1233318_POW_CTL 0xFF70 +#define RTS5261_LDO3318_POWERON (0x01<<3) +#define RTS5261_LDO3_POWERON (0x01<<2) +#define RTS5261_LDO2_POWERON (0x01<<1) +#define RTS5261_LDO1_POWERON (0x01<<0) +#define RTS5261_LDO_POWERON_MASK (0x0F<<0) + +#define RTS5261_DV3318_CFG 0xFF71 +#define RTS5261_DV3318_TUNE_MASK (0x07<<4) +#define RTS5261_DV3318_18 (0x02<<4) +#define RTS5261_DV3318_19 (0x04<<4) +#define RTS5261_DV3318_33 (0x07<<4) + +#define RTS5261_LDO1_CFG0 0xFF72 +#define RTS5261_LDO1_OCP_THD_MASK (0x07<<5) +#define RTS5261_LDO1_OCP_EN (0x01<<4) +#define RTS5261_LDO1_OCP_LMT_THD_MASK (0x03<<2) +#define RTS5261_LDO1_OCP_LMT_EN (0x01<<1) + +/* CRD6603-433 190319 request changed */ +#define RTS5261_LDO1_OCP_THD_740 (0x00<<5) +#define RTS5261_LDO1_OCP_THD_800 (0x01<<5) +#define RTS5261_LDO1_OCP_THD_860 (0x02<<5) +#define RTS5261_LDO1_OCP_THD_920 (0x03<<5) +#define RTS5261_LDO1_OCP_THD_980 (0x04<<5) +#define RTS5261_LDO1_OCP_THD_1040 (0x05<<5) +#define RTS5261_LDO1_OCP_THD_1100 (0x06<<5) +#define RTS5261_LDO1_OCP_THD_1160 (0x07<<5) + +#define RTS5261_LDO1_LMT_THD_450 (0x00<<2) +#define RTS5261_LDO1_LMT_THD_1000 (0x01<<2) +#define RTS5261_LDO1_LMT_THD_1500 (0x02<<2) +#define RTS5261_LDO1_LMT_THD_2000 (0x03<<2) + +#define RTS5261_LDO1_CFG1 0xFF73 +#define RTS5261_LDO1_TUNE_MASK (0x07<<1) +#define RTS5261_LDO1_18 (0x05<<1) +#define RTS5261_LDO1_33 (0x07<<1) +#define RTS5261_LDO1_PWD_MASK (0x01<<0) + +#define RTS5261_LDO2_CFG0 0xFF74 +#define RTS5261_LDO2_OCP_THD_MASK (0x07<<5) +#define RTS5261_LDO2_OCP_EN (0x01<<4) +#define RTS5261_LDO2_OCP_LMT_THD_MASK (0x03<<2) +#define RTS5261_LDO2_OCP_LMT_EN (0x01<<1) + +#define RTS5261_LDO2_OCP_THD_620 (0x00<<5) +#define RTS5261_LDO2_OCP_THD_650 (0x01<<5) +#define RTS5261_LDO2_OCP_THD_680 (0x02<<5) +#define RTS5261_LDO2_OCP_THD_720 (0x03<<5) +#define RTS5261_LDO2_OCP_THD_750 (0x04<<5) +#define RTS5261_LDO2_OCP_THD_780 (0x05<<5) +#define RTS5261_LDO2_OCP_THD_810 (0x06<<5) +#define RTS5261_LDO2_OCP_THD_840 (0x07<<5) + +#define RTS5261_LDO2_CFG1 0xFF75 +#define RTS5261_LDO2_TUNE_MASK (0x07<<1) +#define RTS5261_LDO2_18 (0x05<<1) +#define RTS5261_LDO2_33 (0x07<<1) +#define RTS5261_LDO2_PWD_MASK (0x01<<0) + +#define RTS5261_LDO3_CFG0 0xFF76 +#define RTS5261_LDO3_OCP_THD_MASK (0x07<<5) +#define RTS5261_LDO3_OCP_EN (0x01<<4) +#define RTS5261_LDO3_OCP_LMT_THD_MASK (0x03<<2) +#define RTS5261_LDO3_OCP_LMT_EN (0x01<<1) + +#define RTS5261_LDO3_OCP_THD_620 (0x00<<5) +#define RTS5261_LDO3_OCP_THD_650 (0x01<<5) +#define RTS5261_LDO3_OCP_THD_680 (0x02<<5) +#define RTS5261_LDO3_OCP_THD_720 (0x03<<5) +#define RTS5261_LDO3_OCP_THD_750 (0x04<<5) +#define RTS5261_LDO3_OCP_THD_780 (0x05<<5) +#define RTS5261_LDO3_OCP_THD_810 (0x06<<5) +#define RTS5261_LDO3_OCP_THD_840 (0x07<<5) + +#define RTS5261_LDO3_CFG1 0xFF77 +#define RTS5261_LDO3_TUNE_MASK (0x07<<1) +#define RTS5261_LDO3_18 (0x05<<1) +#define RTS5261_LDO3_33 (0x07<<1) +#define RTS5261_LDO3_PWD_MASK (0x01<<0) + +#define RTS5261_REG_PME_FORCE_CTL 0xFF78 +#define FORCE_PM_CONTROL 0x20 +#define FORCE_PM_VALUE 0x10 +#define REG_EFUSE_BYPASS 0x08 +#define REG_EFUSE_POR 0x04 +#define REG_EFUSE_POWER_MASK 0x03 +#define REG_EFUSE_POWERON 0x03 +#define REG_EFUSE_POWEROFF 0x00 + + +/* Single LUN, support SD/SD EXPRESS */ +#define DEFAULT_SINGLE 0 +#define SD_LUN 1 +#define SD_EXPRESS_LUN 2 + +/* For Change_FPGA_SSCClock Function */ +#define MULTIPLY_BY_1 0x00 +#define MULTIPLY_BY_2 0x01 +#define MULTIPLY_BY_3 0x02 +#define MULTIPLY_BY_4 0x03 +#define MULTIPLY_BY_5 0x04 +#define MULTIPLY_BY_6 0x05 +#define MULTIPLY_BY_7 0x06 +#define MULTIPLY_BY_8 0x07 +#define MULTIPLY_BY_9 0x08 +#define MULTIPLY_BY_10 0x09 + +#define DIVIDE_BY_2 0x01 +#define DIVIDE_BY_3 0x02 +#define DIVIDE_BY_4 0x03 +#define DIVIDE_BY_5 0x04 +#define DIVIDE_BY_6 0x05 +#define DIVIDE_BY_7 0x06 +#define DIVIDE_BY_8 0x07 +#define DIVIDE_BY_9 0x08 +#define DIVIDE_BY_10 0x09 + +int rts5261_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk); + +#endif /* RTS5261_H */ diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c index b4a66b64f742..06038b325b02 100644 --- a/drivers/misc/cardreader/rtsx_pcr.c +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -22,6 +22,7 @@ #include <asm/unaligned.h> #include "rtsx_pcr.h" +#include "rts5261.h" static bool msi_en = true; module_param(msi_en, bool, S_IRUGO | S_IWUSR); @@ -34,9 +35,6 @@ static struct mfd_cell rtsx_pcr_cells[] = { [RTSX_SD_CARD] = { .name = DRV_NAME_RTSX_PCI_SDMMC, }, - [RTSX_MS_CARD] = { - .name = DRV_NAME_RTSX_PCI_MS, - }, }; static const struct pci_device_id rtsx_pci_ids[] = { @@ -51,6 +49,7 @@ static const struct pci_device_id rtsx_pci_ids[] = { { PCI_DEVICE(0x10EC, 0x524A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x525A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x5260), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5261), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { 0, } }; @@ -438,8 +437,16 @@ static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr, if (end) option |= RTSX_SG_END; - val = ((u64)addr << 32) | ((u64)len << 12) | option; + if (PCI_PID(pcr) == PID_5261) { + if (len > 0xFFFF) + val = ((u64)addr << 32) | (((u64)len & 0xFFFF) << 16) + | (((u64)len >> 16) << 6) | option; + else + val = ((u64)addr << 32) | ((u64)len << 16) | option; + } else { + val = ((u64)addr << 32) | ((u64)len << 12) | option; + } put_unaligned_le64(val, ptr); pcr->sgi++; } @@ -684,7 +691,6 @@ int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card) else return -EINVAL; - return rtsx_pci_set_pull_ctl(pcr, tbl); } EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_disable); @@ -735,6 +741,10 @@ int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, [RTSX_SSC_DEPTH_250K] = SSC_DEPTH_250K, }; + if (PCI_PID(pcr) == PID_5261) + return rts5261_pci_switch_clock(pcr, card_clock, + ssc_depth, initial_mode, double_clk, vpclk); + if (initial_mode) { /* We use 250k(around) here, in initial stage */ clk_divider = SD_CLK_DIVIDE_128; @@ -1253,7 +1263,15 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) rtsx_pci_enable_bus_int(pcr); /* Power on SSC */ - err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0); + if (PCI_PID(pcr) == PID_5261) { + /* Gating real mcu clock */ + err = rtsx_pci_write_register(pcr, RTS5261_FW_CFG1, + RTS5261_MCU_CLOCK_GATING, 0); + err = rtsx_pci_write_register(pcr, RTS5261_REG_FPDCTL, + SSC_POWER_DOWN, 0); + } else { + err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0); + } if (err < 0) return err; @@ -1283,7 +1301,12 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) /* Enable SSC Clock */ rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, 0xFF, SSC_8X_EN | SSC_SEL_4M); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12); + if (PCI_PID(pcr) == PID_5261) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, + RTS5261_SSC_DEPTH_2M); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12); + /* Disable cd_pwr_save */ rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x16, 0x10); /* Clear Link Ready Interrupt */ @@ -1314,6 +1337,7 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) case PID_524A: case PID_525A: case PID_5260: + case PID_5261: rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 1, 1); break; default: @@ -1393,9 +1417,14 @@ static int rtsx_pci_init_chip(struct rtsx_pcr *pcr) case 0x5286: rtl8402_init_params(pcr); break; + case 0x5260: rts5260_init_params(pcr); break; + + case 0x5261: + rts5261_init_params(pcr); + break; } pcr_dbg(pcr, "PID: 0x%04x, IC version: 0x%02x\n", @@ -1483,7 +1512,7 @@ static int rtsx_pci_probe(struct pci_dev *pcidev, bar = 1; len = pci_resource_len(pcidev, bar); base = pci_resource_start(pcidev, bar); - pcr->remap_addr = ioremap_nocache(base, len); + pcr->remap_addr = ioremap(base, len); if (!pcr->remap_addr) { ret = -ENOMEM; goto free_handle; diff --git a/drivers/misc/cardreader/rtsx_pcr.h b/drivers/misc/cardreader/rtsx_pcr.h index 98f729263dc1..ed391df52f4f 100644 --- a/drivers/misc/cardreader/rtsx_pcr.h +++ b/drivers/misc/cardreader/rtsx_pcr.h @@ -53,6 +53,7 @@ void rts524a_init_params(struct rtsx_pcr *pcr); void rts525a_init_params(struct rtsx_pcr *pcr); void rtl8411b_init_params(struct rtsx_pcr *pcr); void rts5260_init_params(struct rtsx_pcr *pcr); +void rts5261_init_params(struct rtsx_pcr *pcr); static inline u8 map_sd_drive(int idx) { diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index aed9c445d1e2..fb2eff69e449 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -352,7 +352,7 @@ void cxl_context_free(struct cxl_context *ctx) void cxl_context_mm_count_get(struct cxl_context *ctx) { if (ctx->mm) - atomic_inc(&ctx->mm->mm_count); + mmgrab(ctx->mm); } void cxl_context_mm_count_put(struct cxl_context *ctx) diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c index 4d6836f19489..cb9cca35a226 100644 --- a/drivers/misc/cxl/flash.c +++ b/drivers/misc/cxl/flash.c @@ -473,12 +473,6 @@ static long device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EINVAL; } -static long device_compat_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - return device_ioctl(file, cmd, arg); -} - static int device_close(struct inode *inode, struct file *file) { struct cxl *adapter = file->private_data; @@ -514,7 +508,7 @@ static const struct file_operations fops = { .owner = THIS_MODULE, .open = device_open, .unlocked_ioctl = device_ioctl, - .compat_ioctl = device_compat_ioctl, + .compat_ioctl = compat_ptr_ioctl, .release = device_close, }; diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 482a2c1b340a..43b312d06e3e 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -18,6 +18,7 @@ #include <linux/sched/task.h> #include <asm/cputable.h> +#include <asm/mmu.h> #include <misc/cxl-base.h> #include "cxl.h" @@ -315,6 +316,9 @@ static int __init init_cxl(void) { int rc = 0; + if (!tlbie_capable) + return -EINVAL; + if ((rc = cxl_file_init())) return rc; diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig index f2abe27010ef..0f791bfdc1f5 100644 --- a/drivers/misc/eeprom/Kconfig +++ b/drivers/misc/eeprom/Kconfig @@ -45,13 +45,16 @@ config EEPROM_AT25 will be called at25. config EEPROM_LEGACY - tristate "Old I2C EEPROM reader" + tristate "Old I2C EEPROM reader (DEPRECATED)" depends on I2C && SYSFS help If you say yes here you get read-only access to the EEPROM data available on modern memory DIMMs and Sony Vaio laptops via I2C. Such EEPROMs could theoretically be available on other devices as well. + This driver is deprecated and will be removed soon, please use the + better at24 driver instead. + This driver can also be built as a module. If so, the module will be called eeprom. diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 518945b2f737..031eb64549af 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: GPL-2.0-or-later /* * at24.c - handle most I2C EEPROMs * @@ -6,24 +6,23 @@ * Copyright (C) 2008 Wolfram Sang, Pengutronix */ -#include <linux/kernel.h> +#include <linux/acpi.h> +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/i2c.h> #include <linux/init.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/mod_devicetable.h> #include <linux/module.h> -#include <linux/of_device.h> -#include <linux/slab.h> -#include <linux/delay.h> #include <linux/mutex.h> -#include <linux/mod_devicetable.h> -#include <linux/log2.h> -#include <linux/bitops.h> -#include <linux/jiffies.h> -#include <linux/property.h> -#include <linux/acpi.h> -#include <linux/i2c.h> #include <linux/nvmem-provider.h> -#include <linux/regmap.h> +#include <linux/of_device.h> #include <linux/pm_runtime.h> -#include <linux/gpio/consumer.h> +#include <linux/property.h> +#include <linux/regmap.h> +#include <linux/regulator/consumer.h> +#include <linux/slab.h> /* Address pointer is 16 bit. */ #define AT24_FLAG_ADDR16 BIT(7) @@ -89,8 +88,7 @@ struct at24_data { u8 flags; struct nvmem_device *nvmem; - - struct gpio_desc *wp_gpio; + struct regulator *vcc_reg; /* * Some chips tie up multiple I2C addresses; dummy devices reserve @@ -458,12 +456,10 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count) * from this host, but not from other I2C masters. */ mutex_lock(&at24->lock); - gpiod_set_value_cansleep(at24->wp_gpio, 0); while (count) { ret = at24_regmap_write(at24, buf, off, count); if (ret < 0) { - gpiod_set_value_cansleep(at24->wp_gpio, 1); mutex_unlock(&at24->lock); pm_runtime_put(dev); return ret; @@ -473,7 +469,6 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count) count -= ret; } - gpiod_set_value_cansleep(at24->wp_gpio, 1); mutex_unlock(&at24->lock); pm_runtime_put(dev); @@ -663,9 +658,9 @@ static int at24_probe(struct i2c_client *client) at24->client[0].client = client; at24->client[0].regmap = regmap; - at24->wp_gpio = devm_gpiod_get_optional(dev, "wp", GPIOD_OUT_HIGH); - if (IS_ERR(at24->wp_gpio)) - return PTR_ERR(at24->wp_gpio); + at24->vcc_reg = devm_regulator_get(dev, "vcc"); + if (IS_ERR(at24->vcc_reg)) + return PTR_ERR(at24->vcc_reg); writable = !(flags & AT24_FLAG_READONLY); if (writable) { @@ -702,6 +697,12 @@ static int at24_probe(struct i2c_client *client) i2c_set_clientdata(client, at24); + err = regulator_enable(at24->vcc_reg); + if (err) { + dev_err(dev, "Failed to enable vcc regulator\n"); + return err; + } + /* enable runtime pm */ pm_runtime_set_active(dev); pm_runtime_enable(dev); @@ -714,27 +715,58 @@ static int at24_probe(struct i2c_client *client) pm_runtime_idle(dev); if (err) { pm_runtime_disable(dev); + regulator_disable(at24->vcc_reg); return -ENODEV; } - dev_info(dev, "%u byte %s EEPROM, %s, %u bytes/write\n", - byte_len, client->name, - writable ? "writable" : "read-only", at24->write_max); + if (writable) + dev_info(dev, "%u byte %s EEPROM, writable, %u bytes/write\n", + byte_len, client->name, at24->write_max); + else + dev_info(dev, "%u byte %s EEPROM, read-only\n", + byte_len, client->name); return 0; } static int at24_remove(struct i2c_client *client) { + struct at24_data *at24 = i2c_get_clientdata(client); + pm_runtime_disable(&client->dev); + if (!pm_runtime_status_suspended(&client->dev)) + regulator_disable(at24->vcc_reg); pm_runtime_set_suspended(&client->dev); return 0; } +static int __maybe_unused at24_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct at24_data *at24 = i2c_get_clientdata(client); + + return regulator_disable(at24->vcc_reg); +} + +static int __maybe_unused at24_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct at24_data *at24 = i2c_get_clientdata(client); + + return regulator_enable(at24->vcc_reg); +} + +static const struct dev_pm_ops at24_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(at24_suspend, at24_resume, NULL) +}; + static struct i2c_driver at24_driver = { .driver = { .name = "at24", + .pm = &at24_pm_ops, .of_match_table = at24_of_match, .acpi_match_table = ACPI_PTR(at24_acpi_ids), }, diff --git a/drivers/misc/eeprom/ee1004.c b/drivers/misc/eeprom/ee1004.c index 6f00c33cfe22..b081c67416d7 100644 --- a/drivers/misc/eeprom/ee1004.c +++ b/drivers/misc/eeprom/ee1004.c @@ -195,13 +195,13 @@ static int ee1004_probe(struct i2c_client *client, mutex_lock(&ee1004_bus_lock); if (++ee1004_dev_count == 1) { for (cnr = 0; cnr < 2; cnr++) { - ee1004_set_page[cnr] = i2c_new_dummy(client->adapter, + ee1004_set_page[cnr] = i2c_new_dummy_device(client->adapter, EE1004_ADDR_SET_PAGE + cnr); - if (!ee1004_set_page[cnr]) { + if (IS_ERR(ee1004_set_page[cnr])) { dev_err(&client->dev, "address 0x%02x unavailable\n", EE1004_ADDR_SET_PAGE + cnr); - err = -EADDRINUSE; + err = PTR_ERR(ee1004_set_page[cnr]); goto err_clients; } } diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c index 2cfe3d4ae144..226b5efa6a77 100644 --- a/drivers/misc/eeprom/eeprom.c +++ b/drivers/misc/eeprom/eeprom.c @@ -175,6 +175,10 @@ static int eeprom_probe(struct i2c_client *client, } } + /* Let the users know they are using deprecated driver */ + dev_notice(&client->dev, + "eeprom driver is deprecated, please use at24 instead\n"); + /* create the sysfs eeprom file */ return sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr); } diff --git a/drivers/misc/eeprom/max6875.c b/drivers/misc/eeprom/max6875.c index 4d0cb90f4aeb..9da81f6d4a1c 100644 --- a/drivers/misc/eeprom/max6875.c +++ b/drivers/misc/eeprom/max6875.c @@ -150,9 +150,9 @@ static int max6875_probe(struct i2c_client *client, return -ENOMEM; /* A fake client is created on the odd address */ - data->fake_client = i2c_new_dummy(client->adapter, client->addr + 1); - if (!data->fake_client) { - err = -ENOMEM; + data->fake_client = i2c_new_dummy_device(client->adapter, client->addr + 1); + if (IS_ERR(data->fake_client)) { + err = PTR_ERR(data->fake_client); goto exit_kfree; } diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c index 6d27ccfe0680..3c2d405bc79b 100644 --- a/drivers/misc/enclosure.c +++ b/drivers/misc/enclosure.c @@ -406,10 +406,9 @@ int enclosure_remove_device(struct enclosure_device *edev, struct device *dev) cdev = &edev->component[i]; if (cdev->dev == dev) { enclosure_remove_links(cdev); - device_del(&cdev->cdev); put_device(dev); cdev->dev = NULL; - return device_add(&cdev->cdev); + return 0; } } return -ENODEV; diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 98603e235cf0..e3e085e33d46 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -32,9 +32,9 @@ #define FASTRPC_CTX_MAX (256) #define FASTRPC_INIT_HANDLE 1 #define FASTRPC_CTXID_MASK (0xFF0) -#define INIT_FILELEN_MAX (64 * 1024 * 1024) -#define INIT_MEMLEN_MAX (8 * 1024 * 1024) +#define INIT_FILELEN_MAX (2 * 1024 * 1024) #define FASTRPC_DEVICE_NAME "fastrpc" +#define ADSP_MMAP_ADD_PAGES 0x1000 /* Retrives number of input buffers from the scalars parameter */ #define REMOTE_SCALARS_INBUFS(sc) (((sc) >> 16) & 0x0ff) @@ -67,6 +67,8 @@ /* Remote Method id table */ #define FASTRPC_RMID_INIT_ATTACH 0 #define FASTRPC_RMID_INIT_RELEASE 1 +#define FASTRPC_RMID_INIT_MMAP 4 +#define FASTRPC_RMID_INIT_MUNMAP 5 #define FASTRPC_RMID_INIT_CREATE 6 #define FASTRPC_RMID_INIT_CREATE_ATTR 7 #define FASTRPC_RMID_INIT_CREATE_STATIC 8 @@ -90,6 +92,23 @@ struct fastrpc_remote_arg { u64 len; }; +struct fastrpc_mmap_rsp_msg { + u64 vaddr; +}; + +struct fastrpc_mmap_req_msg { + s32 pgid; + u32 flags; + u64 vaddr; + s32 num; +}; + +struct fastrpc_munmap_req_msg { + s32 pgid; + u64 vaddr; + u64 size; +}; + struct fastrpc_msg { int pid; /* process group id */ int tid; /* thread id */ @@ -124,6 +143,9 @@ struct fastrpc_buf { /* Lock for dma buf attachments */ struct mutex lock; struct list_head attachments; + /* mmap support */ + struct list_head node; /* list of user requested mmaps */ + uintptr_t raddr; }; struct fastrpc_dma_buf_attachment { @@ -186,12 +208,14 @@ struct fastrpc_channel_ctx { struct idr ctx_idr; struct list_head users; struct miscdevice miscdev; + struct kref refcount; }; struct fastrpc_user { struct list_head user; struct list_head maps; struct list_head pending; + struct list_head mmaps; struct fastrpc_channel_ctx *cctx; struct fastrpc_session_ctx *sctx; @@ -269,6 +293,7 @@ static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev, return -ENOMEM; INIT_LIST_HEAD(&buf->attachments); + INIT_LIST_HEAD(&buf->node); mutex_init(&buf->lock); buf->fl = fl; @@ -276,11 +301,15 @@ static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev, buf->phys = 0; buf->size = size; buf->dev = dev; + buf->raddr = 0; buf->virt = dma_alloc_coherent(dev, buf->size, (dma_addr_t *)&buf->phys, GFP_KERNEL); - if (!buf->virt) + if (!buf->virt) { + mutex_destroy(&buf->lock); + kfree(buf); return -ENOMEM; + } if (fl->sctx && fl->sctx->sid) buf->phys += ((u64)fl->sctx->sid << 32); @@ -290,6 +319,25 @@ static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev, return 0; } +static void fastrpc_channel_ctx_free(struct kref *ref) +{ + struct fastrpc_channel_ctx *cctx; + + cctx = container_of(ref, struct fastrpc_channel_ctx, refcount); + + kfree(cctx); +} + +static void fastrpc_channel_ctx_get(struct fastrpc_channel_ctx *cctx) +{ + kref_get(&cctx->refcount); +} + +static void fastrpc_channel_ctx_put(struct fastrpc_channel_ctx *cctx) +{ + kref_put(&cctx->refcount, fastrpc_channel_ctx_free); +} + static void fastrpc_context_free(struct kref *ref) { struct fastrpc_invoke_ctx *ctx; @@ -313,6 +361,8 @@ static void fastrpc_context_free(struct kref *ref) kfree(ctx->maps); kfree(ctx->olaps); kfree(ctx); + + fastrpc_channel_ctx_put(cctx); } static void fastrpc_context_get(struct fastrpc_invoke_ctx *ctx) @@ -419,6 +469,9 @@ static struct fastrpc_invoke_ctx *fastrpc_context_alloc( fastrpc_get_buff_overlaps(ctx); } + /* Released in fastrpc_context_put() */ + fastrpc_channel_ctx_get(cctx); + ctx->sc = sc; ctx->retval = -1; ctx->pid = current->pid; @@ -448,6 +501,7 @@ err_idr: spin_lock(&user->lock); list_del(&ctx->node); spin_unlock(&user->lock); + fastrpc_channel_ctx_put(cctx); kfree(ctx->maps); kfree(ctx->olaps); kfree(ctx); @@ -499,6 +553,7 @@ static int fastrpc_dma_buf_attach(struct dma_buf *dmabuf, FASTRPC_PHYS(buffer->phys), buffer->size); if (ret < 0) { dev_err(buffer->dev, "failed to get scatterlist from DMA API\n"); + kfree(a); return -EINVAL; } @@ -522,16 +577,10 @@ static void fastrpc_dma_buf_detatch(struct dma_buf *dmabuf, mutex_lock(&buffer->lock); list_del(&a->node); mutex_unlock(&buffer->lock); + sg_free_table(&a->sgt); kfree(a); } -static void *fastrpc_kmap(struct dma_buf *dmabuf, unsigned long pgnum) -{ - struct fastrpc_buf *buf = dmabuf->priv; - - return buf->virt ? buf->virt + pgnum * PAGE_SIZE : NULL; -} - static void *fastrpc_vmap(struct dma_buf *dmabuf) { struct fastrpc_buf *buf = dmabuf->priv; @@ -555,7 +604,6 @@ static const struct dma_buf_ops fastrpc_dma_buf_ops = { .map_dma_buf = fastrpc_map_dma_buf, .unmap_dma_buf = fastrpc_unmap_dma_buf, .mmap = fastrpc_mmap, - .map = fastrpc_kmap, .vmap = fastrpc_vmap, .release = fastrpc_release, }; @@ -884,6 +932,9 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, if (!fl->sctx) return -EINVAL; + if (!fl->cctx->rpdev) + return -EPIPE; + ctx = fastrpc_context_alloc(fl, kernel, sc, args); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -901,8 +952,13 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, if (err) goto bail; - /* Wait for remote dsp to respond or time out */ - err = wait_for_completion_interruptible(&ctx->work); + if (kernel) { + if (!wait_for_completion_timeout(&ctx->work, 10 * HZ)) + err = -ETIMEDOUT; + } else { + err = wait_for_completion_interruptible(&ctx->work); + } + if (err) goto bail; @@ -921,12 +977,13 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, } bail: - /* We are done with this compute context, remove it from pending list */ - spin_lock(&fl->lock); - list_del(&ctx->node); - spin_unlock(&fl->lock); - fastrpc_context_put(ctx); - + if (err != -ERESTARTSYS && err != -ETIMEDOUT) { + /* We are done with this compute context */ + spin_lock(&fl->lock); + list_del(&ctx->node); + spin_unlock(&fl->lock); + fastrpc_context_put(ctx); + } if (err) dev_dbg(fl->sctx->dev, "Error: Invoke Failed %d\n", err); @@ -1098,6 +1155,7 @@ static int fastrpc_device_release(struct inode *inode, struct file *file) struct fastrpc_channel_ctx *cctx = fl->cctx; struct fastrpc_invoke_ctx *ctx, *n; struct fastrpc_map *map, *m; + struct fastrpc_buf *buf, *b; unsigned long flags; fastrpc_release_current_dsp_process(fl); @@ -1119,7 +1177,13 @@ static int fastrpc_device_release(struct inode *inode, struct file *file) fastrpc_map_put(map); } + list_for_each_entry_safe(buf, b, &fl->mmaps, node) { + list_del(&buf->node); + fastrpc_buf_free(buf); + } + fastrpc_session_free(cctx, fl->sctx); + fastrpc_channel_ctx_put(cctx); mutex_destroy(&fl->mutex); kfree(fl); @@ -1138,11 +1202,15 @@ static int fastrpc_device_open(struct inode *inode, struct file *filp) if (!fl) return -ENOMEM; + /* Released in fastrpc_device_release() */ + fastrpc_channel_ctx_get(cctx); + filp->private_data = fl; spin_lock_init(&fl->lock); mutex_init(&fl->mutex); INIT_LIST_HEAD(&fl->pending); INIT_LIST_HEAD(&fl->maps); + INIT_LIST_HEAD(&fl->mmaps); INIT_LIST_HEAD(&fl->user); fl->tgid = current->tgid; fl->cctx = cctx; @@ -1163,26 +1231,6 @@ static int fastrpc_device_open(struct inode *inode, struct file *filp) return 0; } -static int fastrpc_dmabuf_free(struct fastrpc_user *fl, char __user *argp) -{ - struct dma_buf *buf; - int info; - - if (copy_from_user(&info, argp, sizeof(info))) - return -EFAULT; - - buf = dma_buf_get(info); - if (IS_ERR_OR_NULL(buf)) - return -EINVAL; - /* - * one for the last get and other for the ALLOC_DMA_BUFF ioctl - */ - dma_buf_put(buf); - dma_buf_put(buf); - - return 0; -} - static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp) { struct fastrpc_alloc_dma_buf bp; @@ -1218,8 +1266,6 @@ static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp) return -EFAULT; } - get_dma_buf(buf->dmabuf); - return 0; } @@ -1270,6 +1316,148 @@ static int fastrpc_invoke(struct fastrpc_user *fl, char __user *argp) return err; } +static int fastrpc_req_munmap_impl(struct fastrpc_user *fl, + struct fastrpc_req_munmap *req) +{ + struct fastrpc_invoke_args args[1] = { [0] = { 0 } }; + struct fastrpc_buf *buf, *b; + struct fastrpc_munmap_req_msg req_msg; + struct device *dev = fl->sctx->dev; + int err; + u32 sc; + + spin_lock(&fl->lock); + list_for_each_entry_safe(buf, b, &fl->mmaps, node) { + if ((buf->raddr == req->vaddrout) && (buf->size == req->size)) + break; + buf = NULL; + } + spin_unlock(&fl->lock); + + if (!buf) { + dev_err(dev, "mmap not in list\n"); + return -EINVAL; + } + + req_msg.pgid = fl->tgid; + req_msg.size = buf->size; + req_msg.vaddr = buf->raddr; + + args[0].ptr = (u64) (uintptr_t) &req_msg; + args[0].length = sizeof(req_msg); + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MUNMAP, 1, 0); + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, + &args[0]); + if (!err) { + dev_dbg(dev, "unmmap\tpt 0x%09lx OK\n", buf->raddr); + spin_lock(&fl->lock); + list_del(&buf->node); + spin_unlock(&fl->lock); + fastrpc_buf_free(buf); + } else { + dev_err(dev, "unmmap\tpt 0x%09lx ERROR\n", buf->raddr); + } + + return err; +} + +static int fastrpc_req_munmap(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_req_munmap req; + + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + + return fastrpc_req_munmap_impl(fl, &req); +} + +static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_invoke_args args[3] = { [0 ... 2] = { 0 } }; + struct fastrpc_buf *buf = NULL; + struct fastrpc_mmap_req_msg req_msg; + struct fastrpc_mmap_rsp_msg rsp_msg; + struct fastrpc_req_munmap req_unmap; + struct fastrpc_phy_page pages; + struct fastrpc_req_mmap req; + struct device *dev = fl->sctx->dev; + int err; + u32 sc; + + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + + if (req.flags != ADSP_MMAP_ADD_PAGES) { + dev_err(dev, "flag not supported 0x%x\n", req.flags); + return -EINVAL; + } + + if (req.vaddrin) { + dev_err(dev, "adding user allocated pages is not supported\n"); + return -EINVAL; + } + + err = fastrpc_buf_alloc(fl, fl->sctx->dev, req.size, &buf); + if (err) { + dev_err(dev, "failed to allocate buffer\n"); + return err; + } + + req_msg.pgid = fl->tgid; + req_msg.flags = req.flags; + req_msg.vaddr = req.vaddrin; + req_msg.num = sizeof(pages); + + args[0].ptr = (u64) (uintptr_t) &req_msg; + args[0].length = sizeof(req_msg); + + pages.addr = buf->phys; + pages.size = buf->size; + + args[1].ptr = (u64) (uintptr_t) &pages; + args[1].length = sizeof(pages); + + args[2].ptr = (u64) (uintptr_t) &rsp_msg; + args[2].length = sizeof(rsp_msg); + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MMAP, 2, 1); + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, + &args[0]); + if (err) { + dev_err(dev, "mmap error (len 0x%08llx)\n", buf->size); + goto err_invoke; + } + + /* update the buffer to be able to deallocate the memory on the DSP */ + buf->raddr = (uintptr_t) rsp_msg.vaddr; + + /* let the client know the address to use */ + req.vaddrout = rsp_msg.vaddr; + + spin_lock(&fl->lock); + list_add_tail(&buf->node, &fl->mmaps); + spin_unlock(&fl->lock); + + if (copy_to_user((void __user *)argp, &req, sizeof(req))) { + /* unmap the memory and release the buffer */ + req_unmap.vaddrout = buf->raddr; + req_unmap.size = buf->size; + fastrpc_req_munmap_impl(fl, &req_unmap); + return -EFAULT; + } + + dev_dbg(dev, "mmap\t\tpt 0x%09lx OK [len 0x%08llx]\n", + buf->raddr, buf->size); + + return 0; + +err_invoke: + fastrpc_buf_free(buf); + + return err; +} + static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -1287,12 +1475,15 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, case FASTRPC_IOCTL_INIT_CREATE: err = fastrpc_init_create_process(fl, argp); break; - case FASTRPC_IOCTL_FREE_DMA_BUFF: - err = fastrpc_dmabuf_free(fl, argp); - break; case FASTRPC_IOCTL_ALLOC_DMA_BUFF: err = fastrpc_dmabuf_alloc(fl, argp); break; + case FASTRPC_IOCTL_MMAP: + err = fastrpc_req_mmap(fl, argp); + break; + case FASTRPC_IOCTL_MUNMAP: + err = fastrpc_req_munmap(fl, argp); + break; default: err = -ENOTTY; break; @@ -1395,10 +1586,6 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev) int i, err, domain_id = -1; const char *domain; - data = devm_kzalloc(rdev, sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - err = of_property_read_string(rdev->of_node, "label", &domain); if (err) { dev_info(rdev, "FastRPC Domain not specified in DT\n"); @@ -1417,14 +1604,20 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev) return -EINVAL; } + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->miscdev.minor = MISC_DYNAMIC_MINOR; - data->miscdev.name = kasprintf(GFP_KERNEL, "fastrpc-%s", - domains[domain_id]); + data->miscdev.name = devm_kasprintf(rdev, GFP_KERNEL, "fastrpc-%s", + domains[domain_id]); data->miscdev.fops = &fastrpc_fops; err = misc_register(&data->miscdev); if (err) return err; + kref_init(&data->refcount); + dev_set_drvdata(&rpdev->dev, data); dma_set_mask_and_coherent(rdev, DMA_BIT_MASK(32)); INIT_LIST_HEAD(&data->users); @@ -1459,7 +1652,9 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev) misc_deregister(&cctx->miscdev); of_platform_depopulate(&rpdev->dev); - kfree(cctx); + + cctx->rpdev = NULL; + fastrpc_channel_ctx_put(cctx); } static int fastrpc_rpmsg_callback(struct rpmsg_device *rpdev, void *data, diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c index 026c6ca24540..905106579935 100644 --- a/drivers/misc/genwqe/card_ddcb.c +++ b/drivers/misc/genwqe/card_ddcb.c @@ -1084,7 +1084,7 @@ static int setup_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue) queue->ddcb_daddr); queue->ddcb_vaddr = NULL; queue->ddcb_daddr = 0ull; - return -ENODEV; + return rc; } @@ -1179,7 +1179,7 @@ static irqreturn_t genwqe_vf_isr(int irq, void *dev_id) */ static int genwqe_card_thread(void *data) { - int should_stop = 0, rc = 0; + int should_stop = 0; struct genwqe_dev *cd = (struct genwqe_dev *)data; while (!kthread_should_stop()) { @@ -1187,12 +1187,12 @@ static int genwqe_card_thread(void *data) genwqe_check_ddcb_queue(cd, &cd->queue); if (GENWQE_POLLING_ENABLED) { - rc = wait_event_interruptible_timeout( + wait_event_interruptible_timeout( cd->queue_waitq, genwqe_ddcbs_in_flight(cd) || (should_stop = kthread_should_stop()), 1); } else { - rc = wait_event_interruptible_timeout( + wait_event_interruptible_timeout( cd->queue_waitq, genwqe_next_ddcb_ready(cd) || (should_stop = kthread_should_stop()), HZ); diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index 0e34c0568fed..040a0bda3125 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -1215,34 +1215,13 @@ static long genwqe_ioctl(struct file *filp, unsigned int cmd, return rc; } -#if defined(CONFIG_COMPAT) -/** - * genwqe_compat_ioctl() - Compatibility ioctl - * - * Called whenever a 32-bit process running under a 64-bit kernel - * performs an ioctl on /dev/genwqe<n>_card. - * - * @filp: file pointer. - * @cmd: command. - * @arg: user argument. - * Return: zero on success or negative number on failure. - */ -static long genwqe_compat_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - return genwqe_ioctl(filp, cmd, arg); -} -#endif /* defined(CONFIG_COMPAT) */ - static const struct file_operations genwqe_fops = { .owner = THIS_MODULE, .open = genwqe_open, .fasync = genwqe_fasync, .mmap = genwqe_mmap, .unlocked_ioctl = genwqe_ioctl, -#if defined(CONFIG_COMPAT) - .compat_ioctl = genwqe_compat_ioctl, -#endif + .compat_ioctl = compat_ptr_ioctl, .release = genwqe_release, }; diff --git a/drivers/misc/habanalabs/asid.c b/drivers/misc/habanalabs/asid.c index 2c01461701a3..a2fdf31cf27c 100644 --- a/drivers/misc/habanalabs/asid.c +++ b/drivers/misc/habanalabs/asid.c @@ -18,7 +18,7 @@ int hl_asid_init(struct hl_device *hdev) mutex_init(&hdev->asid_mutex); - /* ASID 0 is reserved for KMD and device CPU */ + /* ASID 0 is reserved for the kernel driver and device CPU */ set_bit(0, hdev->asid_bitmap); return 0; diff --git a/drivers/misc/habanalabs/command_buffer.c b/drivers/misc/habanalabs/command_buffer.c index e495f44064fa..53fddbd8e693 100644 --- a/drivers/misc/habanalabs/command_buffer.c +++ b/drivers/misc/habanalabs/command_buffer.c @@ -397,7 +397,8 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size) rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle, HL_KERNEL_ASID_ID); if (rc) { - dev_err(hdev->dev, "Failed to allocate CB for KMD %d\n", rc); + dev_err(hdev->dev, + "Failed to allocate CB for the kernel driver %d\n", rc); return NULL; } diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index f00d1c32f6d6..0bf08678431b 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -65,6 +65,18 @@ static void cs_put(struct hl_cs *cs) kref_put(&cs->refcount, cs_do_release); } +static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job) +{ + /* + * Patched CB is created for external queues jobs, and for H/W queues + * jobs if the user CB was allocated by driver and MMU is disabled. + */ + return (job->queue_type == QUEUE_TYPE_EXT || + (job->queue_type == QUEUE_TYPE_HW && + job->is_kernel_allocated_cb && + !hdev->mmu_enable)); +} + /* * cs_parser - parse the user command submission * @@ -91,11 +103,13 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) parser.patched_cb = NULL; parser.user_cb = job->user_cb; parser.user_cb_size = job->user_cb_size; - parser.ext_queue = job->ext_queue; + parser.queue_type = job->queue_type; + parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; job->patched_cb = NULL; rc = hdev->asic_funcs->cs_parser(hdev, &parser); - if (job->ext_queue) { + + if (is_cb_patched(hdev, job)) { if (!rc) { job->patched_cb = parser.patched_cb; job->job_cb_size = parser.patched_cb_size; @@ -124,7 +138,7 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) { struct hl_cs *cs = job->cs; - if (job->ext_queue) { + if (is_cb_patched(hdev, job)) { hl_userptr_delete_list(hdev, &job->userptr_list); /* @@ -140,6 +154,19 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) } } + /* For H/W queue jobs, if a user CB was allocated by driver and MMU is + * enabled, the user CB isn't released in cs_parser() and thus should be + * released here. + */ + if (job->queue_type == QUEUE_TYPE_HW && + job->is_kernel_allocated_cb && hdev->mmu_enable) { + spin_lock(&job->user_cb->lock); + job->user_cb->cs_cnt--; + spin_unlock(&job->user_cb->lock); + + hl_cb_put(job->user_cb); + } + /* * This is the only place where there can be multiple threads * modifying the list at the same time @@ -150,7 +177,8 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) hl_debugfs_remove_job(hdev, job); - if (job->ext_queue) + if (job->queue_type == QUEUE_TYPE_EXT || + job->queue_type == QUEUE_TYPE_HW) cs_put(cs); kfree(job); @@ -178,11 +206,23 @@ static void cs_do_release(struct kref *ref) /* We also need to update CI for internal queues */ if (cs->submitted) { - int cs_cnt = atomic_dec_return(&hdev->cs_active_cnt); + hdev->asic_funcs->hw_queues_lock(hdev); + + hdev->cs_active_cnt--; + if (!hdev->cs_active_cnt) { + struct hl_device_idle_busy_ts *ts; + + ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++]; + ts->busy_to_idle_ts = ktime_get(); - WARN_ONCE((cs_cnt < 0), - "hl%d: error in CS active cnt %d\n", - hdev->id, cs_cnt); + if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE) + hdev->idle_busy_ts_idx = 0; + } else if (hdev->cs_active_cnt < 0) { + dev_crit(hdev->dev, "CS active cnt %d is negative\n", + hdev->cs_active_cnt); + } + + hdev->asic_funcs->hw_queues_unlock(hdev); hl_int_hw_queue_update_ci(cs); @@ -305,6 +345,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, other = ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)]; if ((other) && (!dma_fence_is_signaled(other))) { spin_unlock(&ctx->cs_lock); + dev_dbg(hdev->dev, + "Rejecting CS because of too many in-flights CS\n"); rc = -EAGAIN; goto free_fence; } @@ -373,18 +415,13 @@ static void job_wq_completion(struct work_struct *work) free_job(hdev, job); } -static struct hl_cb *validate_queue_index(struct hl_device *hdev, - struct hl_cb_mgr *cb_mgr, - struct hl_cs_chunk *chunk, - bool *ext_queue) +static int validate_queue_index(struct hl_device *hdev, + struct hl_cs_chunk *chunk, + enum hl_queue_type *queue_type, + bool *is_kernel_allocated_cb) { struct asic_fixed_properties *asic = &hdev->asic_prop; struct hw_queue_properties *hw_queue_prop; - u32 cb_handle; - struct hl_cb *cb; - - /* Assume external queue */ - *ext_queue = true; hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; @@ -392,19 +429,29 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev, (hw_queue_prop->type == QUEUE_TYPE_NA)) { dev_err(hdev->dev, "Queue index %d is invalid\n", chunk->queue_index); - return NULL; + return -EINVAL; } - if (hw_queue_prop->kmd_only) { - dev_err(hdev->dev, "Queue index %d is restricted for KMD\n", + if (hw_queue_prop->driver_only) { + dev_err(hdev->dev, + "Queue index %d is restricted for the kernel driver\n", chunk->queue_index); - return NULL; - } else if (hw_queue_prop->type == QUEUE_TYPE_INT) { - *ext_queue = false; - return (struct hl_cb *) (uintptr_t) chunk->cb_handle; + return -EINVAL; } - /* Retrieve CB object */ + *queue_type = hw_queue_prop->type; + *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb; + + return 0; +} + +static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev, + struct hl_cb_mgr *cb_mgr, + struct hl_cs_chunk *chunk) +{ + struct hl_cb *cb; + u32 cb_handle; + cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT); cb = hl_cb_get(hdev, cb_mgr, cb_handle); @@ -429,7 +476,8 @@ release_cb: return NULL; } -struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue) +struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, + enum hl_queue_type queue_type, bool is_kernel_allocated_cb) { struct hl_cs_job *job; @@ -437,12 +485,14 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue) if (!job) return NULL; - job->ext_queue = ext_queue; + job->queue_type = queue_type; + job->is_kernel_allocated_cb = is_kernel_allocated_cb; - if (job->ext_queue) { + if (is_cb_patched(hdev, job)) INIT_LIST_HEAD(&job->userptr_list); + + if (job->queue_type == QUEUE_TYPE_EXT) INIT_WORK(&job->finish_work, job_wq_completion); - } return job; } @@ -455,7 +505,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, struct hl_cs_job *job; struct hl_cs *cs; struct hl_cb *cb; - bool ext_queue_present = false; + bool int_queues_only = true; u32 size_to_copy; int rc, i, parse_cnt; @@ -499,23 +549,33 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, /* Validate ALL the CS chunks before submitting the CS */ for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) { struct hl_cs_chunk *chunk = &cs_chunk_array[i]; - bool ext_queue; + enum hl_queue_type queue_type; + bool is_kernel_allocated_cb; + + rc = validate_queue_index(hdev, chunk, &queue_type, + &is_kernel_allocated_cb); + if (rc) + goto free_cs_object; - cb = validate_queue_index(hdev, &hpriv->cb_mgr, chunk, - &ext_queue); - if (ext_queue) { - ext_queue_present = true; + if (is_kernel_allocated_cb) { + cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); if (!cb) { rc = -EINVAL; goto free_cs_object; } + } else { + cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; } - job = hl_cs_allocate_job(hdev, ext_queue); + if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW) + int_queues_only = false; + + job = hl_cs_allocate_job(hdev, queue_type, + is_kernel_allocated_cb); if (!job) { dev_err(hdev->dev, "Failed to allocate a new job\n"); rc = -ENOMEM; - if (ext_queue) + if (is_kernel_allocated_cb) goto release_cb; else goto free_cs_object; @@ -525,7 +585,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, job->cs = cs; job->user_cb = cb; job->user_cb_size = chunk->cb_size; - if (job->ext_queue) + if (is_kernel_allocated_cb) job->job_cb_size = cb->size; else job->job_cb_size = chunk->cb_size; @@ -538,10 +598,11 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, /* * Increment CS reference. When CS reference is 0, CS is * done and can be signaled to user and free all its resources - * Only increment for JOB on external queues, because only - * for those JOBs we get completion + * Only increment for JOB on external or H/W queues, because + * only for those JOBs we get completion */ - if (job->ext_queue) + if (job->queue_type == QUEUE_TYPE_EXT || + job->queue_type == QUEUE_TYPE_HW) cs_get(cs); hl_debugfs_add_job(hdev, job); @@ -555,9 +616,9 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, } } - if (!ext_queue_present) { + if (int_queues_only) { dev_err(hdev->dev, - "Reject CS %d.%llu because no external queues jobs\n", + "Reject CS %d.%llu because only internal queues jobs are present\n", cs->ctx->asid, cs->sequence); rc = -EINVAL; goto free_cs_object; @@ -565,9 +626,10 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, rc = hl_hw_queue_schedule_cs(cs); if (rc) { - dev_err(hdev->dev, - "Failed to submit CS %d.%llu to H/W queues, error %d\n", - cs->ctx->asid, cs->sequence, rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to submit CS %d.%llu to H/W queues, error %d\n", + cs->ctx->asid, cs->sequence, rc); goto free_cs_object; } @@ -762,8 +824,9 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) memset(args, 0, sizeof(*args)); if (rc < 0) { - dev_err(hdev->dev, "Error %ld on waiting for CS handle %llu\n", - rc, seq); + dev_err_ratelimited(hdev->dev, + "Error %ld on waiting for CS handle %llu\n", + rc, seq); if (rc == -ERESTARTSYS) { args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED; rc = -EINTR; diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c index 8682590e3f6e..2df6fb87e7ff 100644 --- a/drivers/misc/habanalabs/context.c +++ b/drivers/misc/habanalabs/context.c @@ -26,12 +26,13 @@ static void hl_ctx_fini(struct hl_ctx *ctx) dma_fence_put(ctx->cs_pending[i]); if (ctx->asid != HL_KERNEL_ASID_ID) { - /* - * The engines are stopped as there is no executing CS, but the + /* The engines are stopped as there is no executing CS, but the * Coresight might be still working by accessing addresses * related to the stopped engines. Hence stop it explicitly. + * Stop only if this is the compute context, as there can be + * only one compute context */ - if (hdev->in_debug) + if ((hdev->in_debug) && (hdev->compute_ctx == ctx)) hl_device_set_debug_mode(hdev, false); hl_vm_ctx_fini(ctx); @@ -67,29 +68,36 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv) goto out_err; } + mutex_lock(&mgr->ctx_lock); + rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL); + mutex_unlock(&mgr->ctx_lock); + + if (rc < 0) { + dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n"); + goto free_ctx; + } + + ctx->handle = rc; + rc = hl_ctx_init(hdev, ctx, false); if (rc) - goto free_ctx; + goto remove_from_idr; hl_hpriv_get(hpriv); ctx->hpriv = hpriv; - /* TODO: remove for multiple contexts */ + /* TODO: remove for multiple contexts per process */ hpriv->ctx = ctx; - hdev->user_ctx = ctx; - mutex_lock(&mgr->ctx_lock); - rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL); - mutex_unlock(&mgr->ctx_lock); - - if (rc < 0) { - dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n"); - hl_ctx_free(hdev, ctx); - goto out_err; - } + /* TODO: remove the following line for multiple process support */ + hdev->compute_ctx = ctx; return 0; +remove_from_idr: + mutex_lock(&mgr->ctx_lock); + idr_remove(&mgr->ctx_handles, ctx->handle); + mutex_unlock(&mgr->ctx_lock); free_ctx: kfree(ctx); out_err: @@ -120,7 +128,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) ctx->thread_ctx_switch_wait_token = 0; if (is_kernel_ctx) { - ctx->asid = HL_KERNEL_ASID_ID; /* KMD gets ASID 0 */ + ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */ rc = hl_mmu_ctx_init(ctx); if (rc) { dev_err(hdev->dev, "Failed to init mmu ctx module\n"); @@ -168,7 +176,7 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq) spin_lock(&ctx->cs_lock); if (seq >= ctx->cs_sequence) { - dev_notice(hdev->dev, + dev_notice_ratelimited(hdev->dev, "Can't wait on seq %llu because current CS is at seq %llu\n", seq, ctx->cs_sequence); spin_unlock(&ctx->cs_lock); diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 18e499c900c7..20413e350343 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -29,7 +29,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = __cpu_to_le32(ARMCP_PACKET_I2C_RD << + pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD << ARMCP_PKT_CTL_OPCODE_SHIFT); pkt.i2c_bus = i2c_bus; pkt.i2c_addr = i2c_addr; @@ -55,12 +55,12 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = __cpu_to_le32(ARMCP_PACKET_I2C_WR << + pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR << ARMCP_PKT_CTL_OPCODE_SHIFT); pkt.i2c_bus = i2c_bus; pkt.i2c_addr = i2c_addr; pkt.i2c_reg = i2c_reg; - pkt.value = __cpu_to_le64(val); + pkt.value = cpu_to_le64(val); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); @@ -81,10 +81,10 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = __cpu_to_le32(ARMCP_PACKET_LED_SET << + pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET << ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.led_index = __cpu_to_le32(led); - pkt.value = __cpu_to_le64(state); + pkt.led_index = cpu_to_le32(led); + pkt.value = cpu_to_le64(state); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); @@ -307,45 +307,57 @@ static inline u64 get_hop0_addr(struct hl_ctx *ctx) (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); } -static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, + u64 virt_addr, u64 mask, u64 shift) { return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP0_MASK) >> HOP0_SHIFT); + ((virt_addr & mask) >> shift); } -static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) { - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP1_MASK) >> HOP1_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask, + mmu_specs->hop0_shift); } -static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) { - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP2_MASK) >> HOP2_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask, + mmu_specs->hop1_shift); } -static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) { - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP3_MASK) >> HOP3_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask, + mmu_specs->hop2_shift); } -static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) { - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP4_MASK) >> HOP4_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask, + mmu_specs->hop3_shift); +} + +static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask, + mmu_specs->hop4_shift); } static inline u64 get_next_hop_addr(u64 curr_pte) { if (curr_pte & PAGE_PRESENT_MASK) - return curr_pte & PHYS_ADDR_MASK; + return curr_pte & HOP_PHYS_ADDR_MASK; else return ULLONG_MAX; } @@ -355,7 +367,10 @@ static int mmu_show(struct seq_file *s, void *data) struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; struct hl_ctx *ctx; + bool is_dram_addr; u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0, hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0, @@ -370,40 +385,46 @@ static int mmu_show(struct seq_file *s, void *data) if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID) ctx = hdev->kernel_ctx; else - ctx = hdev->user_ctx; + ctx = hdev->compute_ctx; if (!ctx) { dev_err(hdev->dev, "no ctx available\n"); return 0; } + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->va_space_dram_start_address, + prop->va_space_dram_end_address); + + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + mutex_lock(&ctx->mmu_lock); /* the following lookup is copied from unmap() in mmu.c */ hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr); + hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr); hop1_addr = get_next_hop_addr(hop0_pte); if (hop1_addr == ULLONG_MAX) goto not_mapped; - hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr); + hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr); hop2_addr = get_next_hop_addr(hop1_pte); if (hop2_addr == ULLONG_MAX) goto not_mapped; - hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr); + hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr); hop3_addr = get_next_hop_addr(hop2_pte); if (hop3_addr == ULLONG_MAX) goto not_mapped; - hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr); + hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr); if (!(hop3_pte & LAST_MASK)) { @@ -412,7 +433,8 @@ static int mmu_show(struct seq_file *s, void *data) if (hop4_addr == ULLONG_MAX) goto not_mapped; - hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr); + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, + virt_addr); hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr); if (!(hop4_pte & PAGE_PRESENT_MASK)) goto not_mapped; @@ -506,6 +528,12 @@ static int engines_show(struct seq_file *s, void *data) struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, + "Can't check device idle during reset\n"); + return 0; + } + hdev->asic_funcs->is_device_idle(hdev, NULL, s); return 0; @@ -533,42 +561,51 @@ out: static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u64 *phys_addr) { - struct hl_ctx *ctx = hdev->user_ctx; + struct hl_ctx *ctx = hdev->compute_ctx; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 hop_addr, hop_pte_addr, hop_pte; - u64 offset_mask = HOP4_MASK | OFFSET_MASK; + u64 offset_mask = HOP4_MASK | FLAGS_MASK; int rc = 0; + bool is_dram_addr; if (!ctx) { dev_err(hdev->dev, "no ctx available\n"); return -EINVAL; } + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->va_space_dram_start_address, + prop->va_space_dram_end_address); + + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + mutex_lock(&ctx->mmu_lock); /* hop 0 */ hop_addr = get_hop0_addr(ctx); - hop_pte_addr = get_hop0_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); /* hop 1 */ hop_addr = get_next_hop_addr(hop_pte); if (hop_addr == ULLONG_MAX) goto not_mapped; - hop_pte_addr = get_hop1_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); /* hop 2 */ hop_addr = get_next_hop_addr(hop_pte); if (hop_addr == ULLONG_MAX) goto not_mapped; - hop_pte_addr = get_hop2_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); /* hop 3 */ hop_addr = get_next_hop_addr(hop_pte); if (hop_addr == ULLONG_MAX) goto not_mapped; - hop_pte_addr = get_hop3_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); if (!(hop_pte & LAST_MASK)) { @@ -576,10 +613,11 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, hop_addr = get_next_hop_addr(hop_pte); if (hop_addr == ULLONG_MAX) goto not_mapped; - hop_pte_addr = get_hop4_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr, + virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - offset_mask = OFFSET_MASK; + offset_mask = FLAGS_MASK; } if (!(hop_pte & PAGE_PRESENT_MASK)) @@ -608,6 +646,11 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, u32 val; ssize_t rc; + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't read during reset\n"); + return 0; + } + if (*ppos) return 0; @@ -637,6 +680,11 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, u32 value; ssize_t rc; + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't write during reset\n"); + return 0; + } + rc = kstrtouint_from_user(buf, count, 16, &value); if (rc) return rc; diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 7a8f9d0b71b5..b155e9549076 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -53,13 +53,12 @@ static void hpriv_release(struct kref *ref) mutex_destroy(&hpriv->restore_phase_mutex); - kfree(hpriv); - - /* Now the FD is really closed */ - atomic_dec(&hdev->fd_open_cnt); + mutex_lock(&hdev->fpriv_list_lock); + list_del(&hpriv->dev_node); + hdev->compute_ctx = NULL; + mutex_unlock(&hdev->fpriv_list_lock); - /* This allows a new user context to open the device */ - hdev->user_ctx = NULL; + kfree(hpriv); } void hl_hpriv_get(struct hl_fpriv *hpriv) @@ -94,6 +93,24 @@ static int hl_device_release(struct inode *inode, struct file *filp) return 0; } +static int hl_device_release_ctrl(struct inode *inode, struct file *filp) +{ + struct hl_fpriv *hpriv = filp->private_data; + struct hl_device *hdev; + + filp->private_data = NULL; + + hdev = hpriv->hdev; + + mutex_lock(&hdev->fpriv_list_lock); + list_del(&hpriv->dev_node); + mutex_unlock(&hdev->fpriv_list_lock); + + kfree(hpriv); + + return 0; +} + /* * hl_mmap - mmap function for habanalabs device * @@ -124,55 +141,102 @@ static const struct file_operations hl_ops = { .compat_ioctl = hl_ioctl }; +static const struct file_operations hl_ctrl_ops = { + .owner = THIS_MODULE, + .open = hl_device_open_ctrl, + .release = hl_device_release_ctrl, + .unlocked_ioctl = hl_ioctl_control, + .compat_ioctl = hl_ioctl_control +}; + +static void device_release_func(struct device *dev) +{ + kfree(dev); +} + /* - * device_setup_cdev - setup cdev and device for habanalabs device + * device_init_cdev - Initialize cdev and device for habanalabs device * * @hdev: pointer to habanalabs device structure * @hclass: pointer to the class object of the device * @minor: minor number of the specific device - * @fpos : file operations to install for this device + * @fpos: file operations to install for this device + * @name: name of the device as it will appear in the filesystem + * @cdev: pointer to the char device object that will be initialized + * @dev: pointer to the device object that will be initialized * - * Create a cdev and a Linux device for habanalabs's device. Need to be - * called at the end of the habanalabs device initialization process, - * because this function exposes the device to the user + * Initialize a cdev and a Linux device for habanalabs's device. */ -static int device_setup_cdev(struct hl_device *hdev, struct class *hclass, - int minor, const struct file_operations *fops) +static int device_init_cdev(struct hl_device *hdev, struct class *hclass, + int minor, const struct file_operations *fops, + char *name, struct cdev *cdev, + struct device **dev) { - int err, devno = MKDEV(hdev->major, minor); - struct cdev *hdev_cdev = &hdev->cdev; - char *name; + cdev_init(cdev, fops); + cdev->owner = THIS_MODULE; - name = kasprintf(GFP_KERNEL, "hl%d", hdev->id); - if (!name) + *dev = kzalloc(sizeof(**dev), GFP_KERNEL); + if (!*dev) return -ENOMEM; - cdev_init(hdev_cdev, fops); - hdev_cdev->owner = THIS_MODULE; - err = cdev_add(hdev_cdev, devno, 1); - if (err) { - pr_err("Failed to add char device %s\n", name); - goto err_cdev_add; + device_initialize(*dev); + (*dev)->devt = MKDEV(hdev->major, minor); + (*dev)->class = hclass; + (*dev)->release = device_release_func; + dev_set_drvdata(*dev, hdev); + dev_set_name(*dev, "%s", name); + + return 0; +} + +static int device_cdev_sysfs_add(struct hl_device *hdev) +{ + int rc; + + rc = cdev_device_add(&hdev->cdev, hdev->dev); + if (rc) { + dev_err(hdev->dev, + "failed to add a char device to the system\n"); + return rc; } - hdev->dev = device_create(hclass, NULL, devno, NULL, "%s", name); - if (IS_ERR(hdev->dev)) { - pr_err("Failed to create device %s\n", name); - err = PTR_ERR(hdev->dev); - goto err_device_create; + rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl); + if (rc) { + dev_err(hdev->dev, + "failed to add a control char device to the system\n"); + goto delete_cdev_device; } - dev_set_drvdata(hdev->dev, hdev); + /* hl_sysfs_init() must be done after adding the device to the system */ + rc = hl_sysfs_init(hdev); + if (rc) { + dev_err(hdev->dev, "failed to initialize sysfs\n"); + goto delete_ctrl_cdev_device; + } - kfree(name); + hdev->cdev_sysfs_created = true; return 0; -err_device_create: - cdev_del(hdev_cdev); -err_cdev_add: - kfree(name); - return err; +delete_ctrl_cdev_device: + cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); +delete_cdev_device: + cdev_device_del(&hdev->cdev, hdev->dev); + return rc; +} + +static void device_cdev_sysfs_del(struct hl_device *hdev) +{ + /* device_release() won't be called so must free devices explicitly */ + if (!hdev->cdev_sysfs_created) { + kfree(hdev->dev_ctrl); + kfree(hdev->dev); + return; + } + + hl_sysfs_fini(hdev); + cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); + cdev_device_del(&hdev->cdev, hdev->dev); } /* @@ -227,20 +291,29 @@ static int device_early_init(struct hl_device *hdev) goto free_eq_wq; } + hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE, + sizeof(struct hl_device_idle_busy_ts), + (GFP_KERNEL | __GFP_ZERO)); + if (!hdev->idle_busy_ts_arr) { + rc = -ENOMEM; + goto free_chip_info; + } + hl_cb_mgr_init(&hdev->kernel_cb_mgr); - mutex_init(&hdev->fd_open_cnt_lock); mutex_init(&hdev->send_cpu_message_lock); mutex_init(&hdev->debug_lock); mutex_init(&hdev->mmu_cache_lock); INIT_LIST_HEAD(&hdev->hw_queues_mirror_list); spin_lock_init(&hdev->hw_queues_mirror_lock); + INIT_LIST_HEAD(&hdev->fpriv_list); + mutex_init(&hdev->fpriv_list_lock); atomic_set(&hdev->in_reset, 0); - atomic_set(&hdev->fd_open_cnt, 0); - atomic_set(&hdev->cs_active_cnt, 0); return 0; +free_chip_info: + kfree(hdev->hl_chip_info); free_eq_wq: destroy_workqueue(hdev->eq_wq); free_cq_wq: @@ -266,8 +339,11 @@ static void device_early_fini(struct hl_device *hdev) mutex_destroy(&hdev->debug_lock); mutex_destroy(&hdev->send_cpu_message_lock); + mutex_destroy(&hdev->fpriv_list_lock); + hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); + kfree(hdev->idle_busy_ts_arr); kfree(hdev->hl_chip_info); destroy_workqueue(hdev->eq_wq); @@ -277,8 +353,6 @@ static void device_early_fini(struct hl_device *hdev) if (hdev->asic_funcs->early_fini) hdev->asic_funcs->early_fini(hdev); - - mutex_destroy(&hdev->fd_open_cnt_lock); } static void set_freq_to_low_job(struct work_struct *work) @@ -286,9 +360,13 @@ static void set_freq_to_low_job(struct work_struct *work) struct hl_device *hdev = container_of(work, struct hl_device, work_freq.work); - if (atomic_read(&hdev->fd_open_cnt) == 0) + mutex_lock(&hdev->fpriv_list_lock); + + if (!hdev->compute_ctx) hl_device_set_frequency(hdev, PLL_LOW); + mutex_unlock(&hdev->fpriv_list_lock); + schedule_delayed_work(&hdev->work_freq, usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); } @@ -338,7 +416,7 @@ static int device_late_init(struct hl_device *hdev) hdev->high_pll = hdev->asic_prop.high_pll; /* force setting to low frequency */ - atomic_set(&hdev->curr_pll_profile, PLL_LOW); + hdev->curr_pll_profile = PLL_LOW; if (hdev->pm_mng_profile == PM_AUTO) hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW); @@ -381,44 +459,128 @@ static void device_late_fini(struct hl_device *hdev) hdev->late_init_done = false; } +uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms) +{ + struct hl_device_idle_busy_ts *ts; + ktime_t zero_ktime, curr = ktime_get(); + u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx; + s64 period_us, last_start_us, last_end_us, last_busy_time_us, + total_busy_time_us = 0, total_busy_time_ms; + + zero_ktime = ktime_set(0, 0); + period_us = period_ms * USEC_PER_MSEC; + ts = &hdev->idle_busy_ts_arr[last_index]; + + /* check case that device is currently in idle */ + if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) && + !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) { + + last_index--; + /* Handle case idle_busy_ts_idx was 0 */ + if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) + last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; + + ts = &hdev->idle_busy_ts_arr[last_index]; + } + + while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) { + /* Check if we are in last sample case. i.e. if the sample + * begun before the sampling period. This could be a real + * sample or 0 so need to handle both cases + */ + last_start_us = ktime_to_us( + ktime_sub(curr, ts->idle_to_busy_ts)); + + if (last_start_us > period_us) { + + /* First check two cases: + * 1. If the device is currently busy + * 2. If the device was idle during the whole sampling + * period + */ + + if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) { + /* Check if the device is currently busy */ + if (ktime_compare(ts->idle_to_busy_ts, + zero_ktime)) + return 100; + + /* We either didn't have any activity or we + * reached an entry which is 0. Either way, + * exit and return what was accumulated so far + */ + break; + } + + /* If sample has finished, check it is relevant */ + last_end_us = ktime_to_us( + ktime_sub(curr, ts->busy_to_idle_ts)); + + if (last_end_us > period_us) + break; + + /* It is relevant so add it but with adjustment */ + last_busy_time_us = ktime_to_us( + ktime_sub(ts->busy_to_idle_ts, + ts->idle_to_busy_ts)); + total_busy_time_us += last_busy_time_us - + (last_start_us - period_us); + break; + } + + /* Check if the sample is finished or still open */ + if (ktime_compare(ts->busy_to_idle_ts, zero_ktime)) + last_busy_time_us = ktime_to_us( + ktime_sub(ts->busy_to_idle_ts, + ts->idle_to_busy_ts)); + else + last_busy_time_us = ktime_to_us( + ktime_sub(curr, ts->idle_to_busy_ts)); + + total_busy_time_us += last_busy_time_us; + + last_index--; + /* Handle case idle_busy_ts_idx was 0 */ + if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) + last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; + + ts = &hdev->idle_busy_ts_arr[last_index]; + + overlap_cnt++; + } + + total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us, + USEC_PER_MSEC); + + return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms); +} + /* * hl_device_set_frequency - set the frequency of the device * * @hdev: pointer to habanalabs device structure * @freq: the new frequency value * - * Change the frequency if needed. - * We allose to set PLL to low only if there is no user process - * Returns 0 if no change was done, otherwise returns 1; + * Change the frequency if needed. This function has no protection against + * concurrency, therefore it is assumed that the calling function has protected + * itself against the case of calling this function from multiple threads with + * different values + * + * Returns 0 if no change was done, otherwise returns 1 */ int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq) { - enum hl_pll_frequency old_freq = - (freq == PLL_HIGH) ? PLL_LOW : PLL_HIGH; - int ret; - - if (hdev->pm_mng_profile == PM_MANUAL) + if ((hdev->pm_mng_profile == PM_MANUAL) || + (hdev->curr_pll_profile == freq)) return 0; - ret = atomic_cmpxchg(&hdev->curr_pll_profile, old_freq, freq); - if (ret == freq) - return 0; - - /* - * in case we want to lower frequency, check if device is not - * opened. We must have a check here to workaround race condition with - * hl_device_open - */ - if ((freq == PLL_LOW) && (atomic_read(&hdev->fd_open_cnt) > 0)) { - atomic_set(&hdev->curr_pll_profile, PLL_HIGH); - return 0; - } - dev_dbg(hdev->dev, "Changing device frequency to %s\n", freq == PLL_HIGH ? "high" : "low"); hdev->asic_funcs->set_pll_profile(hdev, freq); + hdev->curr_pll_profile = freq; + return 1; } @@ -449,19 +611,8 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) goto out; } - mutex_lock(&hdev->fd_open_cnt_lock); - - if (atomic_read(&hdev->fd_open_cnt) > 1) { - dev_err(hdev->dev, - "Failed to enable debug mode. More then a single user is using the device\n"); - rc = -EPERM; - goto unlock_fd_open_lock; - } - hdev->in_debug = 1; -unlock_fd_open_lock: - mutex_unlock(&hdev->fd_open_cnt_lock); out: mutex_unlock(&hdev->debug_lock); @@ -568,6 +719,7 @@ disable_device: static void device_kill_open_processes(struct hl_device *hdev) { u16 pending_total, pending_cnt; + struct hl_fpriv *hpriv; struct task_struct *task = NULL; if (hdev->pldm) @@ -575,32 +727,31 @@ static void device_kill_open_processes(struct hl_device *hdev) else pending_total = HL_PENDING_RESET_PER_SEC; - pending_cnt = pending_total; - - /* Flush all processes that are inside hl_open */ - mutex_lock(&hdev->fd_open_cnt_lock); - - while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) { - - pending_cnt--; - - dev_info(hdev->dev, - "Can't HARD reset, waiting for user to close FD\n"); + /* Giving time for user to close FD, and for processes that are inside + * hl_device_open to finish + */ + if (!list_empty(&hdev->fpriv_list)) ssleep(1); - } - if (atomic_read(&hdev->fd_open_cnt)) { - task = get_pid_task(hdev->user_ctx->hpriv->taskpid, - PIDTYPE_PID); + mutex_lock(&hdev->fpriv_list_lock); + + /* This section must be protected because we are dereferencing + * pointers that are freed if the process exits + */ + list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) { + task = get_pid_task(hpriv->taskpid, PIDTYPE_PID); if (task) { - dev_info(hdev->dev, "Killing user processes\n"); + dev_info(hdev->dev, "Killing user process pid=%d\n", + task_pid_nr(task)); send_sig(SIGKILL, task, 1); - msleep(100); + usleep_range(1000, 10000); put_task_struct(task); } } + mutex_unlock(&hdev->fpriv_list_lock); + /* We killed the open users, but because the driver cleans up after the * user contexts are closed (e.g. mmu mappings), we need to wait again * to make sure the cleaning phase is finished before continuing with @@ -609,19 +760,18 @@ static void device_kill_open_processes(struct hl_device *hdev) pending_cnt = pending_total; - while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) { + while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) { + dev_info(hdev->dev, + "Waiting for all unmap operations to finish before hard reset\n"); pending_cnt--; ssleep(1); } - if (atomic_read(&hdev->fd_open_cnt)) + if (!list_empty(&hdev->fpriv_list)) dev_crit(hdev->dev, "Going to hard reset with open user contexts\n"); - - mutex_unlock(&hdev->fd_open_cnt_lock); - } static void device_hard_reset_pending(struct work_struct *work) @@ -630,8 +780,6 @@ static void device_hard_reset_pending(struct work_struct *work) container_of(work, struct hl_device_reset_work, reset_work); struct hl_device *hdev = device_reset_work->hdev; - device_kill_open_processes(hdev); - hl_device_reset(hdev, true, true); kfree(device_reset_work); @@ -679,13 +827,16 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, /* This also blocks future CS/VM/JOB completion operations */ hdev->disabled = true; - /* - * Flush anyone that is inside the critical section of enqueue + /* Flush anyone that is inside the critical section of enqueue * jobs to the H/W */ hdev->asic_funcs->hw_queues_lock(hdev); hdev->asic_funcs->hw_queues_unlock(hdev); + /* Flush anyone that is inside device open */ + mutex_lock(&hdev->fpriv_list_lock); + mutex_unlock(&hdev->fpriv_list_lock); + dev_err(hdev->dev, "Going to RESET device!\n"); } @@ -736,6 +887,19 @@ again: /* Go over all the queues, release all CS and their jobs */ hl_cs_rollback_all(hdev); + if (hard_reset) { + /* Kill processes here after CS rollback. This is because the + * process can't really exit until all its CSs are done, which + * is what we do in cs rollback + */ + device_kill_open_processes(hdev); + + /* Flush the Event queue workers to make sure no other thread is + * reading or writing to registers during the reset + */ + flush_workqueue(hdev->eq_wq); + } + /* Release kernel context */ if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1)) hdev->kernel_ctx = NULL; @@ -754,12 +918,24 @@ again: for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) hl_cq_reset(hdev, &hdev->completion_queue[i]); + hdev->idle_busy_ts_idx = 0; + hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0); + hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0); + + if (hdev->cs_active_cnt) + dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n", + hdev->cs_active_cnt); + + mutex_lock(&hdev->fpriv_list_lock); + /* Make sure the context switch phase will run again */ - if (hdev->user_ctx) { - atomic_set(&hdev->user_ctx->thread_ctx_switch_token, 1); - hdev->user_ctx->thread_ctx_switch_wait_token = 0; + if (hdev->compute_ctx) { + atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1); + hdev->compute_ctx->thread_ctx_switch_wait_token = 0; } + mutex_unlock(&hdev->fpriv_list_lock); + /* Finished tear-down, starting to re-initialize */ if (hard_reset) { @@ -788,7 +964,7 @@ again: goto out_err; } - hdev->user_ctx = NULL; + hdev->compute_ctx = NULL; rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); if (rc) { @@ -849,6 +1025,8 @@ again: else hdev->soft_reset_cnt++; + dev_warn(hdev->dev, "Successfully finished resetting the device\n"); + return 0; out_err: @@ -883,17 +1061,43 @@ out_err: int hl_device_init(struct hl_device *hdev, struct class *hclass) { int i, rc, cq_ready_cnt; + char *name; + bool add_cdev_sysfs_on_err = false; - /* Create device */ - rc = device_setup_cdev(hdev, hclass, hdev->id, &hl_ops); + name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2); + if (!name) { + rc = -ENOMEM; + goto out_disabled; + } + + /* Initialize cdev and device structures */ + rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name, + &hdev->cdev, &hdev->dev); + + kfree(name); if (rc) goto out_disabled; + name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2); + if (!name) { + rc = -ENOMEM; + goto free_dev; + } + + /* Initialize cdev and device structures for control device */ + rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops, + name, &hdev->cdev_ctrl, &hdev->dev_ctrl); + + kfree(name); + + if (rc) + goto free_dev; + /* Initialize ASIC function pointers and perform early init */ rc = device_early_init(hdev); if (rc) - goto release_device; + goto free_dev_ctrl; /* * Start calling ASIC initialization. First S/W then H/W and finally @@ -965,7 +1169,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) goto mmu_fini; } - hdev->user_ctx = NULL; + hdev->compute_ctx = NULL; rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); if (rc) { @@ -980,12 +1184,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) goto release_ctx; } - rc = hl_sysfs_init(hdev); - if (rc) { - dev_err(hdev->dev, "failed to initialize sysfs\n"); - goto free_cb_pool; - } - hl_debugfs_add_device(hdev); if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { @@ -994,6 +1192,12 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) hdev->asic_funcs->hw_fini(hdev, true); } + /* + * From this point, in case of an error, add char devices and create + * sysfs nodes as part of the error flow, to allow debugging. + */ + add_cdev_sysfs_on_err = true; + rc = hdev->asic_funcs->hw_init(hdev); if (rc) { dev_err(hdev->dev, "failed to initialize the H/W\n"); @@ -1030,9 +1234,24 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) } /* - * hl_hwmon_init must be called after device_late_init, because only + * Expose devices and sysfs nodes to user. + * From here there is no need to add char devices and create sysfs nodes + * in case of an error. + */ + add_cdev_sysfs_on_err = false; + rc = device_cdev_sysfs_add(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed to add char devices and sysfs nodes\n"); + rc = 0; + goto out_disabled; + } + + /* + * hl_hwmon_init() must be called after device_late_init(), because only * there we get the information from the device about which - * hwmon-related sensors the device supports + * hwmon-related sensors the device supports. + * Furthermore, it must be done after adding the device to the system. */ rc = hl_hwmon_init(hdev); if (rc) { @@ -1048,8 +1267,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) return 0; -free_cb_pool: - hl_cb_pool_fini(hdev); release_ctx: if (hl_ctx_put(hdev->kernel_ctx) != 1) dev_err(hdev->dev, @@ -1068,18 +1285,21 @@ sw_fini: hdev->asic_funcs->sw_fini(hdev); early_fini: device_early_fini(hdev); -release_device: - device_destroy(hclass, hdev->dev->devt); - cdev_del(&hdev->cdev); +free_dev_ctrl: + kfree(hdev->dev_ctrl); +free_dev: + kfree(hdev->dev); out_disabled: hdev->disabled = true; + if (add_cdev_sysfs_on_err) + device_cdev_sysfs_add(hdev); if (hdev->pdev) dev_err(&hdev->pdev->dev, "Failed to initialize hl%d. Device is NOT usable !\n", - hdev->id); + hdev->id / 2); else pr_err("Failed to initialize hl%d. Device is NOT usable !\n", - hdev->id); + hdev->id / 2); return rc; } @@ -1120,16 +1340,17 @@ void hl_device_fini(struct hl_device *hdev) /* Mark device as disabled */ hdev->disabled = true; - /* - * Flush anyone that is inside the critical section of enqueue + /* Flush anyone that is inside the critical section of enqueue * jobs to the H/W */ hdev->asic_funcs->hw_queues_lock(hdev); hdev->asic_funcs->hw_queues_unlock(hdev); - hdev->hard_reset_pending = true; + /* Flush anyone that is inside device open */ + mutex_lock(&hdev->fpriv_list_lock); + mutex_unlock(&hdev->fpriv_list_lock); - device_kill_open_processes(hdev); + hdev->hard_reset_pending = true; hl_hwmon_fini(hdev); @@ -1137,8 +1358,6 @@ void hl_device_fini(struct hl_device *hdev) hl_debugfs_remove_device(hdev); - hl_sysfs_fini(hdev); - /* * Halt the engines and disable interrupts so we won't get any more * completions from H/W and we won't have any accesses from the @@ -1149,6 +1368,12 @@ void hl_device_fini(struct hl_device *hdev) /* Go over all the queues, release all CS and their jobs */ hl_cs_rollback_all(hdev); + /* Kill processes here after CS rollback. This is because the process + * can't really exit until all its CSs are done, which is what we + * do in cs rollback + */ + device_kill_open_processes(hdev); + hl_cb_pool_fini(hdev); /* Release kernel context */ @@ -1175,9 +1400,8 @@ void hl_device_fini(struct hl_device *hdev) device_early_fini(hdev); - /* Hide device from user */ - device_destroy(hdev->dev->class, hdev->dev->devt); - cdev_del(&hdev->cdev); + /* Hide devices and sysfs nodes from user */ + device_cdev_sysfs_del(hdev); pr_info("removed device successfully\n"); } diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c index ea2ca67fbfbf..f5bd03171dac 100644 --- a/drivers/misc/habanalabs/firmware_if.c +++ b/drivers/misc/habanalabs/firmware_if.c @@ -143,10 +143,7 @@ int hl_fw_test_cpu_queue(struct hl_device *hdev) sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); if (!rc) { - if (result == ARMCP_PACKET_FENCE_VAL) - dev_info(hdev->dev, - "queue test on CPU queue succeeded\n"); - else + if (result != ARMCP_PACKET_FENCE_VAL) dev_err(hdev->dev, "CPU queue test failed (0x%08lX)\n", result); } else { diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 271c5c8f53b4..7344e8a222ae 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -9,6 +9,7 @@ #include "include/hw_ip/mmu/mmu_general.h" #include "include/hw_ip/mmu/mmu_v1_0.h" #include "include/goya/asic_reg/goya_masks.h" +#include "include/goya/goya_reg_map.h" #include <linux/pci.h> #include <linux/genalloc.h> @@ -41,8 +42,8 @@ * PQ, CQ and CP are not secured. * PQ, CB and the data are on the SRAM/DRAM. * - * Since QMAN DMA is secured, KMD is parsing the DMA CB: - * - KMD checks DMA pointer + * Since QMAN DMA is secured, the driver is parsing the DMA CB: + * - checks DMA pointer * - WREG, MSG_PROT are not allowed. * - MSG_LONG/SHORT are allowed. * @@ -55,15 +56,15 @@ * QMAN DMA: PQ, CQ and CP are secured. * MMU is set to bypass on the Secure props register of the QMAN. * The reasons we don't enable MMU for PQ, CQ and CP are: - * - PQ entry is in kernel address space and KMD doesn't map it. + * - PQ entry is in kernel address space and the driver doesn't map it. * - CP writes to MSIX register and to kernel address space (completion * queue). * - * DMA is not secured but because CP is secured, KMD still needs to parse the - * CB, but doesn't need to check the DMA addresses. + * DMA is not secured but because CP is secured, the driver still needs to parse + * the CB, but doesn't need to check the DMA addresses. * - * For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD - * doesn't map memory in MMU. + * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA and + * the driver doesn't map memory in MMU. * * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode) * @@ -71,6 +72,9 @@ * */ +#define GOYA_UBOOT_FW_FILE "habanalabs/goya/goya-u-boot.bin" +#define GOYA_LINUX_FW_FILE "habanalabs/goya/goya-fit.itb" + #define GOYA_MMU_REGS_NUM 63 #define GOYA_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ @@ -335,18 +339,21 @@ void goya_get_fixed_properties(struct hl_device *hdev) for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; - prop->hw_queues_props[i].kmd_only = 0; + prop->hw_queues_props[i].driver_only = 0; + prop->hw_queues_props[i].requires_kernel_cb = 1; } for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; - prop->hw_queues_props[i].kmd_only = 1; + prop->hw_queues_props[i].driver_only = 1; + prop->hw_queues_props[i].requires_kernel_cb = 0; } for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES + NUMBER_OF_INT_HW_QUEUES; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_INT; - prop->hw_queues_props[i].kmd_only = 0; + prop->hw_queues_props[i].driver_only = 0; + prop->hw_queues_props[i].requires_kernel_cb = 0; } for (; i < HL_MAX_QUEUES; i++) @@ -376,6 +383,23 @@ void goya_get_fixed_properties(struct hl_device *hdev) prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE; prop->dram_page_size = PAGE_SIZE_2MB; + prop->dmmu.hop0_shift = HOP0_SHIFT; + prop->dmmu.hop1_shift = HOP1_SHIFT; + prop->dmmu.hop2_shift = HOP2_SHIFT; + prop->dmmu.hop3_shift = HOP3_SHIFT; + prop->dmmu.hop4_shift = HOP4_SHIFT; + prop->dmmu.hop0_mask = HOP0_MASK; + prop->dmmu.hop1_mask = HOP1_MASK; + prop->dmmu.hop2_mask = HOP2_MASK; + prop->dmmu.hop3_mask = HOP3_MASK; + prop->dmmu.hop4_mask = HOP4_MASK; + prop->dmmu.huge_page_size = PAGE_SIZE_2MB; + + /* No difference between PMMU and DMMU except of page size */ + memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu)); + prop->dmmu.page_size = PAGE_SIZE_2MB; + prop->pmmu.page_size = PAGE_SIZE_4KB; + prop->va_space_host_start_address = VA_HOST_SPACE_START; prop->va_space_host_end_address = VA_HOST_SPACE_END; prop->va_space_dram_start_address = VA_DDR_SPACE_START; @@ -392,6 +416,9 @@ void goya_get_fixed_properties(struct hl_device *hdev) prop->tpc_enabled_mask = TPC_ENABLED_MASK; prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; + + strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME, + CARD_NAME_MAX_LEN); } /* @@ -1006,36 +1033,34 @@ int goya_init_cpu_queues(struct hl_device *hdev) eq = &hdev->event_queue; - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_0, - lower_32_bits(cpu_pq->bus_address)); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_1, - upper_32_bits(cpu_pq->bus_address)); + WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); + WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_2, lower_32_bits(eq->bus_address)); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(eq->bus_address)); + WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); + WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8, + WREG32(mmCPU_CQ_BASE_ADDR_LOW, lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR)); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9, + WREG32(mmCPU_CQ_BASE_ADDR_HIGH, upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR)); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, HL_CPU_ACCESSIBLE_MEM_SIZE); + WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); + WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); + WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); /* Used for EQ CI */ - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, 0); + WREG32(mmCPU_EQ_CI, 0); WREG32(mmCPU_IF_PF_PQ_PI, 0); - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_7, PQ_INIT_STATUS_READY_FOR_CP); + WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP); WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GOYA_ASYNC_EVENT_ID_PI_UPDATE); err = hl_poll_timeout( hdev, - mmPSOC_GLOBAL_CONF_SCRATCHPAD_7, + mmCPU_PQ_INIT_STATUS, status, (status == PQ_INIT_STATUS_READY_FOR_HOST), 1000, @@ -1455,6 +1480,9 @@ static void goya_init_golden_registers(struct hl_device *hdev) 1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT); WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset, 1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT); + + WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset, + ICACHE_FETCH_LINE_NUM, 2); } WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT); @@ -1534,7 +1562,6 @@ static void goya_init_mme_cmdq(struct hl_device *hdev) u32 mtr_base_lo, mtr_base_hi; u32 so_base_lo, so_base_hi; u32 gic_base_lo, gic_base_hi; - u64 qman_base_addr; mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); @@ -1546,9 +1573,6 @@ static void goya_init_mme_cmdq(struct hl_device *hdev) gic_base_hi = upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); - qman_base_addr = hdev->asic_prop.sram_base_address + - MME_QMAN_BASE_OFFSET; - WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo); WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi); WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo); @@ -2063,6 +2087,25 @@ static void goya_disable_msix(struct hl_device *hdev) goya->hw_cap_initialized &= ~HW_CAP_MSIX; } +static void goya_enable_timestamp(struct hl_device *hdev) +{ + /* Disable the timestamp counter */ + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); + + /* Zero the lower/upper parts of the 64-bit counter */ + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); + + /* Enable the counter */ + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); +} + +static void goya_disable_timestamp(struct hl_device *hdev) +{ + /* Disable the timestamp counter */ + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); +} + static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) { u32 wait_timeout_ms, cpu_timeout_ms; @@ -2103,6 +2146,8 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) goya_disable_external_queues(hdev); goya_disable_internal_queues(hdev); + goya_disable_timestamp(hdev); + if (hard_reset) { goya_disable_msix(hdev); goya_mmu_remove_device_cpu_mappings(hdev); @@ -2121,13 +2166,11 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) */ static int goya_push_uboot_to_device(struct hl_device *hdev) { - char fw_name[200]; void __iomem *dst; - snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin"); dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET; - return hl_fw_push_fw_to_device(hdev, fw_name, dst); + return hl_fw_push_fw_to_device(hdev, GOYA_UBOOT_FW_FILE, dst); } /* @@ -2140,18 +2183,16 @@ static int goya_push_uboot_to_device(struct hl_device *hdev) */ static int goya_push_linux_to_device(struct hl_device *hdev) { - char fw_name[200]; void __iomem *dst; - snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb"); dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET; - return hl_fw_push_fw_to_device(hdev, fw_name, dst); + return hl_fw_push_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst); } static int goya_pldm_init_cpu(struct hl_device *hdev) { - u32 val, unit_rst_val; + u32 unit_rst_val; int rc; /* Must initialize SRAM scrambler before pushing u-boot to SRAM */ @@ -2159,14 +2200,14 @@ static int goya_pldm_init_cpu(struct hl_device *hdev) /* Put ARM cores into reset */ WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL, CPU_RESET_ASSERT); - val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL); + RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL); /* Reset the CA53 MACRO */ unit_rst_val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N); WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, CA53_RESET); - val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N); + RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N); WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, unit_rst_val); - val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N); + RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N); rc = goya_push_uboot_to_device(hdev); if (rc) @@ -2187,7 +2228,7 @@ static int goya_pldm_init_cpu(struct hl_device *hdev) /* Release ARM core 0 from reset */ WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL, CPU_RESET_CORE0_DEASSERT); - val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL); + RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL); return 0; } @@ -2205,12 +2246,12 @@ static void goya_read_device_fw_version(struct hl_device *hdev, switch (fwc) { case FW_COMP_UBOOT: - ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_29); + ver_off = RREG32(mmUBOOT_VER_OFFSET); dest = hdev->asic_prop.uboot_ver; name = "U-Boot"; break; case FW_COMP_PREBOOT: - ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_28); + ver_off = RREG32(mmPREBOOT_VER_OFFSET); dest = hdev->asic_prop.preboot_ver; name = "Preboot"; break; @@ -2271,6 +2312,10 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) 10000, cpu_timeout); + /* Read U-Boot version now in case we will later fail */ + goya_read_device_fw_version(hdev, FW_COMP_UBOOT); + goya_read_device_fw_version(hdev, FW_COMP_PREBOOT); + if (rc) { dev_err(hdev->dev, "Error in ARM u-boot!"); switch (status) { @@ -2308,6 +2353,11 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) "ARM status %d - u-boot stopped by user\n", status); break; + case CPU_BOOT_STATUS_TS_INIT_FAIL: + dev_err(hdev->dev, + "ARM status %d - Thermal Sensor initialization failed\n", + status); + break; default: dev_err(hdev->dev, "ARM status %d - Invalid status code\n", @@ -2317,10 +2367,6 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) return -EIO; } - /* Read U-Boot version now in case we will later fail */ - goya_read_device_fw_version(hdev, FW_COMP_UBOOT); - goya_read_device_fw_version(hdev, FW_COMP_PREBOOT); - if (!hdev->fw_loading) { dev_info(hdev->dev, "Skip loading FW\n"); goto out; @@ -2433,7 +2479,8 @@ int goya_mmu_init(struct hl_device *hdev) WREG32_AND(mmSTLB_STLB_FEATURE_EN, (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK)); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true); + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, + VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK); WREG32(mmMMU_MMU_ENABLE, 1); WREG32(mmMMU_SPI_MASK, 0xF); @@ -2455,13 +2502,12 @@ err: static int goya_hw_init(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; - u32 val; int rc; dev_info(hdev->dev, "Starting initialization of H/W\n"); /* Perform read from the device to make sure device is up */ - val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); /* * Let's mark in the H/W that we have reached this point. We check @@ -2469,7 +2515,7 @@ static int goya_hw_init(struct hl_device *hdev) * we need to reset the chip before doing H/W init. This register is * cleared by the H/W upon H/W reset */ - WREG32(mmPSOC_GLOBAL_CONF_APP_STATUS, HL_DEVICE_HW_STATE_DIRTY); + WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); rc = goya_init_cpu(hdev, GOYA_CPU_TIMEOUT_USEC); if (rc) { @@ -2505,13 +2551,15 @@ static int goya_hw_init(struct hl_device *hdev) goya_init_tpc_qmans(hdev); + goya_enable_timestamp(hdev); + /* MSI-X must be enabled before CPU queues are initialized */ rc = goya_enable_msix(hdev); if (rc) goto disable_queues; /* Perform read from the device to flush all MSI-X configuration */ - val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); return 0; @@ -2831,7 +2879,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) { dev_err_ratelimited(hdev->dev, - "Can't send KMD job on QMAN0 because the device is not idle\n"); + "Can't send driver job on QMAN0 because the device is not idle\n"); return -EBUSY; } @@ -2956,9 +3004,6 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id) "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", hw_queue_id, (unsigned long long) fence_dma_addr, tmp); rc = -EIO; - } else { - dev_info(hdev->dev, "queue test on H/W queue %d succeeded\n", - hw_queue_id); } free_pkt: @@ -3903,7 +3948,7 @@ static int goya_parse_cb_no_ext_queue(struct hl_device *hdev, return 0; dev_err(hdev->dev, - "Internal CB address %px + 0x%x is not in SRAM nor in DRAM\n", + "Internal CB address 0x%px + 0x%x is not in SRAM nor in DRAM\n", parser->user_cb, parser->user_cb_size); return -EFAULT; @@ -3913,7 +3958,7 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) { struct goya_device *goya = hdev->asic_specific; - if (!parser->ext_queue) + if (parser->queue_type == QUEUE_TYPE_INT) return goya_parse_cb_no_ext_queue(hdev, parser); if (goya->hw_cap_initialized & HW_CAP_MMU) @@ -3949,7 +3994,7 @@ void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address, void goya_update_eq_ci(struct hl_device *hdev, u32 val) { - WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, val); + WREG32(mmCPU_EQ_CI, val); } void goya_restore_phase_topology(struct hl_device *hdev) @@ -4447,6 +4492,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) struct goya_device *goya = hdev->asic_specific; goya->events_stat[event_type]++; + goya->events_stat_aggregate[event_type]++; switch (event_type) { case GOYA_ASYNC_EVENT_ID_PCIE_IF: @@ -4528,12 +4574,16 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) } } -void *goya_get_events_stat(struct hl_device *hdev, u32 *size) +void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) { struct goya_device *goya = hdev->asic_specific; - *size = (u32) sizeof(goya->events_stat); + if (aggregate) { + *size = (u32) sizeof(goya->events_stat_aggregate); + return goya->events_stat_aggregate; + } + *size = (u32) sizeof(goya->events_stat); return goya->events_stat; } @@ -4579,7 +4629,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, lin_dma_pkt++; } while (--lin_dma_pkts_cnt); - job = hl_cs_allocate_job(hdev, true); + job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); if (!job) { dev_err(hdev->dev, "Failed to allocate a new job\n"); rc = -ENOMEM; @@ -4808,13 +4858,15 @@ static void goya_mmu_prepare(struct hl_device *hdev, u32 asid) goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid); } -static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard) +static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, + u32 flags) { struct goya_device *goya = hdev->asic_specific; u32 status, timeout_usec; int rc; - if (!(goya->hw_cap_initialized & HW_CAP_MMU)) + if (!(goya->hw_cap_initialized & HW_CAP_MMU) || + hdev->hard_reset_pending) return; /* no need in L1 only invalidation in Goya */ @@ -4853,7 +4905,8 @@ static void goya_mmu_invalidate_cache_range(struct hl_device *hdev, u32 status, timeout_usec, inv_data, pi; int rc; - if (!(goya->hw_cap_initialized & HW_CAP_MMU)) + if (!(goya->hw_cap_initialized & HW_CAP_MMU) || + hdev->hard_reset_pending) return; /* no need in L1 only invalidation in Goya */ @@ -4934,6 +4987,10 @@ int goya_armcp_info_get(struct hl_device *hdev) prop->dram_end_address = prop->dram_base_address + dram_size; } + if (!strlen(prop->armcp_info.card_name)) + strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME, + CARD_NAME_MAX_LEN); + return 0; } @@ -5047,7 +5104,7 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data, static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev) { - return RREG32(mmPSOC_GLOBAL_CONF_APP_STATUS); + return RREG32(mmHW_STATE); } static const struct hl_asic_funcs goya_funcs = { @@ -5106,7 +5163,8 @@ static const struct hl_asic_funcs goya_funcs = { .init_iatu = goya_init_iatu, .rreg = hl_rreg, .wreg = hl_wreg, - .halt_coresight = goya_halt_coresight + .halt_coresight = goya_halt_coresight, + .get_clk_rate = goya_get_clk_rate }; /* diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index d7f48c9c41cd..c3230cb6e25c 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -55,6 +55,8 @@ #define DRAM_PHYS_DEFAULT_SIZE 0x100000000ull /* 4GB */ +#define GOYA_DEFAULT_CARD_NAME "HL1000" + /* DRAM Memory Map */ #define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */ @@ -68,19 +70,19 @@ MMU_PAGE_TABLES_SIZE) #define MMU_CACHE_MNG_ADDR (MMU_DRAM_DEFAULT_PAGE_ADDR + \ MMU_DRAM_DEFAULT_PAGE_SIZE) -#define DRAM_KMD_END_ADDR (MMU_CACHE_MNG_ADDR + \ +#define DRAM_DRIVER_END_ADDR (MMU_CACHE_MNG_ADDR + \ MMU_CACHE_MNG_SIZE) #define DRAM_BASE_ADDR_USER 0x20000000 -#if (DRAM_KMD_END_ADDR > DRAM_BASE_ADDR_USER) -#error "KMD must reserve no more than 512MB" +#if (DRAM_DRIVER_END_ADDR > DRAM_BASE_ADDR_USER) +#error "Driver must reserve no more than 512MB" #endif /* - * SRAM Memory Map for KMD + * SRAM Memory Map for Driver * - * KMD occupies KMD_SRAM_SIZE bytes from the start of SRAM. It is used for + * Driver occupies DRIVER_SRAM_SIZE bytes from the start of SRAM. It is used for * MME/TPC QMANs * */ @@ -106,10 +108,10 @@ #define TPC7_QMAN_BASE_OFFSET (TPC6_QMAN_BASE_OFFSET + \ (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)) -#define SRAM_KMD_RES_OFFSET (TPC7_QMAN_BASE_OFFSET + \ +#define SRAM_DRIVER_RES_OFFSET (TPC7_QMAN_BASE_OFFSET + \ (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)) -#if (SRAM_KMD_RES_OFFSET >= GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START) +#if (SRAM_DRIVER_RES_OFFSET >= GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START) #error "MME/TPC QMANs SRAM space exceeds limit" #endif @@ -162,6 +164,7 @@ struct goya_device { u64 ddr_bar_cur_addr; u32 events_stat[GOYA_ASYNC_EVENT_ID_SIZE]; + u32 events_stat_aggregate[GOYA_ASYNC_EVENT_ID_SIZE]; u32 hw_cap_initialized; u8 device_cpu_mmu_mappings_done; }; @@ -215,7 +218,7 @@ int goya_suspend(struct hl_device *hdev); int goya_resume(struct hl_device *hdev); void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry); -void *goya_get_events_stat(struct hl_device *hdev, u32 *size); +void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size); void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address, u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec); @@ -230,4 +233,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr); void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev); +int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); + #endif /* GOYAP_H_ */ diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c index d7ec7ad84cc6..c1ee6e2b5dff 100644 --- a/drivers/misc/habanalabs/goya/goya_coresight.c +++ b/drivers/misc/habanalabs/goya/goya_coresight.c @@ -8,6 +8,7 @@ #include "goyaP.h" #include "include/goya/goya_coresight.h" #include "include/goya/asic_reg/goya_regs.h" +#include "include/goya/asic_reg/goya_masks.h" #include <uapi/misc/habanalabs.h> @@ -15,6 +16,10 @@ #define GOYA_PLDM_CORESIGHT_TIMEOUT_USEC (CORESIGHT_TIMEOUT_USEC * 100) +#define SPMU_SECTION_SIZE DMA_CH_0_CS_SPMU_MAX_OFFSET +#define SPMU_EVENT_TYPES_OFFSET 0x400 +#define SPMU_MAX_COUNTERS 6 + static u64 debug_stm_regs[GOYA_STM_LAST + 1] = { [GOYA_STM_CPU] = mmCPU_STM_BASE, [GOYA_STM_DMA_CH_0_CS] = mmDMA_CH_0_CS_STM_BASE, @@ -226,9 +231,16 @@ static int goya_config_stm(struct hl_device *hdev, struct hl_debug_params *params) { struct hl_debug_params_stm *input; - u64 base_reg = debug_stm_regs[params->reg_idx] - CFG_BASE; + u64 base_reg; int rc; + if (params->reg_idx >= ARRAY_SIZE(debug_stm_regs)) { + dev_err(hdev->dev, "Invalid register index in STM\n"); + return -EINVAL; + } + + base_reg = debug_stm_regs[params->reg_idx] - CFG_BASE; + WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK); if (params->enable) { @@ -288,10 +300,17 @@ static int goya_config_etf(struct hl_device *hdev, struct hl_debug_params *params) { struct hl_debug_params_etf *input; - u64 base_reg = debug_etf_regs[params->reg_idx] - CFG_BASE; + u64 base_reg; u32 val; int rc; + if (params->reg_idx >= ARRAY_SIZE(debug_etf_regs)) { + dev_err(hdev->dev, "Invalid register index in ETF\n"); + return -EINVAL; + } + + base_reg = debug_etf_regs[params->reg_idx] - CFG_BASE; + WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK); val = RREG32(base_reg + 0x304); @@ -359,33 +378,32 @@ static int goya_config_etr(struct hl_device *hdev, struct hl_debug_params *params) { struct hl_debug_params_etr *input; - u64 base_reg = mmPSOC_ETR_BASE - CFG_BASE; u32 val; int rc; - WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK); + WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK); - val = RREG32(base_reg + 0x304); + val = RREG32(mmPSOC_ETR_FFCR); val |= 0x1000; - WREG32(base_reg + 0x304, val); + WREG32(mmPSOC_ETR_FFCR, val); val |= 0x40; - WREG32(base_reg + 0x304, val); + WREG32(mmPSOC_ETR_FFCR, val); - rc = goya_coresight_timeout(hdev, base_reg + 0x304, 6, false); + rc = goya_coresight_timeout(hdev, mmPSOC_ETR_FFCR, 6, false); if (rc) { dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n", params->enable ? "enable" : "disable", rc); return rc; } - rc = goya_coresight_timeout(hdev, base_reg + 0xC, 2, true); + rc = goya_coresight_timeout(hdev, mmPSOC_ETR_STS, 2, true); if (rc) { dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n", params->enable ? "enable" : "disable", rc); return rc; } - WREG32(base_reg + 0x20, 0); + WREG32(mmPSOC_ETR_CTL, 0); if (params->enable) { input = params->input; @@ -405,25 +423,26 @@ static int goya_config_etr(struct hl_device *hdev, return -EINVAL; } - WREG32(base_reg + 0x34, 0x3FFC); - WREG32(base_reg + 0x4, input->buffer_size); - WREG32(base_reg + 0x28, input->sink_mode); - WREG32(base_reg + 0x110, 0x700); - WREG32(base_reg + 0x118, + WREG32(mmPSOC_ETR_BUFWM, 0x3FFC); + WREG32(mmPSOC_ETR_RSZ, input->buffer_size); + WREG32(mmPSOC_ETR_MODE, input->sink_mode); + WREG32(mmPSOC_ETR_AXICTL, + 0x700 | PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT); + WREG32(mmPSOC_ETR_DBALO, lower_32_bits(input->buffer_address)); - WREG32(base_reg + 0x11C, + WREG32(mmPSOC_ETR_DBAHI, upper_32_bits(input->buffer_address)); - WREG32(base_reg + 0x304, 3); - WREG32(base_reg + 0x308, 0xA); - WREG32(base_reg + 0x20, 1); + WREG32(mmPSOC_ETR_FFCR, 3); + WREG32(mmPSOC_ETR_PSCR, 0xA); + WREG32(mmPSOC_ETR_CTL, 1); } else { - WREG32(base_reg + 0x34, 0); - WREG32(base_reg + 0x4, 0x400); - WREG32(base_reg + 0x118, 0); - WREG32(base_reg + 0x11C, 0); - WREG32(base_reg + 0x308, 0); - WREG32(base_reg + 0x28, 0); - WREG32(base_reg + 0x304, 0); + WREG32(mmPSOC_ETR_BUFWM, 0); + WREG32(mmPSOC_ETR_RSZ, 0x400); + WREG32(mmPSOC_ETR_DBALO, 0); + WREG32(mmPSOC_ETR_DBAHI, 0); + WREG32(mmPSOC_ETR_PSCR, 0); + WREG32(mmPSOC_ETR_MODE, 0); + WREG32(mmPSOC_ETR_FFCR, 0); if (params->output_size >= sizeof(u64)) { u32 rwp, rwphi; @@ -433,8 +452,8 @@ static int goya_config_etr(struct hl_device *hdev, * the buffer is set in the RWP register (lower 32 * bits), and in the RWPHI register (upper 8 bits). */ - rwp = RREG32(base_reg + 0x18); - rwphi = RREG32(base_reg + 0x3c) & 0xff; + rwp = RREG32(mmPSOC_ETR_RWP); + rwphi = RREG32(mmPSOC_ETR_RWPHI) & 0xff; *(u64 *) params->output = ((u64) rwphi << 32) | rwp; } } @@ -445,11 +464,18 @@ static int goya_config_etr(struct hl_device *hdev, static int goya_config_funnel(struct hl_device *hdev, struct hl_debug_params *params) { - WREG32(debug_funnel_regs[params->reg_idx] - CFG_BASE + 0xFB0, - CORESIGHT_UNLOCK); + u64 base_reg; + + if (params->reg_idx >= ARRAY_SIZE(debug_funnel_regs)) { + dev_err(hdev->dev, "Invalid register index in FUNNEL\n"); + return -EINVAL; + } + + base_reg = debug_funnel_regs[params->reg_idx] - CFG_BASE; - WREG32(debug_funnel_regs[params->reg_idx] - CFG_BASE, - params->enable ? 0x33F : 0); + WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK); + + WREG32(base_reg, params->enable ? 0x33F : 0); return 0; } @@ -458,9 +484,16 @@ static int goya_config_bmon(struct hl_device *hdev, struct hl_debug_params *params) { struct hl_debug_params_bmon *input; - u64 base_reg = debug_bmon_regs[params->reg_idx] - CFG_BASE; + u64 base_reg; u32 pcie_base = 0; + if (params->reg_idx >= ARRAY_SIZE(debug_bmon_regs)) { + dev_err(hdev->dev, "Invalid register index in BMON\n"); + return -EINVAL; + } + + base_reg = debug_bmon_regs[params->reg_idx] - CFG_BASE; + WREG32(base_reg + 0x104, 1); if (params->enable) { @@ -522,7 +555,7 @@ static int goya_config_bmon(struct hl_device *hdev, static int goya_config_spmu(struct hl_device *hdev, struct hl_debug_params *params) { - u64 base_reg = debug_spmu_regs[params->reg_idx] - CFG_BASE; + u64 base_reg; struct hl_debug_params_spmu *input = params->input; u64 *output; u32 output_arr_len; @@ -531,6 +564,13 @@ static int goya_config_spmu(struct hl_device *hdev, u32 cycle_cnt_idx; int i; + if (params->reg_idx >= ARRAY_SIZE(debug_spmu_regs)) { + dev_err(hdev->dev, "Invalid register index in SPMU\n"); + return -EINVAL; + } + + base_reg = debug_spmu_regs[params->reg_idx] - CFG_BASE; + if (params->enable) { input = params->input; @@ -539,7 +579,13 @@ static int goya_config_spmu(struct hl_device *hdev, if (input->event_types_num < 3) { dev_err(hdev->dev, - "not enough values for SPMU enable\n"); + "not enough event types values for SPMU enable\n"); + return -EINVAL; + } + + if (input->event_types_num > SPMU_MAX_COUNTERS) { + dev_err(hdev->dev, + "too many event types values for SPMU enable\n"); return -EINVAL; } @@ -547,7 +593,8 @@ static int goya_config_spmu(struct hl_device *hdev, WREG32(base_reg + 0xE04, 0x41013040); for (i = 0 ; i < input->event_types_num ; i++) - WREG32(base_reg + 0x400 + i * 4, input->event_types[i]); + WREG32(base_reg + SPMU_EVENT_TYPES_OFFSET + i * 4, + input->event_types[i]); WREG32(base_reg + 0xE04, 0x41013041); WREG32(base_reg + 0xC00, 0x8000003F); @@ -567,6 +614,12 @@ static int goya_config_spmu(struct hl_device *hdev, return -EINVAL; } + if (events_num > SPMU_MAX_COUNTERS) { + dev_err(hdev->dev, + "too many events values for SPMU disable\n"); + return -EINVAL; + } + WREG32(base_reg + 0xE04, 0x41013040); for (i = 0 ; i < events_num ; i++) @@ -584,24 +637,11 @@ static int goya_config_spmu(struct hl_device *hdev, return 0; } -static int goya_config_timestamp(struct hl_device *hdev, - struct hl_debug_params *params) -{ - WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); - if (params->enable) { - WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); - WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); - WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); - } - - return 0; -} - int goya_debug_coresight(struct hl_device *hdev, void *data) { struct hl_debug_params *params = data; u32 val; - int rc; + int rc = 0; switch (params->op) { case HL_DEBUG_OP_STM: @@ -623,7 +663,7 @@ int goya_debug_coresight(struct hl_device *hdev, void *data) rc = goya_config_spmu(hdev, params); break; case HL_DEBUG_OP_TIMESTAMP: - rc = goya_config_timestamp(hdev, params); + /* Do nothing as this opcode is deprecated */ break; default: diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c index 088692c852b6..b2ebc01e27f4 100644 --- a/drivers/misc/habanalabs/goya/goya_hwmgr.c +++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c @@ -32,6 +32,37 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq) } } +int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) +{ + long value; + + if (hl_device_disabled_or_in_reset(hdev)) + return -ENODEV; + + value = hl_get_frequency(hdev, MME_PLL, false); + + if (value < 0) { + dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", + value); + return value; + } + + *max_clk = (value / 1000 / 1000); + + value = hl_get_frequency(hdev, MME_PLL, true); + + if (value < 0) { + dev_err(hdev->dev, + "Failed to retrieve device current clock %ld\n", + value); + return value; + } + + *cur_clk = (value / 1000 / 1000); + + return 0; +} + static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -230,18 +261,127 @@ static ssize_t ic_clk_curr_show(struct device *dev, return sprintf(buf, "%lu\n", value); } +static ssize_t pm_mng_profile_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + if (hl_device_disabled_or_in_reset(hdev)) + return -ENODEV; + + return sprintf(buf, "%s\n", + (hdev->pm_mng_profile == PM_AUTO) ? "auto" : + (hdev->pm_mng_profile == PM_MANUAL) ? "manual" : + "unknown"); +} + +static ssize_t pm_mng_profile_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + if (hl_device_disabled_or_in_reset(hdev)) { + count = -ENODEV; + goto out; + } + + mutex_lock(&hdev->fpriv_list_lock); + + if (hdev->compute_ctx) { + dev_err(hdev->dev, + "Can't change PM profile while compute context is opened on the device\n"); + count = -EPERM; + goto unlock_mutex; + } + + if (strncmp("auto", buf, strlen("auto")) == 0) { + /* Make sure we are in LOW PLL when changing modes */ + if (hdev->pm_mng_profile == PM_MANUAL) { + hdev->curr_pll_profile = PLL_HIGH; + hl_device_set_frequency(hdev, PLL_LOW); + hdev->pm_mng_profile = PM_AUTO; + } + } else if (strncmp("manual", buf, strlen("manual")) == 0) { + if (hdev->pm_mng_profile == PM_AUTO) { + /* Must release the lock because the work thread also + * takes this lock. But before we release it, set + * the mode to manual so nothing will change if a user + * suddenly opens the device + */ + hdev->pm_mng_profile = PM_MANUAL; + + mutex_unlock(&hdev->fpriv_list_lock); + + /* Flush the current work so we can return to the user + * knowing that he is the only one changing frequencies + */ + flush_delayed_work(&hdev->work_freq); + + return count; + } + } else { + dev_err(hdev->dev, "value should be auto or manual\n"); + count = -EINVAL; + } + +unlock_mutex: + mutex_unlock(&hdev->fpriv_list_lock); +out: + return count; +} + +static ssize_t high_pll_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + if (hl_device_disabled_or_in_reset(hdev)) + return -ENODEV; + + return sprintf(buf, "%u\n", hdev->high_pll); +} + +static ssize_t high_pll_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + long value; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) { + count = -ENODEV; + goto out; + } + + rc = kstrtoul(buf, 0, &value); + + if (rc) { + count = -EINVAL; + goto out; + } + + hdev->high_pll = value; + +out: + return count; +} + +static DEVICE_ATTR_RW(high_pll); static DEVICE_ATTR_RW(ic_clk); static DEVICE_ATTR_RO(ic_clk_curr); static DEVICE_ATTR_RW(mme_clk); static DEVICE_ATTR_RO(mme_clk_curr); +static DEVICE_ATTR_RW(pm_mng_profile); static DEVICE_ATTR_RW(tpc_clk); static DEVICE_ATTR_RO(tpc_clk_curr); static struct attribute *goya_dev_attrs[] = { + &dev_attr_high_pll.attr, &dev_attr_ic_clk.attr, &dev_attr_ic_clk_curr.attr, &dev_attr_mme_clk.attr, &dev_attr_mme_clk_curr.attr, + &dev_attr_pm_mng_profile.attr, &dev_attr_tpc_clk.attr, &dev_attr_tpc_clk_curr.attr, NULL, diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index ce83adafcf2d..00c949f4ccd1 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -36,13 +36,15 @@ #define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */ -#define HL_MAX_QUEUES 128 +#define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */ -#define HL_MAX_JOBS_PER_CS 64 +#define HL_MAX_QUEUES 128 /* MUST BE POWER OF 2 and larger than 1 */ #define HL_MAX_PENDING_CS 64 +#define HL_IDLE_BUSY_TS_ARR_SIZE 4096 + /* Memory */ #define MEM_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ @@ -81,23 +83,29 @@ struct hl_fpriv; * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's * memories and/or operates the compute engines. * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU. + * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion + * notifications are sent by H/W. */ enum hl_queue_type { QUEUE_TYPE_NA, QUEUE_TYPE_EXT, QUEUE_TYPE_INT, - QUEUE_TYPE_CPU + QUEUE_TYPE_CPU, + QUEUE_TYPE_HW }; /** * struct hw_queue_properties - queue information. * @type: queue type. - * @kmd_only: true if only KMD is allowed to send a job to this queue, false - * otherwise. + * @driver_only: true if only the driver is allowed to send a job to this queue, + * false otherwise. + * @requires_kernel_cb: true if a CB handle must be provided for jobs on this + * queue, false otherwise (a CB address must be provided). */ struct hw_queue_properties { enum hl_queue_type type; - u8 kmd_only; + u8 driver_only; + u8 requires_kernel_cb; }; /** @@ -106,8 +114,8 @@ struct hw_queue_properties { * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address. */ enum vm_type_t { - VM_TYPE_USERPTR, - VM_TYPE_PHYS_PACK + VM_TYPE_USERPTR = 0x1, + VM_TYPE_PHYS_PACK = 0x2 }; /** @@ -123,12 +131,44 @@ enum hl_device_hw_state { }; /** + * struct hl_mmu_properties - ASIC specific MMU address translation properties. + * @hop0_shift: shift of hop 0 mask. + * @hop1_shift: shift of hop 1 mask. + * @hop2_shift: shift of hop 2 mask. + * @hop3_shift: shift of hop 3 mask. + * @hop4_shift: shift of hop 4 mask. + * @hop0_mask: mask to get the PTE address in hop 0. + * @hop1_mask: mask to get the PTE address in hop 1. + * @hop2_mask: mask to get the PTE address in hop 2. + * @hop3_mask: mask to get the PTE address in hop 3. + * @hop4_mask: mask to get the PTE address in hop 4. + * @page_size: default page size used to allocate memory. + * @huge_page_size: page size used to allocate memory with huge pages. + */ +struct hl_mmu_properties { + u64 hop0_shift; + u64 hop1_shift; + u64 hop2_shift; + u64 hop3_shift; + u64 hop4_shift; + u64 hop0_mask; + u64 hop1_mask; + u64 hop2_mask; + u64 hop3_mask; + u64 hop4_mask; + u32 page_size; + u32 huge_page_size; +}; + +/** * struct asic_fixed_properties - ASIC specific immutable properties. * @hw_queues_props: H/W queues properties. * @armcp_info: received various information from ArmCP regarding the H/W, e.g. * available sensors. * @uboot_ver: F/W U-boot version. * @preboot_ver: F/W Preboot version. + * @dmmu: DRAM MMU address translation properties. + * @pmmu: PCI (host) MMU address translation properties. * @sram_base_address: SRAM physical start address. * @sram_end_address: SRAM physical end address. * @sram_user_base_address - SRAM physical start address for user access. @@ -165,53 +205,55 @@ enum hl_device_hw_state { * @psoc_pci_pll_nf: PCI PLL NF value. * @psoc_pci_pll_od: PCI PLL OD value. * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value. - * @completion_queues_count: number of completion queues. * @high_pll: high PLL frequency used by the device. * @cb_pool_cb_cnt: number of CBs in the CB pool. * @cb_pool_cb_size: size of each CB in the CB pool. * @tpc_enabled_mask: which TPCs are enabled. + * @completion_queues_count: number of completion queues. */ struct asic_fixed_properties { struct hw_queue_properties hw_queues_props[HL_MAX_QUEUES]; - struct armcp_info armcp_info; - char uboot_ver[VERSION_MAX_LEN]; - char preboot_ver[VERSION_MAX_LEN]; - u64 sram_base_address; - u64 sram_end_address; - u64 sram_user_base_address; - u64 dram_base_address; - u64 dram_end_address; - u64 dram_user_base_address; - u64 dram_size; - u64 dram_pci_bar_size; - u64 max_power_default; - u64 va_space_host_start_address; - u64 va_space_host_end_address; - u64 va_space_dram_start_address; - u64 va_space_dram_end_address; - u64 dram_size_for_default_page_mapping; - u64 pcie_dbi_base_address; - u64 pcie_aux_dbi_reg_addr; - u64 mmu_pgt_addr; - u64 mmu_dram_default_page_addr; - u32 mmu_pgt_size; - u32 mmu_pte_size; - u32 mmu_hop_table_size; - u32 mmu_hop0_tables_total_size; - u32 dram_page_size; - u32 cfg_size; - u32 sram_size; - u32 max_asid; - u32 num_of_events; - u32 psoc_pci_pll_nr; - u32 psoc_pci_pll_nf; - u32 psoc_pci_pll_od; - u32 psoc_pci_pll_div_factor; - u32 high_pll; - u32 cb_pool_cb_cnt; - u32 cb_pool_cb_size; - u8 completion_queues_count; - u8 tpc_enabled_mask; + struct armcp_info armcp_info; + char uboot_ver[VERSION_MAX_LEN]; + char preboot_ver[VERSION_MAX_LEN]; + struct hl_mmu_properties dmmu; + struct hl_mmu_properties pmmu; + u64 sram_base_address; + u64 sram_end_address; + u64 sram_user_base_address; + u64 dram_base_address; + u64 dram_end_address; + u64 dram_user_base_address; + u64 dram_size; + u64 dram_pci_bar_size; + u64 max_power_default; + u64 va_space_host_start_address; + u64 va_space_host_end_address; + u64 va_space_dram_start_address; + u64 va_space_dram_end_address; + u64 dram_size_for_default_page_mapping; + u64 pcie_dbi_base_address; + u64 pcie_aux_dbi_reg_addr; + u64 mmu_pgt_addr; + u64 mmu_dram_default_page_addr; + u32 mmu_pgt_size; + u32 mmu_pte_size; + u32 mmu_hop_table_size; + u32 mmu_hop0_tables_total_size; + u32 dram_page_size; + u32 cfg_size; + u32 sram_size; + u32 max_asid; + u32 num_of_events; + u32 psoc_pci_pll_nr; + u32 psoc_pci_pll_nf; + u32 psoc_pci_pll_od; + u32 psoc_pci_pll_div_factor; + u32 high_pll; + u32 cb_pool_cb_cnt; + u32 cb_pool_cb_size; + u8 tpc_enabled_mask; + u8 completion_queues_count; }; /** @@ -232,8 +274,6 @@ struct hl_dma_fence { * Command Buffers */ -#define HL_MAX_CB_SIZE 0x200000 /* 2MB */ - /** * struct hl_cb_mgr - describes a Command Buffer Manager. * @cb_lock: protects cb_handles. @@ -320,7 +360,7 @@ struct hl_cs_job; #define HL_EQ_LENGTH 64 #define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE) -/* KMD <-> ArmCP shared memory size */ +/* Host <-> ArmCP shared memory size */ #define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M /** @@ -401,7 +441,7 @@ struct hl_cs_parser; /** * enum hl_pm_mng_profile - power management profile. - * @PM_AUTO: internal clock is set by KMD. + * @PM_AUTO: internal clock is set by the Linux driver. * @PM_MANUAL: internal clock is set by the user. * @PM_LAST: last power management type. */ @@ -477,8 +517,8 @@ enum hl_pll_frequency { * @get_events_stat: retrieve event queue entries histogram. * @read_pte: read MMU page table entry from DRAM. * @write_pte: write MMU page table entry to DRAM. - * @mmu_invalidate_cache: flush MMU STLB cache, either with soft (L1 only) or - * hard (L0 & L1) flush. + * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft + * (L1 only) or hard (L0 & L1) flush. * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with * ASID-VA-size mask. * @send_heartbeat: send is-alive packet to ArmCP and verify response. @@ -498,6 +538,7 @@ enum hl_pll_frequency { * @rreg: Read a register. Needed for simulator support. * @wreg: Write a register. Needed for simulator support. * @halt_coresight: stop the ETF and ETR traces. + * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -554,10 +595,12 @@ struct hl_asic_funcs { struct hl_eq_entry *eq_entry); void (*set_pll_profile)(struct hl_device *hdev, enum hl_pll_frequency freq); - void* (*get_events_stat)(struct hl_device *hdev, u32 *size); + void* (*get_events_stat)(struct hl_device *hdev, bool aggregate, + u32 *size); u64 (*read_pte)(struct hl_device *hdev, u64 addr); void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val); - void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard); + void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard, + u32 flags); void (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); @@ -579,6 +622,7 @@ struct hl_asic_funcs { u32 (*rreg)(struct hl_device *hdev, u32 reg); void (*wreg)(struct hl_device *hdev, u32 reg, u32 val); void (*halt_coresight)(struct hl_device *hdev); + int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); }; @@ -608,7 +652,7 @@ struct hl_va_range { * descriptor (hl_vm_phys_pg_list or hl_userptr). * @mmu_phys_hash: holds a mapping from physical address to pgt_info structure. * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure. - * @hpriv: pointer to the private (KMD) data of the process (fd). + * @hpriv: pointer to the private (Kernel Driver) data of the process (fd). * @hdev: pointer to the device structure. * @refcount: reference counter for the context. Context is released only when * this hits 0l. It is incremented on CS and CS_WAIT. @@ -634,6 +678,7 @@ struct hl_va_range { * execution phase before the context switch phase * has finished. * @asid: context's unique address space ID in the device's MMU. + * @handle: context's opaque handle for user */ struct hl_ctx { DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS); @@ -655,6 +700,7 @@ struct hl_ctx { atomic_t thread_ctx_switch_token; u32 thread_ctx_switch_wait_token; u32 asid; + u32 handle; }; /** @@ -681,7 +727,7 @@ struct hl_ctx_mgr { * @sgt: pointer to the scatter-gather table that holds the pages. * @dir: for DMA unmapping, the direction must be supplied, so save it. * @debugfs_list: node in debugfs list of command submissions. - * @addr: user-space virtual pointer to the start of the memory area. + * @addr: user-space virtual address of the start of the memory area. * @size: size of the memory area to pin & map. * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise. */ @@ -745,11 +791,14 @@ struct hl_cs { * @userptr_list: linked-list of userptr mappings that belong to this job and * wait for completion. * @debugfs_list: node in debugfs list of command submission jobs. + * @queue_type: the type of the H/W queue this job is submitted to. * @id: the id of this job inside a CS. * @hw_queue_id: the id of the H/W queue this job is submitted to. * @user_cb_size: the actual size of the CB we got from the user. * @job_cb_size: the actual size of the CB that we put on the queue. - * @ext_queue: whether the job is for external queue or internal queue. + * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a + * handle to a kernel-allocated CB object, false + * otherwise (SRAM/DRAM/host address). */ struct hl_cs_job { struct list_head cs_node; @@ -759,39 +808,44 @@ struct hl_cs_job { struct work_struct finish_work; struct list_head userptr_list; struct list_head debugfs_list; + enum hl_queue_type queue_type; u32 id; u32 hw_queue_id; u32 user_cb_size; u32 job_cb_size; - u8 ext_queue; + u8 is_kernel_allocated_cb; }; /** - * struct hl_cs_parser - command submission paerser properties. + * struct hl_cs_parser - command submission parser properties. * @user_cb: the CB we got from the user. * @patched_cb: in case of patching, this is internal CB which is submitted on * the queue instead of the CB we got from the IOCTL. * @job_userptr_list: linked-list of userptr mappings that belong to the related * job and wait for completion. * @cs_sequence: the sequence number of the related CS. + * @queue_type: the type of the H/W queue this job is submitted to. * @ctx_id: the ID of the context the related CS belongs to. * @hw_queue_id: the id of the H/W queue this job is submitted to. * @user_cb_size: the actual size of the CB we got from the user. * @patched_cb_size: the size of the CB after parsing. - * @ext_queue: whether the job is for external queue or internal queue. * @job_id: the id of the related job inside the related CS. + * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a + * handle to a kernel-allocated CB object, false + * otherwise (SRAM/DRAM/host address). */ struct hl_cs_parser { struct hl_cb *user_cb; struct hl_cb *patched_cb; struct list_head *job_userptr_list; u64 cs_sequence; + enum hl_queue_type queue_type; u32 ctx_id; u32 hw_queue_id; u32 user_cb_size; u32 patched_cb_size; - u8 ext_queue; u8 job_id; + u8 is_kernel_allocated_cb; }; @@ -906,23 +960,27 @@ struct hl_debug_params { * @hdev: habanalabs device structure. * @filp: pointer to the given file structure. * @taskpid: current process ID. - * @ctx: current executing context. + * @ctx: current executing context. TODO: remove for multiple ctx per process * @ctx_mgr: context manager to handle multiple context for this FD. * @cb_mgr: command buffer manager to handle multiple buffers for this FD. * @debugfs_list: list of relevant ASIC debugfs. + * @dev_node: node in the device list of file private data * @refcount: number of related contexts. * @restore_phase_mutex: lock for context switch and restore phase. + * @is_control: true for control device, false otherwise */ struct hl_fpriv { struct hl_device *hdev; struct file *filp; struct pid *taskpid; - struct hl_ctx *ctx; /* TODO: remove for multiple ctx */ + struct hl_ctx *ctx; struct hl_ctx_mgr ctx_mgr; struct hl_cb_mgr cb_mgr; struct list_head debugfs_list; + struct list_head dev_node; struct kref refcount; struct mutex restore_phase_mutex; + u8 is_control; }; @@ -1009,7 +1067,7 @@ struct hl_dbg_device_entry { */ /* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe - * x16 cards. In extereme cases, there are hosts that can accommodate 16 cards + * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards. */ #define HL_MAX_MINORS 256 @@ -1037,18 +1095,23 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); #define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT #define REG_FIELD_MASK(reg, field) reg##_##field##_MASK -#define WREG32_FIELD(reg, field, val) \ - WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \ - (val) << REG_FIELD_SHIFT(reg, field)) +#define WREG32_FIELD(reg, offset, field, val) \ + WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \ + ~REG_FIELD_MASK(reg, field)) | \ + (val) << REG_FIELD_SHIFT(reg, field)) +/* Timeout should be longer when working with simulator but cap the + * increased timeout to some maximum + */ #define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \ ({ \ ktime_t __timeout; \ - /* timeout should be longer when working with simulator */ \ if (hdev->pdev) \ __timeout = ktime_add_us(ktime_get(), timeout_us); \ else \ - __timeout = ktime_add_us(ktime_get(), (timeout_us * 10)); \ + __timeout = ktime_add_us(ktime_get(),\ + min((u64)(timeout_us * 10), \ + (u64) HL_SIM_MAX_TIMEOUT_US)); \ might_sleep_if(sleep_us); \ for (;;) { \ (val) = RREG32(addr); \ @@ -1080,24 +1143,25 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); mem_written_by_device) \ ({ \ ktime_t __timeout; \ - /* timeout should be longer when working with simulator */ \ if (hdev->pdev) \ __timeout = ktime_add_us(ktime_get(), timeout_us); \ else \ - __timeout = ktime_add_us(ktime_get(), (timeout_us * 10)); \ + __timeout = ktime_add_us(ktime_get(),\ + min((u64)(timeout_us * 10), \ + (u64) HL_SIM_MAX_TIMEOUT_US)); \ might_sleep_if(sleep_us); \ for (;;) { \ /* Verify we read updates done by other cores or by device */ \ mb(); \ (val) = *((u32 *) (uintptr_t) (addr)); \ if (mem_written_by_device) \ - (val) = le32_to_cpu(val); \ + (val) = le32_to_cpu(*(__le32 *) &(val)); \ if (cond) \ break; \ if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ (val) = *((u32 *) (uintptr_t) (addr)); \ if (mem_written_by_device) \ - (val) = le32_to_cpu(val); \ + (val) = le32_to_cpu(*(__le32 *) &(val)); \ break; \ } \ if (sleep_us) \ @@ -1110,11 +1174,12 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); timeout_us) \ ({ \ ktime_t __timeout; \ - /* timeout should be longer when working with simulator */ \ if (hdev->pdev) \ __timeout = ktime_add_us(ktime_get(), timeout_us); \ else \ - __timeout = ktime_add_us(ktime_get(), (timeout_us * 10)); \ + __timeout = ktime_add_us(ktime_get(),\ + min((u64)(timeout_us * 10), \ + (u64) HL_SIM_MAX_TIMEOUT_US)); \ might_sleep_if(sleep_us); \ for (;;) { \ (val) = readl(addr); \ @@ -1143,12 +1208,24 @@ struct hl_device_reset_work { }; /** + * struct hl_device_idle_busy_ts - used for calculating device utilization rate. + * @idle_to_busy_ts: timestamp where device changed from idle to busy. + * @busy_to_idle_ts: timestamp where device changed from busy to idle. + */ +struct hl_device_idle_busy_ts { + ktime_t idle_to_busy_ts; + ktime_t busy_to_idle_ts; +}; + +/** * struct hl_device - habanalabs device structure. * @pdev: pointer to PCI device, can be NULL in case of simulator device. * @pcie_bar: array of available PCIe bars. * @rmmio: configuration area address on SRAM. * @cdev: related char device. - * @dev: realted kernel basic device structure. + * @cdev_ctrl: char device for control operations only (INFO IOCTL) + * @dev: related kernel basic device structure. + * @dev_ctrl: related kernel device structure for the control device * @work_freq: delayed work to lower device frequency if possible. * @work_heartbeat: delayed work for ArmCP is-alive check. * @asic_name: ASIC specific nmae. @@ -1156,25 +1233,19 @@ struct hl_device_reset_work { * @completion_queue: array of hl_cq. * @cq_wq: work queue of completion queues for executing work in process context * @eq_wq: work queue of event queue for executing work in process context. - * @kernel_ctx: KMD context structure. + * @kernel_ctx: Kernel driver context structure. * @kernel_queues: array of hl_hw_queue. * @hw_queues_mirror_list: CS mirror list for TDR. * @hw_queues_mirror_lock: protects hw_queues_mirror_list. * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs. * @event_queue: event queue for IRQ from ArmCP. * @dma_pool: DMA pool for small allocations. - * @cpu_accessible_dma_mem: KMD <-> ArmCP shared memory CPU address. - * @cpu_accessible_dma_address: KMD <-> ArmCP shared memory DMA address. - * @cpu_accessible_dma_pool: KMD <-> ArmCP shared memory pool. + * @cpu_accessible_dma_mem: Host <-> ArmCP shared memory CPU address. + * @cpu_accessible_dma_address: Host <-> ArmCP shared memory DMA address. + * @cpu_accessible_dma_pool: Host <-> ArmCP shared memory pool. * @asid_bitmap: holds used/available ASIDs. * @asid_mutex: protects asid_bitmap. - * @fd_open_cnt_lock: lock for updating fd_open_cnt in hl_device_open. Although - * fd_open_cnt is atomic, we need this lock to serialize - * the open function because the driver currently supports - * only a single process at a time. In addition, we need a - * lock here so we can flush user processes which are opening - * the device while we are trying to hard reset it - * @send_cpu_message_lock: enforces only one message in KMD <-> ArmCP queue. + * @send_cpu_message_lock: enforces only one message in Host <-> ArmCP queue. * @debug_lock: protects critical section of setting debug mode for device * @asic_prop: ASIC specific immutable properties. * @asic_funcs: ASIC specific functions. @@ -1189,22 +1260,28 @@ struct hl_device_reset_work { * @hl_debugfs: device's debugfs manager. * @cb_pool: list of preallocated CBs. * @cb_pool_lock: protects the CB pool. - * @user_ctx: current user context executing. + * @fpriv_list: list of file private data structures. Each structure is created + * when a user opens the device + * @fpriv_list_lock: protects the fpriv_list + * @compute_ctx: current compute context executing. + * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy + * and vice-versa * @dram_used_mem: current DRAM memory consumption. * @timeout_jiffies: device CS timeout value. * @max_power: the max power of the device, as configured by the sysadmin. This - * value is saved so in case of hard-reset, KMD will restore this - * value and update the F/W after the re-initialization + * value is saved so in case of hard-reset, the driver will restore + * this value and update the F/W after the re-initialization * @in_reset: is device in reset flow. * @curr_pll_profile: current PLL profile. - * @fd_open_cnt: number of open user processes. * @cs_active_cnt: number of active command submissions on this device (active * means already in H/W queues) - * @major: habanalabs KMD major. + * @major: habanalabs kernel driver major. * @high_pll: high PLL profile frequency. - * @soft_reset_cnt: number of soft reset since KMD loading. - * @hard_reset_cnt: number of hard reset since KMD loading. + * @soft_reset_cnt: number of soft reset since the driver was loaded. + * @hard_reset_cnt: number of hard reset since the driver was loaded. + * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr * @id: device minor. + * @id_control: minor of the control device * @disabled: is device disabled. * @late_init_done: is late init stage was done during initialization. * @hwmon_initialized: is H/W monitor sensors was initialized. @@ -1218,15 +1295,18 @@ struct hl_device_reset_work { * @mmu_enable: is MMU enabled. * @device_cpu_disabled: is the device CPU disabled (due to timeouts) * @dma_mask: the dma mask that was set for this device - * @in_debug: is device under debug. This, together with fd_open_cnt, enforces + * @in_debug: is device under debug. This, together with fpriv_list, enforces * that only a single user is configuring the debug infrastructure. + * @cdev_sysfs_created: were char devices and sysfs nodes created. */ struct hl_device { struct pci_dev *pdev; void __iomem *pcie_bar[6]; void __iomem *rmmio; struct cdev cdev; + struct cdev cdev_ctrl; struct device *dev; + struct device *dev_ctrl; struct delayed_work work_freq; struct delayed_work work_heartbeat; char asic_name[16]; @@ -1246,8 +1326,6 @@ struct hl_device { struct gen_pool *cpu_accessible_dma_pool; unsigned long *asid_bitmap; struct mutex asid_mutex; - /* TODO: remove fd_open_cnt_lock for multiple process support */ - struct mutex fd_open_cnt_lock; struct mutex send_cpu_message_lock; struct mutex debug_lock; struct asic_fixed_properties asic_prop; @@ -1266,21 +1344,26 @@ struct hl_device { struct list_head cb_pool; spinlock_t cb_pool_lock; - /* TODO: remove user_ctx for multiple process support */ - struct hl_ctx *user_ctx; + struct list_head fpriv_list; + struct mutex fpriv_list_lock; + + struct hl_ctx *compute_ctx; + + struct hl_device_idle_busy_ts *idle_busy_ts_arr; atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; atomic_t in_reset; - atomic_t curr_pll_profile; - atomic_t fd_open_cnt; - atomic_t cs_active_cnt; + enum hl_pll_frequency curr_pll_profile; + int cs_active_cnt; u32 major; u32 high_pll; u32 soft_reset_cnt; u32 hard_reset_cnt; + u32 idle_busy_ts_idx; u16 id; + u16 id_control; u8 disabled; u8 late_init_done; u8 hwmon_initialized; @@ -1293,6 +1376,7 @@ struct hl_device { u8 device_cpu_disabled; u8 dma_mask; u8 in_debug; + u8 cdev_sysfs_created; /* Parameters for bring-up */ u8 mmu_enable; @@ -1386,6 +1470,7 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size, } int hl_device_open(struct inode *inode, struct file *filp); +int hl_device_open_ctrl(struct inode *inode, struct file *filp); bool hl_device_disabled_or_in_reset(struct hl_device *hdev); enum hl_device_status hl_device_status(struct hl_device *hdev); int hl_device_set_debug_mode(struct hl_device *hdev, bool enable); @@ -1439,6 +1524,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, void hl_hpriv_get(struct hl_fpriv *hpriv); void hl_hpriv_put(struct hl_fpriv *hpriv); int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq); +uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms); int hl_build_hwmon_channel_info(struct hl_device *hdev, struct armcp_sensor *sensors_arr); @@ -1463,7 +1549,8 @@ int hl_cb_pool_init(struct hl_device *hdev); int hl_cb_pool_fini(struct hl_device *hdev); void hl_cs_rollback_all(struct hl_device *hdev); -struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue); +struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, + enum hl_queue_type queue_type, bool is_kernel_allocated_cb); void goya_set_asic_funcs(struct hl_device *hdev); @@ -1475,7 +1562,7 @@ void hl_vm_fini(struct hl_device *hdev); int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, struct hl_userptr *userptr); -int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr); +void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr); void hl_userptr_delete_list(struct hl_device *hdev, struct list_head *userptr_list); bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size, @@ -1625,6 +1712,7 @@ static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, /* IOCTLs */ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); +long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg); int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data); int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data); int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data); diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c index 6f6dbe93f1df..8c342fb499ca 100644 --- a/drivers/misc/habanalabs/habanalabs_drv.c +++ b/drivers/misc/habanalabs/habanalabs_drv.c @@ -95,80 +95,127 @@ int hl_device_open(struct inode *inode, struct file *filp) return -ENXIO; } - mutex_lock(&hdev->fd_open_cnt_lock); + hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); + if (!hpriv) + return -ENOMEM; + + hpriv->hdev = hdev; + filp->private_data = hpriv; + hpriv->filp = filp; + mutex_init(&hpriv->restore_phase_mutex); + kref_init(&hpriv->refcount); + nonseekable_open(inode, filp); + + hl_cb_mgr_init(&hpriv->cb_mgr); + hl_ctx_mgr_init(&hpriv->ctx_mgr); + + hpriv->taskpid = find_get_pid(current->pid); + + mutex_lock(&hdev->fpriv_list_lock); if (hl_device_disabled_or_in_reset(hdev)) { dev_err_ratelimited(hdev->dev, "Can't open %s because it is disabled or in reset\n", dev_name(hdev->dev)); - mutex_unlock(&hdev->fd_open_cnt_lock); - return -EPERM; + rc = -EPERM; + goto out_err; } if (hdev->in_debug) { dev_err_ratelimited(hdev->dev, "Can't open %s because it is being debugged by another user\n", dev_name(hdev->dev)); - mutex_unlock(&hdev->fd_open_cnt_lock); - return -EPERM; + rc = -EPERM; + goto out_err; } - if (atomic_read(&hdev->fd_open_cnt)) { - dev_info_ratelimited(hdev->dev, + if (hdev->compute_ctx) { + dev_dbg_ratelimited(hdev->dev, "Can't open %s because another user is working on it\n", dev_name(hdev->dev)); - mutex_unlock(&hdev->fd_open_cnt_lock); - return -EBUSY; - } - - atomic_inc(&hdev->fd_open_cnt); - - mutex_unlock(&hdev->fd_open_cnt_lock); - - hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); - if (!hpriv) { - rc = -ENOMEM; - goto close_device; + rc = -EBUSY; + goto out_err; } - hpriv->hdev = hdev; - filp->private_data = hpriv; - hpriv->filp = filp; - mutex_init(&hpriv->restore_phase_mutex); - kref_init(&hpriv->refcount); - nonseekable_open(inode, filp); - - hl_cb_mgr_init(&hpriv->cb_mgr); - hl_ctx_mgr_init(&hpriv->ctx_mgr); - rc = hl_ctx_create(hdev, hpriv); if (rc) { - dev_err(hdev->dev, "Failed to open FD (CTX fail)\n"); + dev_err(hdev->dev, "Failed to create context %d\n", rc); goto out_err; } - hpriv->taskpid = find_get_pid(current->pid); - - /* - * Device is IDLE at this point so it is legal to change PLLs. There - * is no need to check anything because if the PLL is already HIGH, the - * set function will return without doing anything + /* Device is IDLE at this point so it is legal to change PLLs. + * There is no need to check anything because if the PLL is + * already HIGH, the set function will return without doing + * anything */ hl_device_set_frequency(hdev, PLL_HIGH); + list_add(&hpriv->dev_node, &hdev->fpriv_list); + mutex_unlock(&hdev->fpriv_list_lock); + hl_debugfs_add_file(hpriv); return 0; out_err: - filp->private_data = NULL; - hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); + mutex_unlock(&hdev->fpriv_list_lock); + hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); + hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); + filp->private_data = NULL; mutex_destroy(&hpriv->restore_phase_mutex); + put_pid(hpriv->taskpid); + kfree(hpriv); + return rc; +} + +int hl_device_open_ctrl(struct inode *inode, struct file *filp) +{ + struct hl_device *hdev; + struct hl_fpriv *hpriv; + int rc; + + mutex_lock(&hl_devs_idr_lock); + hdev = idr_find(&hl_devs_idr, iminor(inode)); + mutex_unlock(&hl_devs_idr_lock); + + if (!hdev) { + pr_err("Couldn't find device %d:%d\n", + imajor(inode), iminor(inode)); + return -ENXIO; + } + + hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); + if (!hpriv) + return -ENOMEM; + + mutex_lock(&hdev->fpriv_list_lock); + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_err_ratelimited(hdev->dev_ctrl, + "Can't open %s because it is disabled or in reset\n", + dev_name(hdev->dev_ctrl)); + rc = -EPERM; + goto out_err; + } -close_device: - atomic_dec(&hdev->fd_open_cnt); + list_add(&hpriv->dev_node, &hdev->fpriv_list); + mutex_unlock(&hdev->fpriv_list_lock); + + hpriv->hdev = hdev; + filp->private_data = hpriv; + hpriv->filp = filp; + hpriv->is_control = true; + nonseekable_open(inode, filp); + + hpriv->taskpid = find_get_pid(current->pid); + + return 0; + +out_err: + mutex_unlock(&hdev->fpriv_list_lock); + kfree(hpriv); return rc; } @@ -199,7 +246,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, enum hl_asic_type asic_type, int minor) { struct hl_device *hdev; - int rc; + int rc, main_id, ctrl_id = 0; *dev = NULL; @@ -240,33 +287,34 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, mutex_lock(&hl_devs_idr_lock); - if (minor == -1) { - rc = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, + /* Always save 2 numbers, 1 for main device and 1 for control. + * They must be consecutive + */ + main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL); - } else { - void *old_idr = idr_replace(&hl_devs_idr, hdev, minor); - if (IS_ERR_VALUE(old_idr)) { - rc = PTR_ERR(old_idr); - pr_err("Error %d when trying to replace minor %d\n", - rc, minor); - mutex_unlock(&hl_devs_idr_lock); - goto free_hdev; - } - rc = minor; - } + if (main_id >= 0) + ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1, + main_id + 2, GFP_KERNEL); mutex_unlock(&hl_devs_idr_lock); - if (rc < 0) { - if (rc == -ENOSPC) { + if ((main_id < 0) || (ctrl_id < 0)) { + if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC)) pr_err("too many devices in the system\n"); - rc = -EBUSY; + + if (main_id >= 0) { + mutex_lock(&hl_devs_idr_lock); + idr_remove(&hl_devs_idr, main_id); + mutex_unlock(&hl_devs_idr_lock); } + + rc = -EBUSY; goto free_hdev; } - hdev->id = rc; + hdev->id = main_id; + hdev->id_control = ctrl_id; *dev = hdev; @@ -288,6 +336,7 @@ void destroy_hdev(struct hl_device *hdev) /* Remove device from the device list */ mutex_lock(&hl_devs_idr_lock); idr_remove(&hl_devs_idr, hdev->id); + idr_remove(&hl_devs_idr, hdev->id_control); mutex_unlock(&hl_devs_idr_lock); kfree(hdev); @@ -295,8 +344,7 @@ void destroy_hdev(struct hl_device *hdev) static int hl_pmops_suspend(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct hl_device *hdev = pci_get_drvdata(pdev); + struct hl_device *hdev = dev_get_drvdata(dev); pr_debug("Going to suspend PCI device\n"); @@ -310,8 +358,7 @@ static int hl_pmops_suspend(struct device *dev) static int hl_pmops_resume(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct hl_device *hdev = pci_get_drvdata(pdev); + struct hl_device *hdev = dev_get_drvdata(dev); pr_debug("Going to resume PCI device\n"); diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c index 07127576b3e8..6474b868ef27 100644 --- a/drivers/misc/habanalabs/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/habanalabs_ioctl.c @@ -60,12 +60,17 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask; hw_ip.sram_size = prop->sram_size - sram_kmd_size; hw_ip.dram_size = prop->dram_size - dram_kmd_size; - if (hw_ip.dram_size > 0) + if (hw_ip.dram_size > PAGE_SIZE) hw_ip.dram_enabled = 1; hw_ip.num_of_events = prop->num_of_events; - memcpy(hw_ip.armcp_version, - prop->armcp_info.armcp_version, VERSION_MAX_LEN); - hw_ip.armcp_cpld_version = __le32_to_cpu(prop->armcp_info.cpld_version); + + memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version, + min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN)); + + memcpy(hw_ip.card_name, prop->armcp_info.card_name, + min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN)); + + hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version); hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr; hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf; hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od; @@ -75,7 +80,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0; } -static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args) +static int hw_events_info(struct hl_device *hdev, bool aggregate, + struct hl_info_args *args) { u32 size, max_size = args->return_size; void __user *out = (void __user *) (uintptr_t) args->return_pointer; @@ -84,13 +90,14 @@ static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - arr = hdev->asic_funcs->get_events_stat(hdev, &size); + arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size); return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0; } -static int dram_usage_info(struct hl_device *hdev, struct hl_info_args *args) +static int dram_usage_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { + struct hl_device *hdev = hpriv->hdev; struct hl_info_dram_usage dram_usage = {0}; u32 max_size = args->return_size; void __user *out = (void __user *) (uintptr_t) args->return_pointer; @@ -104,7 +111,9 @@ static int dram_usage_info(struct hl_device *hdev, struct hl_info_args *args) prop->dram_base_address); dram_usage.dram_free_mem = (prop->dram_size - dram_kmd_size) - atomic64_read(&hdev->dram_used_mem); - dram_usage.ctx_dram_mem = atomic64_read(&hdev->user_ctx->dram_phys_mem); + if (hpriv->ctx) + dram_usage.ctx_dram_mem = + atomic64_read(&hpriv->ctx->dram_phys_mem); return copy_to_user(out, &dram_usage, min((size_t) max_size, sizeof(dram_usage))) ? -EFAULT : 0; @@ -141,13 +150,16 @@ static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args) params->op = args->op; if (args->input_ptr && args->input_size) { - input = memdup_user(u64_to_user_ptr(args->input_ptr), - args->input_size); - if (IS_ERR(input)) { - rc = PTR_ERR(input); - input = NULL; - dev_err(hdev->dev, - "error %d when copying input debug data\n", rc); + input = kzalloc(hl_debug_struct_size[args->op], GFP_KERNEL); + if (!input) { + rc = -ENOMEM; + goto out; + } + + if (copy_from_user(input, u64_to_user_ptr(args->input_ptr), + args->input_size)) { + rc = -EFAULT; + dev_err(hdev->dev, "failed to copy input debug data\n"); goto out; } @@ -172,17 +184,14 @@ static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args) goto out; } - if (output) { - if (copy_to_user((void __user *) (uintptr_t) args->output_ptr, - output, - args->output_size)) { - dev_err(hdev->dev, - "copy to user failed in debug ioctl\n"); - rc = -EFAULT; - goto out; - } + if (output && copy_to_user((void __user *) (uintptr_t) args->output_ptr, + output, args->output_size)) { + dev_err(hdev->dev, "copy to user failed in debug ioctl\n"); + rc = -EFAULT; + goto out; } + out: kfree(params); kfree(output); @@ -191,42 +200,123 @@ out: return rc; } -static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) +static int device_utilization(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_device_utilization device_util = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + if ((args->period_ms < 100) || (args->period_ms > 1000) || + (args->period_ms % 100)) { + dev_err(hdev->dev, + "period %u must be between 100 - 1000 and must be divisible by 100\n", + args->period_ms); + return -EINVAL; + } + + device_util.utilization = hl_device_utilization(hdev, args->period_ms); + + return copy_to_user(out, &device_util, + min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0; +} + +static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_clk_rate clk_rate = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, + &clk_rate.max_clk_rate_mhz); + if (rc) + return rc; + + return copy_to_user(out, &clk_rate, + min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0; +} + +static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_reset_count reset_count = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + reset_count.hard_reset_cnt = hdev->hard_reset_cnt; + reset_count.soft_reset_cnt = hdev->soft_reset_cnt; + + return copy_to_user(out, &reset_count, + min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0; +} + +static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, + struct device *dev) { struct hl_info_args *args = data; struct hl_device *hdev = hpriv->hdev; int rc; - /* We want to return device status even if it disabled or in reset */ - if (args->op == HL_INFO_DEVICE_STATUS) + /* + * Information is returned for the following opcodes even if the device + * is disabled or in reset. + */ + switch (args->op) { + case HL_INFO_HW_IP_INFO: + return hw_ip_info(hdev, args); + + case HL_INFO_DEVICE_STATUS: return device_status_info(hdev, args); + case HL_INFO_RESET_COUNT: + return get_reset_count(hdev, args); + + default: + break; + } + if (hl_device_disabled_or_in_reset(hdev)) { - dev_warn_ratelimited(hdev->dev, + dev_warn_ratelimited(dev, "Device is %s. Can't execute INFO IOCTL\n", atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); return -EBUSY; } switch (args->op) { - case HL_INFO_HW_IP_INFO: - rc = hw_ip_info(hdev, args); - break; - case HL_INFO_HW_EVENTS: - rc = hw_events_info(hdev, args); + rc = hw_events_info(hdev, false, args); break; case HL_INFO_DRAM_USAGE: - rc = dram_usage_info(hdev, args); + rc = dram_usage_info(hpriv, args); break; case HL_INFO_HW_IDLE: rc = hw_idle(hdev, args); break; + case HL_INFO_DEVICE_UTILIZATION: + rc = device_utilization(hdev, args); + break; + + case HL_INFO_HW_EVENTS_AGGREGATE: + rc = hw_events_info(hdev, true, args); + break; + + case HL_INFO_CLK_RATE: + rc = get_clk_rate(hdev, args); + break; + default: - dev_err(hdev->dev, "Invalid request %d\n", args->op); + dev_err(dev, "Invalid request %d\n", args->op); rc = -ENOTTY; break; } @@ -234,6 +324,16 @@ static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) return rc; } +static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) +{ + return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev); +} + +static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data) +{ + return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl); +} + static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data) { struct hl_debug_args *args = data; @@ -288,52 +388,45 @@ static const struct hl_ioctl_desc hl_ioctls[] = { HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl) }; -#define HL_CORE_IOCTL_COUNT ARRAY_SIZE(hl_ioctls) +static const struct hl_ioctl_desc hl_ioctls_control[] = { + HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control) +}; -long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, + const struct hl_ioctl_desc *ioctl, struct device *dev) { struct hl_fpriv *hpriv = filep->private_data; struct hl_device *hdev = hpriv->hdev; - hl_ioctl_t *func; - const struct hl_ioctl_desc *ioctl = NULL; unsigned int nr = _IOC_NR(cmd); char stack_kdata[128] = {0}; char *kdata = NULL; unsigned int usize, asize; + hl_ioctl_t *func; + u32 hl_size; int retcode; if (hdev->hard_reset_pending) { - dev_crit_ratelimited(hdev->dev, + dev_crit_ratelimited(hdev->dev_ctrl, "Device HARD reset pending! Please close FD\n"); return -ENODEV; } - if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) { - u32 hl_size; - - ioctl = &hl_ioctls[nr]; - - hl_size = _IOC_SIZE(ioctl->cmd); - usize = asize = _IOC_SIZE(cmd); - if (hl_size > asize) - asize = hl_size; - - cmd = ioctl->cmd; - } else { - dev_err(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n", - task_pid_nr(current), nr); - return -ENOTTY; - } - /* Do not trust userspace, use our own definition */ func = ioctl->func; if (unlikely(!func)) { - dev_dbg(hdev->dev, "no function\n"); + dev_dbg(dev, "no function\n"); retcode = -ENOTTY; goto out_err; } + hl_size = _IOC_SIZE(ioctl->cmd); + usize = asize = _IOC_SIZE(cmd); + if (hl_size > asize) + asize = hl_size; + + cmd = ioctl->cmd; + if (cmd & (IOC_IN | IOC_OUT)) { if (asize <= sizeof(stack_kdata)) { kdata = stack_kdata; @@ -357,14 +450,12 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) retcode = func(hpriv, kdata); - if (cmd & IOC_OUT) - if (copy_to_user((void __user *)arg, kdata, usize)) - retcode = -EFAULT; + if ((cmd & IOC_OUT) && copy_to_user((void __user *)arg, kdata, usize)) + retcode = -EFAULT; out_err: if (retcode) - dev_dbg(hdev->dev, - "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", + dev_dbg(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", task_pid_nr(current), cmd, nr); if (kdata != stack_kdata) @@ -372,3 +463,39 @@ out_err: return retcode; } + +long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct hl_fpriv *hpriv = filep->private_data; + struct hl_device *hdev = hpriv->hdev; + const struct hl_ioctl_desc *ioctl = NULL; + unsigned int nr = _IOC_NR(cmd); + + if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) { + ioctl = &hl_ioctls[nr]; + } else { + dev_err(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n", + task_pid_nr(current), nr); + return -ENOTTY; + } + + return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev); +} + +long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct hl_fpriv *hpriv = filep->private_data; + struct hl_device *hdev = hpriv->hdev; + const struct hl_ioctl_desc *ioctl = NULL; + unsigned int nr = _IOC_NR(cmd); + + if (nr == _IOC_NR(HL_IOCTL_INFO)) { + ioctl = &hl_ioctls_control[nr]; + } else { + dev_err(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n", + task_pid_nr(current), nr); + return -ENOTTY; + } + + return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl); +} diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c index 5f5673b74985..91579dde9262 100644 --- a/drivers/misc/habanalabs/hw_queue.c +++ b/drivers/misc/habanalabs/hw_queue.c @@ -58,8 +58,8 @@ out: } /* - * ext_queue_submit_bd - Submit a buffer descriptor to an external queue - * + * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a + * H/W queue. * @hdev: pointer to habanalabs device structure * @q: pointer to habanalabs queue structure * @ctl: BD's control word @@ -73,16 +73,16 @@ out: * This function must be called when the scheduler mutex is taken * */ -static void ext_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q, - u32 ctl, u32 len, u64 ptr) +static void ext_and_hw_queue_submit_bd(struct hl_device *hdev, + struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr) { struct hl_bd *bd; bd = (struct hl_bd *) (uintptr_t) q->kernel_address; bd += hl_pi_2_offset(q->pi); - bd->ctl = __cpu_to_le32(ctl); - bd->len = __cpu_to_le32(len); - bd->ptr = __cpu_to_le64(ptr); + bd->ctl = cpu_to_le32(ctl); + bd->len = cpu_to_le32(len); + bd->ptr = cpu_to_le64(ptr); q->pi = hl_queue_inc_ptr(q->pi); hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); @@ -174,6 +174,45 @@ static int int_queue_sanity_checks(struct hl_device *hdev, } /* + * hw_queue_sanity_checks() - Perform some sanity checks on a H/W queue. + * @hdev: Pointer to hl_device structure. + * @q: Pointer to hl_hw_queue structure. + * @num_of_entries: How many entries to check for space. + * + * Perform the following: + * - Make sure we have enough space in the completion queue. + * This check also ensures that there is enough space in the h/w queue, as + * both queues are of the same size. + * - Reserve space in the completion queue (needs to be reversed if there + * is a failure down the road before the actual submission of work). + * + * Both operations are done using the "free_slots_cnt" field of the completion + * queue. The CI counters of the queue and the completion queue are not + * needed/used for the H/W queue type. + */ +static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q, + int num_of_entries) +{ + atomic_t *free_slots = + &hdev->completion_queue[q->hw_queue_id].free_slots_cnt; + + /* + * Check we have enough space in the completion queue. + * Add -1 to counter (decrement) unless counter was already 0. + * In that case, CQ is full so we can't submit a new CB. + * atomic_add_unless will return 0 if counter was already 0. + */ + if (atomic_add_negative(num_of_entries * -1, free_slots)) { + dev_dbg(hdev->dev, "No space for %d entries on CQ %d\n", + num_of_entries, q->hw_queue_id); + atomic_add(num_of_entries, free_slots); + return -EAGAIN; + } + + return 0; +} + +/* * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion * * @hdev: pointer to hl_device structure @@ -188,7 +227,7 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, u32 cb_size, u64 cb_ptr) { struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; - int rc; + int rc = 0; /* * The CPU queue is a synchronous queue with an effective depth of @@ -206,11 +245,18 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, goto out; } - rc = ext_queue_sanity_checks(hdev, q, 1, false); - if (rc) - goto out; + /* + * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue + * type only on init phase, when the queues are empty and being tested, + * so there is no need for sanity checks. + */ + if (q->queue_type != QUEUE_TYPE_HW) { + rc = ext_queue_sanity_checks(hdev, q, 1, false); + if (rc) + goto out; + } - ext_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); + ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); out: if (q->queue_type != QUEUE_TYPE_CPU) @@ -220,14 +266,14 @@ out: } /* - * ext_hw_queue_schedule_job - submit an JOB to an external queue + * ext_queue_schedule_job - submit a JOB to an external queue * * @job: pointer to the job that needs to be submitted to the queue * * This function must be called when the scheduler mutex is taken * */ -static void ext_hw_queue_schedule_job(struct hl_cs_job *job) +static void ext_queue_schedule_job(struct hl_cs_job *job) { struct hl_device *hdev = job->cs->ctx->hdev; struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; @@ -249,7 +295,7 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job) len = job->job_cb_size; ptr = cb->bus_address; - cq_pkt.data = __cpu_to_le32( + cq_pkt.data = cpu_to_le32( ((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT) & CQ_ENTRY_SHADOW_INDEX_MASK) | (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) | @@ -260,32 +306,32 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job) * H/W queues is done under the scheduler mutex * * No need to check if CQ is full because it was already - * checked in hl_queue_sanity_checks + * checked in ext_queue_sanity_checks */ cq = &hdev->completion_queue[q->hw_queue_id]; cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry); hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len, cq_addr, - __le32_to_cpu(cq_pkt.data), + le32_to_cpu(cq_pkt.data), q->hw_queue_id); q->shadow_queue[hl_pi_2_offset(q->pi)] = job; cq->pi = hl_cq_inc_ptr(cq->pi); - ext_queue_submit_bd(hdev, q, ctl, len, ptr); + ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); } /* - * int_hw_queue_schedule_job - submit an JOB to an internal queue + * int_queue_schedule_job - submit a JOB to an internal queue * * @job: pointer to the job that needs to be submitted to the queue * * This function must be called when the scheduler mutex is taken * */ -static void int_hw_queue_schedule_job(struct hl_cs_job *job) +static void int_queue_schedule_job(struct hl_cs_job *job) { struct hl_device *hdev = job->cs->ctx->hdev; struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; @@ -308,6 +354,60 @@ static void int_hw_queue_schedule_job(struct hl_cs_job *job) } /* + * hw_queue_schedule_job - submit a JOB to a H/W queue + * + * @job: pointer to the job that needs to be submitted to the queue + * + * This function must be called when the scheduler mutex is taken + * + */ +static void hw_queue_schedule_job(struct hl_cs_job *job) +{ + struct hl_device *hdev = job->cs->ctx->hdev; + struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; + struct hl_cq *cq; + u64 ptr; + u32 offset, ctl, len; + + /* + * Upon PQE completion, COMP_DATA is used as the write data to the + * completion queue (QMAN HBW message), and COMP_OFFSET is used as the + * write address offset in the SM block (QMAN LBW message). + * The write address offset is calculated as "COMP_OFFSET << 2". + */ + offset = job->cs->sequence & (HL_MAX_PENDING_CS - 1); + ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) | + ((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK); + + len = job->job_cb_size; + + /* + * A patched CB is created only if a user CB was allocated by driver and + * MMU is disabled. If MMU is enabled, the user CB should be used + * instead. If the user CB wasn't allocated by driver, assume that it + * holds an address. + */ + if (job->patched_cb) + ptr = job->patched_cb->bus_address; + else if (job->is_kernel_allocated_cb) + ptr = job->user_cb->bus_address; + else + ptr = (u64) (uintptr_t) job->user_cb; + + /* + * No need to protect pi_offset because scheduling to the + * H/W queues is done under the scheduler mutex + * + * No need to check if CQ is full because it was already + * checked in hw_queue_sanity_checks + */ + cq = &hdev->completion_queue[q->hw_queue_id]; + cq->pi = hl_cq_inc_ptr(cq->pi); + + ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); +} + +/* * hl_hw_queue_schedule_cs - schedule a command submission * * @job : pointer to the CS @@ -330,23 +430,34 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) } q = &hdev->kernel_queues[0]; - /* This loop assumes all external queues are consecutive */ for (i = 0, cq_cnt = 0 ; i < HL_MAX_QUEUES ; i++, q++) { - if (q->queue_type == QUEUE_TYPE_EXT) { - if (cs->jobs_in_queue_cnt[i]) { + if (cs->jobs_in_queue_cnt[i]) { + switch (q->queue_type) { + case QUEUE_TYPE_EXT: rc = ext_queue_sanity_checks(hdev, q, - cs->jobs_in_queue_cnt[i], true); - if (rc) - goto unroll_cq_resv; - cq_cnt++; - } - } else if (q->queue_type == QUEUE_TYPE_INT) { - if (cs->jobs_in_queue_cnt[i]) { + cs->jobs_in_queue_cnt[i], true); + break; + case QUEUE_TYPE_INT: rc = int_queue_sanity_checks(hdev, q, - cs->jobs_in_queue_cnt[i]); - if (rc) - goto unroll_cq_resv; + cs->jobs_in_queue_cnt[i]); + break; + case QUEUE_TYPE_HW: + rc = hw_queue_sanity_checks(hdev, q, + cs->jobs_in_queue_cnt[i]); + break; + default: + dev_err(hdev->dev, "Queue type %d is invalid\n", + q->queue_type); + rc = -EINVAL; + break; } + + if (rc) + goto unroll_cq_resv; + + if (q->queue_type == QUEUE_TYPE_EXT || + q->queue_type == QUEUE_TYPE_HW) + cq_cnt++; } } @@ -364,24 +475,39 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) spin_unlock(&hdev->hw_queues_mirror_lock); } - atomic_inc(&hdev->cs_active_cnt); + if (!hdev->cs_active_cnt++) { + struct hl_device_idle_busy_ts *ts; + + ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx]; + ts->busy_to_idle_ts = ktime_set(0, 0); + ts->idle_to_busy_ts = ktime_get(); + } list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) - if (job->ext_queue) - ext_hw_queue_schedule_job(job); - else - int_hw_queue_schedule_job(job); + switch (job->queue_type) { + case QUEUE_TYPE_EXT: + ext_queue_schedule_job(job); + break; + case QUEUE_TYPE_INT: + int_queue_schedule_job(job); + break; + case QUEUE_TYPE_HW: + hw_queue_schedule_job(job); + break; + default: + break; + } cs->submitted = true; goto out; unroll_cq_resv: - /* This loop assumes all external queues are consecutive */ q = &hdev->kernel_queues[0]; for (i = 0 ; (i < HL_MAX_QUEUES) && (cq_cnt > 0) ; i++, q++) { - if ((q->queue_type == QUEUE_TYPE_EXT) && - (cs->jobs_in_queue_cnt[i])) { + if ((q->queue_type == QUEUE_TYPE_EXT || + q->queue_type == QUEUE_TYPE_HW) && + cs->jobs_in_queue_cnt[i]) { atomic_t *free_slots = &hdev->completion_queue[i].free_slots_cnt; atomic_add(cs->jobs_in_queue_cnt[i], free_slots); @@ -408,8 +534,8 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id) q->ci = hl_queue_inc_ptr(q->ci); } -static int ext_and_cpu_hw_queue_init(struct hl_device *hdev, - struct hl_hw_queue *q, bool is_cpu_queue) +static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, + bool is_cpu_queue) { void *p; int rc; @@ -459,7 +585,7 @@ free_queue: return rc; } -static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) { void *p; @@ -479,18 +605,38 @@ static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) return 0; } -static int cpu_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) { - return ext_and_cpu_hw_queue_init(hdev, q, true); + return ext_and_cpu_queue_init(hdev, q, true); } -static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) { - return ext_and_cpu_hw_queue_init(hdev, q, false); + return ext_and_cpu_queue_init(hdev, q, false); +} + +static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +{ + void *p; + + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, + HL_QUEUE_SIZE_IN_BYTES, + &q->bus_address, + GFP_KERNEL | __GFP_ZERO); + if (!p) + return -ENOMEM; + + q->kernel_address = (u64) (uintptr_t) p; + + /* Make sure read/write pointers are initialized to start of queue */ + q->ci = 0; + q->pi = 0; + + return 0; } /* - * hw_queue_init - main initialization function for H/W queue object + * queue_init - main initialization function for H/W queue object * * @hdev: pointer to hl_device device structure * @q: pointer to hl_hw_queue queue structure @@ -499,7 +645,7 @@ static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) * Allocate dma-able memory for the queue and initialize fields * Returns 0 on success */ -static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, +static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q, u32 hw_queue_id) { int rc; @@ -510,21 +656,20 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, switch (q->queue_type) { case QUEUE_TYPE_EXT: - rc = ext_hw_queue_init(hdev, q); + rc = ext_queue_init(hdev, q); break; - case QUEUE_TYPE_INT: - rc = int_hw_queue_init(hdev, q); + rc = int_queue_init(hdev, q); break; - case QUEUE_TYPE_CPU: - rc = cpu_hw_queue_init(hdev, q); + rc = cpu_queue_init(hdev, q); + break; + case QUEUE_TYPE_HW: + rc = hw_queue_init(hdev, q); break; - case QUEUE_TYPE_NA: q->valid = 0; return 0; - default: dev_crit(hdev->dev, "wrong queue type %d during init\n", q->queue_type); @@ -548,7 +693,7 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, * * Free the queue memory */ -static void hw_queue_fini(struct hl_device *hdev, struct hl_hw_queue *q) +static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q) { if (!q->valid) return; @@ -606,7 +751,7 @@ int hl_hw_queues_create(struct hl_device *hdev) i < HL_MAX_QUEUES ; i++, q_ready_cnt++, q++) { q->queue_type = asic->hw_queues_props[i].type; - rc = hw_queue_init(hdev, q, i); + rc = queue_init(hdev, q, i); if (rc) { dev_err(hdev->dev, "failed to initialize queue %d\n", i); @@ -618,7 +763,7 @@ int hl_hw_queues_create(struct hl_device *hdev) release_queues: for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++) - hw_queue_fini(hdev, q); + queue_fini(hdev, q); kfree(hdev->kernel_queues); @@ -631,7 +776,7 @@ void hl_hw_queues_destroy(struct hl_device *hdev) int i; for (i = 0, q = hdev->kernel_queues ; i < HL_MAX_QUEUES ; i++, q++) - hw_queue_fini(hdev, q); + queue_fini(hdev, q); kfree(hdev->kernel_queues); } diff --git a/drivers/misc/habanalabs/hwmon.c b/drivers/misc/habanalabs/hwmon.c index 77facd25c4a2..7be4bace9b4f 100644 --- a/drivers/misc/habanalabs/hwmon.c +++ b/drivers/misc/habanalabs/hwmon.c @@ -26,7 +26,7 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, int rc, i, j; for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) { - type = __le32_to_cpu(sensors_arr[i].type); + type = le32_to_cpu(sensors_arr[i].type); if ((type == 0) && (sensors_arr[i].flags == 0)) break; @@ -58,10 +58,10 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, } for (i = 0 ; i < arr_size ; i++) { - type = __le32_to_cpu(sensors_arr[i].type); + type = le32_to_cpu(sensors_arr[i].type); curr_arr = sensors_by_type[type]; curr_arr[sensors_by_type_next_index[type]++] = - __le32_to_cpu(sensors_arr[i].flags); + le32_to_cpu(sensors_arr[i].flags); } channels_info = kcalloc(num_active_sensor_types + 1, @@ -273,7 +273,7 @@ long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr) memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = __cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET << + pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET << ARMCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -299,7 +299,7 @@ long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr) memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = __cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET << + pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET << ARMCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -325,7 +325,7 @@ long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr) memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = __cpu_to_le32(ARMCP_PACKET_CURRENT_GET << + pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET << ARMCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -351,7 +351,7 @@ long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr) memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = __cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET << + pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET << ARMCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -377,7 +377,7 @@ long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr) memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = __cpu_to_le32(ARMCP_PACKET_PWM_GET << + pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET << ARMCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -403,11 +403,11 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = __cpu_to_le32(ARMCP_PACKET_PWM_SET << + pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET << ARMCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); - pkt.value = __cpu_to_le64(value); + pkt.value = cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), SENSORS_PKT_TIMEOUT, NULL); @@ -421,6 +421,7 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, int hl_hwmon_init(struct hl_device *hdev) { struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev; + struct asic_fixed_properties *prop = &hdev->asic_prop; int rc; if ((hdev->hwmon_initialized) || !(hdev->fw_loading)) @@ -430,7 +431,8 @@ int hl_hwmon_init(struct hl_device *hdev) hdev->hl_chip_info->ops = &hl_hwmon_ops; hdev->hwmon_dev = hwmon_device_register_with_info(dev, - "habanalabs", hdev, hdev->hl_chip_info, NULL); + prop->armcp_info.card_name, hdev, + hdev->hl_chip_info, NULL); if (IS_ERR(hdev->hwmon_dev)) { rc = PTR_ERR(hdev->hwmon_dev); dev_err(hdev->dev, diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h index 1f1e35e86d84..e4c6699a1868 100644 --- a/drivers/misc/habanalabs/include/armcp_if.h +++ b/drivers/misc/habanalabs/include/armcp_if.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2016-2018 HabanaLabs, Ltd. + * Copyright 2016-2019 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -41,33 +41,34 @@ enum pq_init_status { /* * ArmCP Primary Queue Packets * - * During normal operation, KMD needs to send various messages to ArmCP, - * usually either to SET some value into a H/W periphery or to GET the current - * value of some H/W periphery. For example, SET the frequency of MME/TPC and - * GET the value of the thermal sensor. - * - * These messages can be initiated either by the User application or by KMD - * itself, e.g. power management code. In either case, the communication from - * KMD to ArmCP will *always* be in synchronous mode, meaning that KMD will - * send a single message and poll until the message was acknowledged and the - * results are ready (if results are needed). - * - * This means that only a single message can be sent at a time and KMD must - * wait for its result before sending the next message. Having said that, - * because these are control messages which are sent in a relatively low + * During normal operation, the host's kernel driver needs to send various + * messages to ArmCP, usually either to SET some value into a H/W periphery or + * to GET the current value of some H/W periphery. For example, SET the + * frequency of MME/TPC and GET the value of the thermal sensor. + * + * These messages can be initiated either by the User application or by the + * host's driver itself, e.g. power management code. In either case, the + * communication from the host's driver to ArmCP will *always* be in + * synchronous mode, meaning that the host will send a single message and poll + * until the message was acknowledged and the results are ready (if results are + * needed). + * + * This means that only a single message can be sent at a time and the host's + * driver must wait for its result before sending the next message. Having said + * that, because these are control messages which are sent in a relatively low * frequency, this limitation seems acceptable. It's important to note that * in case of multiple devices, messages to different devices *can* be sent * at the same time. * * The message, inputs/outputs (if relevant) and fence object will be located - * on the device DDR at an address that will be determined by KMD. During - * device initialization phase, KMD will pass to ArmCP that address. Most of - * the message types will contain inputs/outputs inside the message itself. - * The common part of each message will contain the opcode of the message (its - * type) and a field representing a fence object. - * - * When KMD wishes to send a message to ArmCP, it will write the message - * contents to the device DDR, clear the fence object and then write the + * on the device DDR at an address that will be determined by the host's driver. + * During device initialization phase, the host will pass to ArmCP that address. + * Most of the message types will contain inputs/outputs inside the message + * itself. The common part of each message will contain the opcode of the + * message (its type) and a field representing a fence object. + * + * When the host's driver wishes to send a message to ArmCP, it will write the + * message contents to the device DDR, clear the fence object and then write the * value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue * the 484 interrupt-id to the ARM core. * @@ -78,12 +79,13 @@ enum pq_init_status { * device DDR and then write to the fence object. If an error occurred, ArmCP * will fill the rc field with the right error code. * - * In the meantime, KMD will poll on the fence object. Once KMD sees that the - * fence object is signaled, it will read the results from the device DDR - * (if relevant) and resume the code execution in KMD. + * In the meantime, the host's driver will poll on the fence object. Once the + * host sees that the fence object is signaled, it will read the results from + * the device DDR (if relevant) and resume the code execution in the host's + * driver. * * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8 - * so the value being put by the KMD matches the value read by ArmCP + * so the value being put by the host's driver matches the value read by ArmCP * * Non-QMAN packets should be limited to values 1 through (2^8 - 1) * @@ -148,9 +150,9 @@ enum pq_init_status { * * ARMCP_PACKET_INFO_GET - * Fetch information from the device as specified in the packet's - * structure. KMD passes the max size it allows the ArmCP to write to - * the structure, to prevent data corruption in case of mismatched - * KMD/FW versions. + * structure. The host's driver passes the max size it allows the ArmCP to + * write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. * * ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed * @@ -183,9 +185,9 @@ enum pq_init_status { * ARMCP_PACKET_EEPROM_DATA_GET - * Get EEPROM data from the ArmCP kernel. The buffer is specified in the * addr field. The CPU will put the returned data size in the result - * field. In addition, KMD passes the max size it allows the ArmCP to - * write to the structure, to prevent data corruption in case of - * mismatched KMD/FW versions. + * field. In addition, the host's driver passes the max size it allows the + * ArmCP to write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. * */ @@ -231,7 +233,7 @@ struct armcp_packet { __le32 ctl; - __le32 fence; /* Signal to KMD that message is completed */ + __le32 fence; /* Signal to host that message is completed */ union { struct {/* For temperature/current/voltage/fan/pwm get/set */ @@ -310,6 +312,7 @@ struct eq_generic_event { * ArmCP info */ +#define CARD_NAME_MAX_LEN 16 #define VERSION_MAX_LEN 128 #define ARMCP_MAX_SENSORS 128 @@ -318,6 +321,19 @@ struct armcp_sensor { __le32 flags; }; +/** + * struct armcp_info - Info from ArmCP that is necessary to the host's driver + * @sensors: available sensors description. + * @kernel_version: ArmCP linux kernel version. + * @reserved: reserved field. + * @cpld_version: CPLD programmed F/W version. + * @infineon_version: Infineon main DC-DC version. + * @fuse_version: silicon production FUSE information. + * @thermal_version: thermald S/W version. + * @armcp_version: ArmCP S/W version. + * @dram_size: available DRAM size. + * @card_name: card name that will be displayed in HWMON subsystem on the host + */ struct armcp_info { struct armcp_sensor sensors[ARMCP_MAX_SENSORS]; __u8 kernel_version[VERSION_MAX_LEN]; @@ -328,6 +344,7 @@ struct armcp_info { __u8 thermal_version[VERSION_MAX_LEN]; __u8 armcp_version[VERSION_MAX_LEN]; __le64 dram_size; + char card_name[CARD_NAME_MAX_LEN]; }; #endif /* ARMCP_IF_H */ diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h index 8618891d5afa..3c44ef3a23ed 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h @@ -260,4 +260,6 @@ #define DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT #define DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT +#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT 1 + #endif /* ASIC_REG_GOYA_MASKS_H_ */ diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h index 19b0f0ef1d0b..fce490e6a231 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h @@ -84,6 +84,7 @@ #include "tpc6_rtr_regs.h" #include "tpc7_nrtr_regs.h" #include "tpc0_eml_cfg_regs.h" +#include "psoc_etr_regs.h" #include "psoc_global_conf_masks.h" #include "dma_macro_masks.h" diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_etr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_etr_regs.h new file mode 100644 index 000000000000..b7c33e025db5 --- /dev/null +++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_etr_regs.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2018 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +/************************************ + ** This is an auto-generated file ** + ** DO NOT EDIT BELOW ** + ************************************/ + +#ifndef ASIC_REG_PSOC_ETR_REGS_H_ +#define ASIC_REG_PSOC_ETR_REGS_H_ + +/* + ***************************************** + * PSOC_ETR (Prototype: ETR) + ***************************************** + */ + +#define mmPSOC_ETR_RSZ 0x2C43004 + +#define mmPSOC_ETR_STS 0x2C4300C + +#define mmPSOC_ETR_RRD 0x2C43010 + +#define mmPSOC_ETR_RRP 0x2C43014 + +#define mmPSOC_ETR_RWP 0x2C43018 + +#define mmPSOC_ETR_TRG 0x2C4301C + +#define mmPSOC_ETR_CTL 0x2C43020 + +#define mmPSOC_ETR_RWD 0x2C43024 + +#define mmPSOC_ETR_MODE 0x2C43028 + +#define mmPSOC_ETR_LBUFLEVEL 0x2C4302C + +#define mmPSOC_ETR_CBUFLEVEL 0x2C43030 + +#define mmPSOC_ETR_BUFWM 0x2C43034 + +#define mmPSOC_ETR_RRPHI 0x2C43038 + +#define mmPSOC_ETR_RWPHI 0x2C4303C + +#define mmPSOC_ETR_AXICTL 0x2C43110 + +#define mmPSOC_ETR_DBALO 0x2C43118 + +#define mmPSOC_ETR_DBAHI 0x2C4311C + +#define mmPSOC_ETR_FFSR 0x2C43300 + +#define mmPSOC_ETR_FFCR 0x2C43304 + +#define mmPSOC_ETR_PSCR 0x2C43308 + +#define mmPSOC_ETR_ITMISCOP0 0x2C43EE0 + +#define mmPSOC_ETR_ITTRFLIN 0x2C43EE8 + +#define mmPSOC_ETR_ITATBDATA0 0x2C43EEC + +#define mmPSOC_ETR_ITATBCTR2 0x2C43EF0 + +#define mmPSOC_ETR_ITATBCTR1 0x2C43EF4 + +#define mmPSOC_ETR_ITATBCTR0 0x2C43EF8 + +#define mmPSOC_ETR_ITCTRL 0x2C43F00 + +#define mmPSOC_ETR_CLAIMSET 0x2C43FA0 + +#define mmPSOC_ETR_CLAIMCLR 0x2C43FA4 + +#define mmPSOC_ETR_LAR 0x2C43FB0 + +#define mmPSOC_ETR_LSR 0x2C43FB4 + +#define mmPSOC_ETR_AUTHSTATUS 0x2C43FB8 + +#define mmPSOC_ETR_DEVID 0x2C43FC8 + +#define mmPSOC_ETR_DEVTYPE 0x2C43FCC + +#define mmPSOC_ETR_PERIPHID4 0x2C43FD0 + +#define mmPSOC_ETR_PERIPHID5 0x2C43FD4 + +#define mmPSOC_ETR_PERIPHID6 0x2C43FD8 + +#define mmPSOC_ETR_PERIPHID7 0x2C43FDC + +#define mmPSOC_ETR_PERIPHID0 0x2C43FE0 + +#define mmPSOC_ETR_PERIPHID1 0x2C43FE4 + +#define mmPSOC_ETR_PERIPHID2 0x2C43FE8 + +#define mmPSOC_ETR_PERIPHID3 0x2C43FEC + +#define mmPSOC_ETR_COMPID0 0x2C43FF0 + +#define mmPSOC_ETR_COMPID1 0x2C43FF4 + +#define mmPSOC_ETR_COMPID2 0x2C43FF8 + +#define mmPSOC_ETR_COMPID3 0x2C43FFC + +#endif /* ASIC_REG_PSOC_ETR_REGS_H_ */ diff --git a/drivers/misc/habanalabs/include/goya/goya.h b/drivers/misc/habanalabs/include/goya/goya.h index 3f02a52ba4ce..43d241891e45 100644 --- a/drivers/misc/habanalabs/include/goya/goya.h +++ b/drivers/misc/habanalabs/include/goya/goya.h @@ -38,4 +38,6 @@ #define TPC_MAX_NUM 8 +#define MME_MAX_NUM 1 + #endif /* GOYA_H */ diff --git a/drivers/misc/habanalabs/include/goya/goya_reg_map.h b/drivers/misc/habanalabs/include/goya/goya_reg_map.h new file mode 100644 index 000000000000..cd89723c7f61 --- /dev/null +++ b/drivers/misc/habanalabs/include/goya/goya_reg_map.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2019 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef GOYA_REG_MAP_H_ +#define GOYA_REG_MAP_H_ + +/* + * PSOC scratch-pad registers + */ +#define mmCPU_PQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_0 +#define mmCPU_PQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_1 +#define mmCPU_EQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_2 +#define mmCPU_EQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_3 +#define mmCPU_EQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_4 +#define mmCPU_PQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_5 +#define mmCPU_EQ_CI mmPSOC_GLOBAL_CONF_SCRATCHPAD_6 +#define mmCPU_PQ_INIT_STATUS mmPSOC_GLOBAL_CONF_SCRATCHPAD_7 +#define mmCPU_CQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_8 +#define mmCPU_CQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_9 +#define mmCPU_CQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_10 +#define mmUPD_STS mmPSOC_GLOBAL_CONF_SCRATCHPAD_26 +#define mmUPD_CMD mmPSOC_GLOBAL_CONF_SCRATCHPAD_27 +#define mmPREBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_28 +#define mmUBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_29 +#define mmUBOOT_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_30 +#define mmBTL_ID mmPSOC_GLOBAL_CONF_SCRATCHPAD_31 + +#define mmHW_STATE mmPSOC_GLOBAL_CONF_APP_STATUS + +#endif /* GOYA_REG_MAP_H_ */ diff --git a/drivers/misc/habanalabs/include/hl_boot_if.h b/drivers/misc/habanalabs/include/hl_boot_if.h index 4cd04c090285..2853a2de8cf6 100644 --- a/drivers/misc/habanalabs/include/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/hl_boot_if.h @@ -20,6 +20,8 @@ enum cpu_boot_status { CPU_BOOT_STATUS_DRAM_INIT_FAIL, CPU_BOOT_STATUS_FIT_CORRUPTED, CPU_BOOT_STATUS_UBOOT_NOT_READY, + CPU_BOOT_STATUS_RESERVED, + CPU_BOOT_STATUS_TS_INIT_FAIL, }; enum kmd_msg { diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h index 71ea3c3e8ba3..a6851a9d3f03 100644 --- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h +++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h @@ -12,18 +12,16 @@ #define PAGE_SHIFT_2MB 21 #define PAGE_SIZE_2MB (_AC(1, UL) << PAGE_SHIFT_2MB) #define PAGE_SIZE_4KB (_AC(1, UL) << PAGE_SHIFT_4KB) -#define PAGE_MASK_2MB (~(PAGE_SIZE_2MB - 1)) #define PAGE_PRESENT_MASK 0x0000000000001ull #define SWAP_OUT_MASK 0x0000000000004ull #define LAST_MASK 0x0000000000800ull -#define PHYS_ADDR_MASK 0xFFFFFFFFFFFFF000ull #define HOP0_MASK 0x3000000000000ull #define HOP1_MASK 0x0FF8000000000ull #define HOP2_MASK 0x0007FC0000000ull #define HOP3_MASK 0x000003FE00000ull #define HOP4_MASK 0x00000001FF000ull -#define OFFSET_MASK 0x0000000000FFFull +#define FLAGS_MASK 0x0000000000FFFull #define HOP0_SHIFT 48 #define HOP1_SHIFT 39 @@ -31,8 +29,7 @@ #define HOP3_SHIFT 21 #define HOP4_SHIFT 12 -#define PTE_PHYS_ADDR_SHIFT 12 -#define PTE_PHYS_ADDR_MASK ~OFFSET_MASK +#define HOP_PHYS_ADDR_MASK (~FLAGS_MASK) #define HL_PTE_SIZE sizeof(u64) #define HOP_TABLE_SIZE PAGE_SIZE_4KB diff --git a/drivers/misc/habanalabs/include/qman_if.h b/drivers/misc/habanalabs/include/qman_if.h index bf59bbe27fdc..0fdb49188ed7 100644 --- a/drivers/misc/habanalabs/include/qman_if.h +++ b/drivers/misc/habanalabs/include/qman_if.h @@ -23,6 +23,8 @@ struct hl_bd { #define HL_BD_SIZE sizeof(struct hl_bd) /* + * S/W CTL FIELDS. + * * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is * valid. 1 means the repeat field is valid, 0 means not-valid, * i.e. repeat == 1 @@ -34,6 +36,16 @@ struct hl_bd { #define BD_CTL_SHADOW_INDEX_MASK 0x00000FFF /* + * H/W CTL FIELDS + */ + +#define BD_CTL_COMP_OFFSET_SHIFT 16 +#define BD_CTL_COMP_OFFSET_MASK 0x00FF0000 + +#define BD_CTL_COMP_DATA_SHIFT 0 +#define BD_CTL_COMP_DATA_MASK 0x0000FFFF + +/* * COMPLETION QUEUE */ diff --git a/drivers/misc/habanalabs/irq.c b/drivers/misc/habanalabs/irq.c index 199791b57caf..fac65fbd70e8 100644 --- a/drivers/misc/habanalabs/irq.c +++ b/drivers/misc/habanalabs/irq.c @@ -160,7 +160,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg) while (1) { bool entry_ready = - ((__le32_to_cpu(eq_base[eq->ci].hdr.ctl) & + ((le32_to_cpu(eq_base[eq->ci].hdr.ctl) & EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT); if (!entry_ready) @@ -194,7 +194,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg) skip_irq: /* Clear EQ entry ready bit */ eq_entry->hdr.ctl = - __cpu_to_le32(__le32_to_cpu(eq_entry->hdr.ctl) & + cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) & ~EQ_CTL_READY_MASK); eq->ci = hl_eq_inc_ptr(eq->ci); diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index 365fb0cb8dff..6c72cb4eff54 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -13,7 +13,6 @@ #include <linux/slab.h> #include <linux/genalloc.h> -#define PGS_IN_2MB_PAGE (PAGE_SIZE_2MB >> PAGE_SHIFT) #define HL_MMU_DEBUG 0 /* @@ -159,20 +158,19 @@ pages_pack_err: } /* - * get_userptr_from_host_va - initialize userptr structure from given host - * virtual address - * - * @hdev : habanalabs device structure - * @args : parameters containing the virtual address and size - * @p_userptr : pointer to result userptr structure + * dma_map_host_va - DMA mapping of the given host virtual address. + * @hdev: habanalabs device structure + * @addr: the host virtual address of the memory area + * @size: the size of the memory area + * @p_userptr: pointer to result userptr structure * * This function does the following: * - Allocate userptr structure * - Pin the given host memory using the userptr structure * - Perform DMA mapping to have the DMA addresses of the pages */ -static int get_userptr_from_host_va(struct hl_device *hdev, - struct hl_mem_in *args, struct hl_userptr **p_userptr) +static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, + struct hl_userptr **p_userptr) { struct hl_userptr *userptr; int rc; @@ -183,8 +181,7 @@ static int get_userptr_from_host_va(struct hl_device *hdev, goto userptr_err; } - rc = hl_pin_host_memory(hdev, args->map_host.host_virt_addr, - args->map_host.mem_size, userptr); + rc = hl_pin_host_memory(hdev, addr, size, userptr); if (rc) { dev_err(hdev->dev, "Failed to pin host memory\n"); goto pin_err; @@ -215,16 +212,16 @@ userptr_err: } /* - * free_userptr - free userptr structure - * - * @hdev : habanalabs device structure - * @userptr : userptr to free + * dma_unmap_host_va - DMA unmapping of the given host virtual address. + * @hdev: habanalabs device structure + * @userptr: userptr to free * * This function does the following: * - Unpins the physical pages * - Frees the userptr structure */ -static void free_userptr(struct hl_device *hdev, struct hl_userptr *userptr) +static void dma_unmap_host_va(struct hl_device *hdev, + struct hl_userptr *userptr) { hl_unpin_host_memory(hdev, userptr); kfree(userptr); @@ -253,10 +250,9 @@ static void dram_pg_pool_do_release(struct kref *ref) } /* - * free_phys_pg_pack - free physical page pack - * - * @hdev : habanalabs device structure - * @phys_pg_pack : physical page pack to free + * free_phys_pg_pack - free physical page pack + * @hdev: habanalabs device structure + * @phys_pg_pack: physical page pack to free * * This function does the following: * - For DRAM memory only, iterate over the pack and free each physical block @@ -264,7 +260,7 @@ static void dram_pg_pool_do_release(struct kref *ref) * - Free the hl_vm_phys_pg_pack structure */ static void free_phys_pg_pack(struct hl_device *hdev, - struct hl_vm_phys_pg_pack *phys_pg_pack) + struct hl_vm_phys_pg_pack *phys_pg_pack) { struct hl_vm *vm = &hdev->vm; u64 i; @@ -519,8 +515,8 @@ static inline int add_va_block(struct hl_device *hdev, * - Return the start address of the virtual block */ static u64 get_va_block(struct hl_device *hdev, - struct hl_va_range *va_range, u64 size, u64 hint_addr, - bool is_userptr) + struct hl_va_range *va_range, u64 size, u64 hint_addr, + bool is_userptr) { struct hl_vm_va_block *va_block, *new_va_block = NULL; u64 valid_start, valid_size, prev_start, prev_end, page_mask, @@ -528,18 +524,17 @@ static u64 get_va_block(struct hl_device *hdev, u32 page_size; bool add_prev = false; - if (is_userptr) { + if (is_userptr) /* * We cannot know if the user allocated memory with huge pages * or not, hence we continue with the biggest possible * granularity. */ - page_size = PAGE_SIZE_2MB; - page_mask = PAGE_MASK_2MB; - } else { - page_size = hdev->asic_prop.dram_page_size; - page_mask = ~((u64)page_size - 1); - } + page_size = hdev->asic_prop.pmmu.huge_page_size; + else + page_size = hdev->asic_prop.dmmu.page_size; + + page_mask = ~((u64)page_size - 1); mutex_lock(&va_range->lock); @@ -549,7 +544,6 @@ static u64 get_va_block(struct hl_device *hdev, /* calc the first possible aligned addr */ valid_start = va_block->start; - if (valid_start & (page_size - 1)) { valid_start &= page_mask; valid_start += page_size; @@ -561,7 +555,6 @@ static u64 get_va_block(struct hl_device *hdev, if (valid_size >= size && (!new_va_block || valid_size < res_valid_size)) { - new_va_block = va_block; res_valid_start = valid_start; res_valid_size = valid_size; @@ -631,11 +624,10 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) /* * init_phys_pg_pack_from_userptr - initialize physical page pack from host - * memory - * - * @ctx : current context - * @userptr : userptr to initialize from - * @pphys_pg_pack : res pointer + * memory + * @ctx: current context + * @userptr: userptr to initialize from + * @pphys_pg_pack: result pointer * * This function does the following: * - Pin the physical pages related to the given virtual block @@ -643,16 +635,19 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) * virtual block */ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, - struct hl_userptr *userptr, - struct hl_vm_phys_pg_pack **pphys_pg_pack) + struct hl_userptr *userptr, + struct hl_vm_phys_pg_pack **pphys_pg_pack) { + struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu; struct hl_vm_phys_pg_pack *phys_pg_pack; struct scatterlist *sg; dma_addr_t dma_addr; u64 page_mask, total_npages; - u32 npages, page_size = PAGE_SIZE; + u32 npages, page_size = PAGE_SIZE, + huge_page_size = mmu_prop->huge_page_size; bool first = true, is_huge_page_opt = true; int rc, i, j; + u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size); phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); if (!phys_pg_pack) @@ -675,14 +670,14 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, total_npages += npages; - if ((npages % PGS_IN_2MB_PAGE) || - (dma_addr & (PAGE_SIZE_2MB - 1))) + if ((npages % pgs_in_huge_page) || + (dma_addr & (huge_page_size - 1))) is_huge_page_opt = false; } if (is_huge_page_opt) { - page_size = PAGE_SIZE_2MB; - total_npages /= PGS_IN_2MB_PAGE; + page_size = huge_page_size; + do_div(total_npages, pgs_in_huge_page); } page_mask = ~(((u64) page_size) - 1); @@ -714,7 +709,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, dma_addr += page_size; if (is_huge_page_opt) - npages -= PGS_IN_2MB_PAGE; + npages -= pgs_in_huge_page; else npages--; } @@ -731,19 +726,18 @@ page_pack_arr_mem_err: } /* - * map_phys_page_pack - maps the physical page pack - * - * @ctx : current context - * @vaddr : start address of the virtual area to map from - * @phys_pg_pack : the pack of physical pages to map to + * map_phys_pg_pack - maps the physical page pack. + * @ctx: current context + * @vaddr: start address of the virtual area to map from + * @phys_pg_pack: the pack of physical pages to map to * * This function does the following: * - Maps each chunk of virtual memory to matching physical chunk * - Stores number of successful mappings in the given argument - * - Returns 0 on success, error code otherwise. + * - Returns 0 on success, error code otherwise */ -static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr, - struct hl_vm_phys_pg_pack *phys_pg_pack) +static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, + struct hl_vm_phys_pg_pack *phys_pg_pack) { struct hl_device *hdev = ctx->hdev; u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i; @@ -783,6 +777,36 @@ err: return rc; } +/* + * unmap_phys_pg_pack - unmaps the physical page pack + * @ctx: current context + * @vaddr: start address of the virtual area to unmap + * @phys_pg_pack: the pack of physical pages to unmap + */ +static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, + struct hl_vm_phys_pg_pack *phys_pg_pack) +{ + struct hl_device *hdev = ctx->hdev; + u64 next_vaddr, i; + u32 page_size; + + page_size = phys_pg_pack->page_size; + next_vaddr = vaddr; + + for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { + if (hl_mmu_unmap(ctx, next_vaddr, page_size)) + dev_warn_ratelimited(hdev->dev, + "unmap failed for vaddr: 0x%llx\n", next_vaddr); + + /* + * unmapping on Palladium can be really long, so avoid a CPU + * soft lockup bug by sleeping a little between unmapping pages + */ + if (hdev->pldm) + usleep_range(500, 1000); + } +} + static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *paddr) { @@ -839,7 +863,10 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, *device_addr = 0; if (is_userptr) { - rc = get_userptr_from_host_va(hdev, args, &userptr); + u64 addr = args->map_host.host_virt_addr, + size = args->map_host.mem_size; + + rc = dma_map_host_va(hdev, addr, size, &userptr); if (rc) { dev_err(hdev->dev, "failed to get userptr from va\n"); return rc; @@ -850,7 +877,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, if (rc) { dev_err(hdev->dev, "unable to init page pack for vaddr 0x%llx\n", - args->map_host.host_virt_addr); + addr); goto init_page_pack_err; } @@ -909,7 +936,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, mutex_lock(&ctx->mmu_lock); - rc = map_phys_page_pack(ctx, ret_vaddr, phys_pg_pack); + rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); if (rc) { mutex_unlock(&ctx->mmu_lock); dev_err(hdev->dev, "mapping page pack failed for handle %u\n", @@ -917,7 +944,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, goto map_err; } - hdev->asic_funcs->mmu_invalidate_cache(hdev, false); + hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type); mutex_unlock(&ctx->mmu_lock); @@ -955,7 +982,7 @@ shared_err: free_phys_pg_pack(hdev, phys_pg_pack); init_page_pack_err: if (is_userptr) - free_userptr(hdev, userptr); + dma_unmap_host_va(hdev, userptr); return rc; } @@ -965,20 +992,20 @@ init_page_pack_err: * * @ctx : current context * @vaddr : device virtual address to unmap + * @ctx_free : true if in context free flow, false otherwise. * * This function does the following: * - Unmap the physical pages related to the given virtual address * - return the device virtual block to the virtual block list */ -static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) +static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) { struct hl_device *hdev = ctx->hdev; struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; struct hl_vm_hash_node *hnode = NULL; struct hl_userptr *userptr = NULL; + struct hl_va_range *va_range; enum vm_type_t *vm_type; - u64 next_vaddr, i; - u32 page_size; bool is_userptr; int rc; @@ -1003,9 +1030,10 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) if (*vm_type == VM_TYPE_USERPTR) { is_userptr = true; + va_range = &ctx->host_va_range; userptr = hnode->ptr; rc = init_phys_pg_pack_from_userptr(ctx, userptr, - &phys_pg_pack); + &phys_pg_pack); if (rc) { dev_err(hdev->dev, "unable to init page pack for vaddr 0x%llx\n", @@ -1014,6 +1042,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) } } else if (*vm_type == VM_TYPE_PHYS_PACK) { is_userptr = false; + va_range = &ctx->dram_va_range; phys_pg_pack = hnode->ptr; } else { dev_warn(hdev->dev, @@ -1029,42 +1058,41 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) goto mapping_cnt_err; } - page_size = phys_pg_pack->page_size; - vaddr &= ~(((u64) page_size) - 1); - - next_vaddr = vaddr; + vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); mutex_lock(&ctx->mmu_lock); - for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { - if (hl_mmu_unmap(ctx, next_vaddr, page_size)) - dev_warn_ratelimited(hdev->dev, - "unmap failed for vaddr: 0x%llx\n", next_vaddr); - - /* unmapping on Palladium can be really long, so avoid a CPU - * soft lockup bug by sleeping a little between unmapping pages - */ - if (hdev->pldm) - usleep_range(500, 1000); - } + unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true); + /* + * During context free this function is called in a loop to clean all + * the context mappings. Hence the cache invalidation can be called once + * at the loop end rather than for each iteration + */ + if (!ctx_free) + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, *vm_type); mutex_unlock(&ctx->mmu_lock); - if (add_va_block(hdev, - is_userptr ? &ctx->host_va_range : &ctx->dram_va_range, - vaddr, - vaddr + phys_pg_pack->total_size - 1)) - dev_warn(hdev->dev, "add va block failed for vaddr: 0x%llx\n", - vaddr); + /* + * No point in maintaining the free VA block list if the context is + * closing as the list will be freed anyway + */ + if (!ctx_free) { + rc = add_va_block(hdev, va_range, vaddr, + vaddr + phys_pg_pack->total_size - 1); + if (rc) + dev_warn(hdev->dev, + "add va block failed for vaddr: 0x%llx\n", + vaddr); + } atomic_dec(&phys_pg_pack->mapping_cnt); kfree(hnode); if (is_userptr) { free_phys_pg_pack(hdev, phys_pg_pack); - free_userptr(hdev, userptr); + dma_unmap_host_va(hdev, userptr); } return 0; @@ -1189,8 +1217,8 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) break; case HL_MEM_OP_UNMAP: - rc = unmap_device_va(ctx, - args->in.unmap.device_virt_addr); + rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr, + false); break; default: @@ -1203,20 +1231,72 @@ out: return rc; } +static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, + u32 npages, u64 start, u32 offset, + struct hl_userptr *userptr) +{ + int rc; + + if (!access_ok((void __user *) (uintptr_t) addr, size)) { + dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr); + return -EFAULT; + } + + userptr->vec = frame_vector_create(npages); + if (!userptr->vec) { + dev_err(hdev->dev, "Failed to create frame vector\n"); + return -ENOMEM; + } + + rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, + userptr->vec); + + if (rc != npages) { + dev_err(hdev->dev, + "Failed to map host memory, user ptr probably wrong\n"); + if (rc < 0) + goto destroy_framevec; + rc = -EFAULT; + goto put_framevec; + } + + if (frame_vector_to_pages(userptr->vec) < 0) { + dev_err(hdev->dev, + "Failed to translate frame vector to pages\n"); + rc = -EFAULT; + goto put_framevec; + } + + rc = sg_alloc_table_from_pages(userptr->sgt, + frame_vector_pages(userptr->vec), + npages, offset, size, GFP_ATOMIC); + if (rc < 0) { + dev_err(hdev->dev, "failed to create SG table from pages\n"); + goto put_framevec; + } + + return 0; + +put_framevec: + put_vaddr_frames(userptr->vec); +destroy_framevec: + frame_vector_destroy(userptr->vec); + return rc; +} + /* - * hl_pin_host_memory - pins a chunk of host memory - * - * @hdev : pointer to the habanalabs device structure - * @addr : the user-space virtual address of the memory area - * @size : the size of the memory area - * @userptr : pointer to hl_userptr structure + * hl_pin_host_memory - pins a chunk of host memory. + * @hdev: pointer to the habanalabs device structure + * @addr: the host virtual address of the memory area + * @size: the size of the memory area + * @userptr: pointer to hl_userptr structure * * This function does the following: * - Pins the physical pages - * - Create a SG list from those pages + * - Create an SG list from those pages */ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, - struct hl_userptr *userptr) + struct hl_userptr *userptr) { u64 start, end; u32 npages, offset; @@ -1227,11 +1307,6 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, return -EINVAL; } - if (!access_ok((void __user *) (uintptr_t) addr, size)) { - dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr); - return -EFAULT; - } - /* * If the combination of the address and size requested for this memory * region causes an integer overflow, return error. @@ -1244,6 +1319,14 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, return -EINVAL; } + /* + * This function can be called also from data path, hence use atomic + * always as it is not a big allocation. + */ + userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC); + if (!userptr->sgt) + return -ENOMEM; + start = addr & PAGE_MASK; offset = addr & ~PAGE_MASK; end = PAGE_ALIGN(addr + size); @@ -1254,42 +1337,12 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, userptr->dma_mapped = false; INIT_LIST_HEAD(&userptr->job_node); - userptr->vec = frame_vector_create(npages); - if (!userptr->vec) { - dev_err(hdev->dev, "Failed to create frame vector\n"); - return -ENOMEM; - } - - rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, - userptr->vec); - - if (rc != npages) { - dev_err(hdev->dev, - "Failed to map host memory, user ptr probably wrong\n"); - if (rc < 0) - goto destroy_framevec; - rc = -EFAULT; - goto put_framevec; - } - - if (frame_vector_to_pages(userptr->vec) < 0) { + rc = get_user_memory(hdev, addr, size, npages, start, offset, + userptr); + if (rc) { dev_err(hdev->dev, - "Failed to translate frame vector to pages\n"); - rc = -EFAULT; - goto put_framevec; - } - - userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC); - if (!userptr->sgt) { - rc = -ENOMEM; - goto put_framevec; - } - - rc = sg_alloc_table_from_pages(userptr->sgt, - frame_vector_pages(userptr->vec), - npages, offset, size, GFP_ATOMIC); - if (rc < 0) { - dev_err(hdev->dev, "failed to create SG table from pages\n"); + "failed to get user memory for address 0x%llx\n", + addr); goto free_sgt; } @@ -1299,34 +1352,28 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, free_sgt: kfree(userptr->sgt); -put_framevec: - put_vaddr_frames(userptr->vec); -destroy_framevec: - frame_vector_destroy(userptr->vec); return rc; } /* - * hl_unpin_host_memory - unpins a chunk of host memory - * - * @hdev : pointer to the habanalabs device structure - * @userptr : pointer to hl_userptr structure + * hl_unpin_host_memory - unpins a chunk of host memory. + * @hdev: pointer to the habanalabs device structure + * @userptr: pointer to hl_userptr structure * * This function does the following: * - Unpins the physical pages related to the host memory * - Free the SG list */ -int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) +void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) { struct page **pages; hl_debugfs_remove_userptr(hdev, userptr); if (userptr->dma_mapped) - hdev->asic_funcs->hl_dma_unmap_sg(hdev, - userptr->sgt->sgl, - userptr->sgt->nents, - userptr->dir); + hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl, + userptr->sgt->nents, + userptr->dir); pages = frame_vector_pages(userptr->vec); if (!IS_ERR(pages)) { @@ -1342,8 +1389,6 @@ int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) sg_free_table(userptr->sgt); kfree(userptr->sgt); - - return 0; } /* @@ -1542,43 +1587,16 @@ int hl_vm_ctx_init(struct hl_ctx *ctx) * @hdev : pointer to the habanalabs structure * va_range : pointer to virtual addresses range * - * This function initializes the following: - * - Checks that the given range contains the whole initial range + * This function does the following: * - Frees the virtual addresses block list and its lock */ static void hl_va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range) { - struct hl_vm_va_block *va_block; - - if (list_empty(&va_range->list)) { - dev_warn(hdev->dev, - "va list should not be empty on cleanup!\n"); - goto out; - } - - if (!list_is_singular(&va_range->list)) { - dev_warn(hdev->dev, - "va list should not contain multiple blocks on cleanup!\n"); - goto free_va_list; - } - - va_block = list_first_entry(&va_range->list, typeof(*va_block), node); - - if (va_block->start != va_range->start_addr || - va_block->end != va_range->end_addr) { - dev_warn(hdev->dev, - "wrong va block on cleanup, from 0x%llx to 0x%llx\n", - va_block->start, va_block->end); - goto free_va_list; - } - -free_va_list: mutex_lock(&va_range->lock); clear_va_list_locked(hdev, &va_range->list); mutex_unlock(&va_range->lock); -out: mutex_destroy(&va_range->lock); } @@ -1613,21 +1631,31 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) hl_debugfs_remove_ctx_mem_hash(hdev, ctx); - if (!hash_empty(ctx->mem_hash)) - dev_notice(hdev->dev, "ctx is freed while it has va in use\n"); + /* + * Clearly something went wrong on hard reset so no point in printing + * another side effect error + */ + if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash)) + dev_notice(hdev->dev, + "ctx %d is freed while it has va in use\n", + ctx->asid); hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) { dev_dbg(hdev->dev, "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n", hnode->vaddr, ctx->asid); - unmap_device_va(ctx, hnode->vaddr); + unmap_device_va(ctx, hnode->vaddr, true); } + /* invalidate the cache once after the unmapping loop */ + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK); + spin_lock(&vm->idr_lock); idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i) if (phys_pg_list->asid == ctx->asid) { dev_dbg(hdev->dev, - "page list 0x%p of asid %d is still alive\n", + "page list 0x%px of asid %d is still alive\n", phys_pg_list, ctx->asid); atomic64_sub(phys_pg_list->total_size, &hdev->dram_used_mem); diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c index 176c315836f1..6262b26e2086 100644 --- a/drivers/misc/habanalabs/mmu.c +++ b/drivers/misc/habanalabs/mmu.c @@ -25,10 +25,9 @@ static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr) return pgt_info; } -static void free_hop(struct hl_ctx *ctx, u64 hop_addr) +static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info) { struct hl_device *hdev = ctx->hdev; - struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr, hdev->asic_prop.mmu_hop_table_size); @@ -37,6 +36,13 @@ static void free_hop(struct hl_ctx *ctx, u64 hop_addr) kfree(pgt_info); } +static void free_hop(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); + + _free_hop(ctx, pgt_info); +} + static u64 alloc_hop(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; @@ -105,8 +111,8 @@ static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val) * clear the 12 LSBs and translate the shadow hop to its associated * physical hop, and add back the original 12 LSBs. */ - u64 phys_val = get_phys_addr(ctx, val & PTE_PHYS_ADDR_MASK) | - (val & OFFSET_MASK); + u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) | + (val & FLAGS_MASK); ctx->hdev->asic_funcs->write_pte(ctx->hdev, get_phys_addr(ctx, shadow_pte_addr), @@ -159,7 +165,7 @@ static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr) */ num_of_ptes_left = pgt_info->num_of_ptes; if (!num_of_ptes_left) - free_hop(ctx, hop_addr); + _free_hop(ctx, pgt_info); return num_of_ptes_left; } @@ -171,35 +177,50 @@ static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, ((virt_addr & mask) >> shift); } -static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP0_MASK, HOP0_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask, + mmu_prop->hop0_shift); } -static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP1_MASK, HOP1_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask, + mmu_prop->hop1_shift); } -static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP2_MASK, HOP2_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask, + mmu_prop->hop2_shift); } -static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP3_MASK, HOP3_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask, + mmu_prop->hop3_shift); } -static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask, + mmu_prop->hop4_shift); } static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) { if (curr_pte & PAGE_PRESENT_MASK) - return curr_pte & PHYS_ADDR_MASK; + return curr_pte & HOP_PHYS_ADDR_MASK; else return ULLONG_MAX; } @@ -288,23 +309,23 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) } /* need only pte 0 in hops 0 and 1 */ - pte_val = (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop0_addr, pte_val); - pte_val = (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop1_addr, pte_val); get_pte(ctx, hop1_addr); hop2_pte_addr = hop2_addr; for (i = 0 ; i < num_of_hop3 ; i++) { - pte_val = (ctx->dram_default_hops[i] & PTE_PHYS_ADDR_MASK) | + pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop2_pte_addr, pte_val); get_pte(ctx, hop2_addr); hop2_pte_addr += HL_PTE_SIZE; } - pte_val = (prop->mmu_dram_default_page_addr & PTE_PHYS_ADDR_MASK) | + pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; for (i = 0 ; i < num_of_hop3 ; i++) { @@ -400,8 +421,6 @@ int hl_mmu_init(struct hl_device *hdev) if (!hdev->mmu_enable) return 0; - /* MMU H/W init was already done in device hw_init() */ - hdev->mmu_pgt_pool = gen_pool_create(__ffs(prop->mmu_hop_table_size), -1); @@ -427,6 +446,8 @@ int hl_mmu_init(struct hl_device *hdev) goto err_pool_add; } + /* MMU H/W init will be done in device hw_init() */ + return 0; err_pool_add: @@ -450,10 +471,10 @@ void hl_mmu_fini(struct hl_device *hdev) if (!hdev->mmu_enable) return; + /* MMU H/W fini was already done in device hw_fini() */ + kvfree(hdev->mmu_shadow_hop0); gen_pool_destroy(hdev->mmu_pgt_pool); - - /* MMU H/W fini will be done in device hw_fini() */ } /** @@ -501,36 +522,36 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) dram_default_mapping_fini(ctx); if (!hash_empty(ctx->mmu_shadow_hash)) - dev_err(hdev->dev, "ctx is freed while it has pgts in use\n"); + dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n", + ctx->asid); hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) { - dev_err(hdev->dev, + dev_err_ratelimited(hdev->dev, "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n", pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes); - free_hop(ctx, pgt_info->shadow_addr); + _free_hop(ctx, pgt_info); } mutex_destroy(&ctx->mmu_lock); } -static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) +static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) { struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 hop0_addr = 0, hop0_pte_addr = 0, hop1_addr = 0, hop1_pte_addr = 0, hop2_addr = 0, hop2_pte_addr = 0, hop3_addr = 0, hop3_pte_addr = 0, hop4_addr = 0, hop4_pte_addr = 0, curr_pte; - bool is_dram_addr, is_huge, clear_hop3 = true; + bool is_huge, clear_hop3 = true; - is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr); + hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; @@ -539,7 +560,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hop1_addr == ULLONG_MAX) goto not_mapped; - hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr); + hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; @@ -548,7 +569,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hop2_addr == ULLONG_MAX) goto not_mapped; - hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr); + hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; @@ -557,7 +578,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hop3_addr == ULLONG_MAX) goto not_mapped; - hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr); + hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; @@ -575,7 +596,8 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hop4_addr == ULLONG_MAX) goto not_mapped; - hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr); + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, + virt_addr); curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; @@ -584,7 +606,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hdev->dram_default_page_mapping && is_dram_addr) { u64 default_pte = (prop->mmu_dram_default_page_addr & - PTE_PHYS_ADDR_MASK) | LAST_MASK | + HOP_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; if (curr_pte == default_pte) { dev_err(hdev->dev, @@ -667,25 +689,36 @@ not_mapped: int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) { struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 real_virt_addr; u32 real_page_size, npages; int i, rc; + bool is_dram_addr; if (!hdev->mmu_enable) return 0; + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->va_space_dram_start_address, + prop->va_space_dram_end_address); + + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + /* - * The H/W handles mapping of 4KB/2MB page. Hence if the host page size - * is bigger, we break it to sub-pages and unmap them separately. + * The H/W handles mapping of specific page sizes. Hence if the page + * size is bigger, we break it to sub-pages and unmap them separately. */ - if ((page_size % PAGE_SIZE_2MB) == 0) { - real_page_size = PAGE_SIZE_2MB; - } else if ((page_size % PAGE_SIZE_4KB) == 0) { - real_page_size = PAGE_SIZE_4KB; + if ((page_size % mmu_prop->huge_page_size) == 0) { + real_page_size = mmu_prop->huge_page_size; + } else if ((page_size % mmu_prop->page_size) == 0) { + real_page_size = mmu_prop->page_size; } else { dev_err(hdev->dev, - "page size of %u is not 4KB nor 2MB aligned, can't unmap\n", - page_size); + "page size of %u is not %uKB nor %uMB aligned, can't unmap\n", + page_size, + mmu_prop->page_size >> 10, + mmu_prop->huge_page_size >> 20); return -EFAULT; } @@ -694,7 +727,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) real_virt_addr = virt_addr; for (i = 0 ; i < npages ; i++) { - rc = _hl_mmu_unmap(ctx, real_virt_addr); + rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr); if (rc) return rc; @@ -705,10 +738,11 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) } static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, - u32 page_size) + u32 page_size, bool is_dram_addr) { struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 hop0_addr = 0, hop0_pte_addr = 0, hop1_addr = 0, hop1_pte_addr = 0, hop2_addr = 0, hop2_pte_addr = 0, @@ -716,21 +750,19 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, hop4_addr = 0, hop4_pte_addr = 0, curr_pte = 0; bool hop1_new = false, hop2_new = false, hop3_new = false, - hop4_new = false, is_huge, is_dram_addr; + hop4_new = false, is_huge; int rc = -ENOMEM; + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + /* - * This mapping function can map a 4KB/2MB page. For 2MB page there are - * only 3 hops rather than 4. Currently the DRAM allocation uses 2MB - * pages only but user memory could have been allocated with one of the - * two page sizes. Since this is a common code for all the three cases, - * we need this hugs page check. + * This mapping function can map a page or a huge page. For huge page + * there are only 3 hops rather than 4. Currently the DRAM allocation + * uses huge pages only but user memory could have been allocated with + * one of the two page sizes. Since this is a common code for all the + * three cases, we need this hugs page check. */ - is_huge = page_size == PAGE_SIZE_2MB; - - is_dram_addr = hl_mem_area_inside_range(virt_addr, page_size, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + is_huge = page_size == mmu_prop->huge_page_size; if (is_dram_addr && !is_huge) { dev_err(hdev->dev, "DRAM mapping should use huge pages only\n"); @@ -738,28 +770,28 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, } hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr); + hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new); if (hop1_addr == ULLONG_MAX) goto err; - hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr); + hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new); if (hop2_addr == ULLONG_MAX) goto err; - hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr); + hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new); if (hop3_addr == ULLONG_MAX) goto err; - hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr); + hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; if (!is_huge) { @@ -767,13 +799,14 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, if (hop4_addr == ULLONG_MAX) goto err; - hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr); + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, + virt_addr); curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; } if (hdev->dram_default_page_mapping && is_dram_addr) { u64 default_pte = (prop->mmu_dram_default_page_addr & - PTE_PHYS_ADDR_MASK) | LAST_MASK | + HOP_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; if (curr_pte != default_pte) { @@ -813,7 +846,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, goto err; } - curr_pte = (phys_addr & PTE_PHYS_ADDR_MASK) | LAST_MASK + curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; if (is_huge) @@ -823,25 +856,25 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, if (hop1_new) { curr_pte = - (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop0_pte_addr, curr_pte); } if (hop2_new) { curr_pte = - (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop1_pte_addr, curr_pte); get_pte(ctx, hop1_addr); } if (hop3_new) { curr_pte = - (hop3_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + (hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop2_pte_addr, curr_pte); get_pte(ctx, hop2_addr); } if (!is_huge) { if (hop4_new) { - curr_pte = (hop4_addr & PTE_PHYS_ADDR_MASK) | + curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop3_pte_addr, curr_pte); get_pte(ctx, hop3_addr); @@ -890,25 +923,36 @@ err: int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) { struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 real_virt_addr, real_phys_addr; u32 real_page_size, npages; int i, rc, mapped_cnt = 0; + bool is_dram_addr; if (!hdev->mmu_enable) return 0; + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->va_space_dram_start_address, + prop->va_space_dram_end_address); + + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + /* - * The H/W handles mapping of 4KB/2MB page. Hence if the host page size - * is bigger, we break it to sub-pages and map them separately. + * The H/W handles mapping of specific page sizes. Hence if the page + * size is bigger, we break it to sub-pages and map them separately. */ - if ((page_size % PAGE_SIZE_2MB) == 0) { - real_page_size = PAGE_SIZE_2MB; - } else if ((page_size % PAGE_SIZE_4KB) == 0) { - real_page_size = PAGE_SIZE_4KB; + if ((page_size % mmu_prop->huge_page_size) == 0) { + real_page_size = mmu_prop->huge_page_size; + } else if ((page_size % mmu_prop->page_size) == 0) { + real_page_size = mmu_prop->page_size; } else { dev_err(hdev->dev, - "page size of %u is not 4KB nor 2MB aligned, can't map\n", - page_size); + "page size of %u is not %dKB nor %dMB aligned, can't unmap\n", + page_size, + mmu_prop->page_size >> 10, + mmu_prop->huge_page_size >> 20); return -EFAULT; } @@ -923,7 +967,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) for (i = 0 ; i < npages ; i++) { rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr, - real_page_size); + real_page_size, is_dram_addr); if (rc) goto err; @@ -937,7 +981,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) err: real_virt_addr = virt_addr; for (i = 0 ; i < mapped_cnt ; i++) { - if (_hl_mmu_unmap(ctx, real_virt_addr)) + if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr)) dev_warn_ratelimited(hdev->dev, "failed to unmap va: 0x%llx\n", real_virt_addr); diff --git a/drivers/misc/habanalabs/sysfs.c b/drivers/misc/habanalabs/sysfs.c index 25eb46d29d88..4cd622b017b9 100644 --- a/drivers/misc/habanalabs/sysfs.c +++ b/drivers/misc/habanalabs/sysfs.c @@ -21,12 +21,12 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) memset(&pkt, 0, sizeof(pkt)); if (curr) - pkt.ctl = __cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET << + pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET << ARMCP_PKT_CTL_OPCODE_SHIFT); else - pkt.ctl = __cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET << + pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET << ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_index = __cpu_to_le32(pll_index); + pkt.pll_index = cpu_to_le32(pll_index); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), SET_CLK_PKT_TIMEOUT, &result); @@ -48,10 +48,10 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = __cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET << + pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET << ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_index = __cpu_to_le32(pll_index); - pkt.value = __cpu_to_le64(freq); + pkt.pll_index = cpu_to_le32(pll_index); + pkt.value = cpu_to_le64(freq); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), SET_CLK_PKT_TIMEOUT, NULL); @@ -70,7 +70,7 @@ u64 hl_get_max_power(struct hl_device *hdev) memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = __cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET << + pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET << ARMCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -91,9 +91,9 @@ void hl_set_max_power(struct hl_device *hdev, u64 value) memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = __cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET << + pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET << ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.value = __cpu_to_le64(value); + pkt.value = cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), SET_PWR_PKT_TIMEOUT, NULL); @@ -102,100 +102,6 @@ void hl_set_max_power(struct hl_device *hdev, u64 value) dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); } -static ssize_t pm_mng_profile_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - if (hl_device_disabled_or_in_reset(hdev)) - return -ENODEV; - - return sprintf(buf, "%s\n", - (hdev->pm_mng_profile == PM_AUTO) ? "auto" : - (hdev->pm_mng_profile == PM_MANUAL) ? "manual" : - "unknown"); -} - -static ssize_t pm_mng_profile_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - if (hl_device_disabled_or_in_reset(hdev)) { - count = -ENODEV; - goto out; - } - - mutex_lock(&hdev->fd_open_cnt_lock); - - if (atomic_read(&hdev->fd_open_cnt) > 0) { - dev_err(hdev->dev, - "Can't change PM profile while user process is opened on the device\n"); - count = -EPERM; - goto unlock_mutex; - } - - if (strncmp("auto", buf, strlen("auto")) == 0) { - /* Make sure we are in LOW PLL when changing modes */ - if (hdev->pm_mng_profile == PM_MANUAL) { - atomic_set(&hdev->curr_pll_profile, PLL_HIGH); - hl_device_set_frequency(hdev, PLL_LOW); - hdev->pm_mng_profile = PM_AUTO; - } - } else if (strncmp("manual", buf, strlen("manual")) == 0) { - /* Make sure we are in LOW PLL when changing modes */ - if (hdev->pm_mng_profile == PM_AUTO) { - flush_delayed_work(&hdev->work_freq); - hdev->pm_mng_profile = PM_MANUAL; - } - } else { - dev_err(hdev->dev, "value should be auto or manual\n"); - count = -EINVAL; - goto unlock_mutex; - } - -unlock_mutex: - mutex_unlock(&hdev->fd_open_cnt_lock); -out: - return count; -} - -static ssize_t high_pll_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - if (hl_device_disabled_or_in_reset(hdev)) - return -ENODEV; - - return sprintf(buf, "%u\n", hdev->high_pll); -} - -static ssize_t high_pll_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - long value; - int rc; - - if (hl_device_disabled_or_in_reset(hdev)) { - count = -ENODEV; - goto out; - } - - rc = kstrtoul(buf, 0, &value); - - if (rc) { - count = -EINVAL; - goto out; - } - - hdev->high_pll = value; - -out: - return count; -} - static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -351,14 +257,6 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%s\n", str); } -static ssize_t write_open_cnt_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hl_device *hdev = dev_get_drvdata(dev); - - return sprintf(buf, "%d\n", hdev->user_ctx ? 1 : 0); -} - static ssize_t soft_reset_cnt_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -450,18 +348,15 @@ static DEVICE_ATTR_RO(device_type); static DEVICE_ATTR_RO(fuse_ver); static DEVICE_ATTR_WO(hard_reset); static DEVICE_ATTR_RO(hard_reset_cnt); -static DEVICE_ATTR_RW(high_pll); static DEVICE_ATTR_RO(infineon_ver); static DEVICE_ATTR_RW(max_power); static DEVICE_ATTR_RO(pci_addr); -static DEVICE_ATTR_RW(pm_mng_profile); static DEVICE_ATTR_RO(preboot_btl_ver); static DEVICE_ATTR_WO(soft_reset); static DEVICE_ATTR_RO(soft_reset_cnt); static DEVICE_ATTR_RO(status); static DEVICE_ATTR_RO(thermal_ver); static DEVICE_ATTR_RO(uboot_ver); -static DEVICE_ATTR_RO(write_open_cnt); static struct bin_attribute bin_attr_eeprom = { .attr = {.name = "eeprom", .mode = (0444)}, @@ -477,18 +372,15 @@ static struct attribute *hl_dev_attrs[] = { &dev_attr_fuse_ver.attr, &dev_attr_hard_reset.attr, &dev_attr_hard_reset_cnt.attr, - &dev_attr_high_pll.attr, &dev_attr_infineon_ver.attr, &dev_attr_max_power.attr, &dev_attr_pci_addr.attr, - &dev_attr_pm_mng_profile.attr, &dev_attr_preboot_btl_ver.attr, &dev_attr_soft_reset.attr, &dev_attr_soft_reset_cnt.attr, &dev_attr_status.attr, &dev_attr_thermal_ver.attr, &dev_attr_uboot_ver.attr, - &dev_attr_write_open_cnt.attr, NULL, }; diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h index 94dfb9e40e29..1aa433a7f66c 100644 --- a/drivers/misc/hpilo.h +++ b/drivers/misc/hpilo.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/drivers/char/hpilo.h * diff --git a/drivers/misc/ibmvmc.h b/drivers/misc/ibmvmc.h index e140ada8fe2c..0e1756fffeae 100644 --- a/drivers/misc/ibmvmc.h +++ b/drivers/misc/ibmvmc.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0+ - * +/* SPDX-License-Identifier: GPL-2.0+ */ +/* * linux/drivers/misc/ibmvmc.h * * IBM Power Systems Virtual Management Channel Support. diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c deleted file mode 100644 index 9d0445a567db..000000000000 --- a/drivers/misc/ioc4.c +++ /dev/null @@ -1,498 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2005-2006 Silicon Graphics, Inc. All Rights Reserved. - */ - -/* This file contains the master driver module for use by SGI IOC4 subdrivers. - * - * It allocates any resources shared between multiple subdevices, and - * provides accessor functions (where needed) and the like for those - * resources. It also provides a mechanism for the subdevice modules - * to support loading and unloading. - * - * Non-shared resources (e.g. external interrupt A_INT_OUT register page - * alias, serial port and UART registers) are handled by the subdevice - * modules themselves. - * - * This is all necessary because IOC4 is not implemented as a multi-function - * PCI device, but an amalgamation of disparate registers for several - * types of device (ATA, serial, external interrupts). The normal - * resource management in the kernel doesn't have quite the right interfaces - * to handle this situation (e.g. multiple modules can't claim the same - * PCI ID), thus this IOC4 master module. - */ - -#include <linux/errno.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/ioc4.h> -#include <linux/ktime.h> -#include <linux/slab.h> -#include <linux/mutex.h> -#include <linux/time.h> -#include <asm/io.h> - -/*************** - * Definitions * - ***************/ - -/* Tweakable values */ - -/* PCI bus speed detection/calibration */ -#define IOC4_CALIBRATE_COUNT 63 /* Calibration cycle period */ -#define IOC4_CALIBRATE_CYCLES 256 /* Average over this many cycles */ -#define IOC4_CALIBRATE_DISCARD 2 /* Discard first few cycles */ -#define IOC4_CALIBRATE_LOW_MHZ 25 /* Lower bound on bus speed sanity */ -#define IOC4_CALIBRATE_HIGH_MHZ 75 /* Upper bound on bus speed sanity */ -#define IOC4_CALIBRATE_DEFAULT_MHZ 66 /* Assumed if sanity check fails */ - -/************************ - * Submodule management * - ************************/ - -static DEFINE_MUTEX(ioc4_mutex); - -static LIST_HEAD(ioc4_devices); -static LIST_HEAD(ioc4_submodules); - -/* Register an IOC4 submodule */ -int -ioc4_register_submodule(struct ioc4_submodule *is) -{ - struct ioc4_driver_data *idd; - - mutex_lock(&ioc4_mutex); - list_add(&is->is_list, &ioc4_submodules); - - /* Initialize submodule for each IOC4 */ - if (!is->is_probe) - goto out; - - list_for_each_entry(idd, &ioc4_devices, idd_list) { - if (is->is_probe(idd)) { - printk(KERN_WARNING - "%s: IOC4 submodule %s probe failed " - "for pci_dev %s", - __func__, module_name(is->is_owner), - pci_name(idd->idd_pdev)); - } - } - out: - mutex_unlock(&ioc4_mutex); - return 0; -} - -/* Unregister an IOC4 submodule */ -void -ioc4_unregister_submodule(struct ioc4_submodule *is) -{ - struct ioc4_driver_data *idd; - - mutex_lock(&ioc4_mutex); - list_del(&is->is_list); - - /* Remove submodule for each IOC4 */ - if (!is->is_remove) - goto out; - - list_for_each_entry(idd, &ioc4_devices, idd_list) { - if (is->is_remove(idd)) { - printk(KERN_WARNING - "%s: IOC4 submodule %s remove failed " - "for pci_dev %s.\n", - __func__, module_name(is->is_owner), - pci_name(idd->idd_pdev)); - } - } - out: - mutex_unlock(&ioc4_mutex); -} - -/********************* - * Device management * - *********************/ - -#define IOC4_CALIBRATE_LOW_LIMIT \ - (1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_LOW_MHZ) -#define IOC4_CALIBRATE_HIGH_LIMIT \ - (1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_HIGH_MHZ) -#define IOC4_CALIBRATE_DEFAULT \ - (1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_DEFAULT_MHZ) - -#define IOC4_CALIBRATE_END \ - (IOC4_CALIBRATE_CYCLES + IOC4_CALIBRATE_DISCARD) - -#define IOC4_INT_OUT_MODE_TOGGLE 0x7 /* Toggle INT_OUT every COUNT+1 ticks */ - -/* Determines external interrupt output clock period of the PCI bus an - * IOC4 is attached to. This value can be used to determine the PCI - * bus speed. - * - * IOC4 has a design feature that various internal timers are derived from - * the PCI bus clock. This causes IOC4 device drivers to need to take the - * bus speed into account when setting various register values (e.g. INT_OUT - * register COUNT field, UART divisors, etc). Since this information is - * needed by several subdrivers, it is determined by the main IOC4 driver, - * even though the following code utilizes external interrupt registers - * to perform the speed calculation. - */ -static void -ioc4_clock_calibrate(struct ioc4_driver_data *idd) -{ - union ioc4_int_out int_out; - union ioc4_gpcr gpcr; - unsigned int state, last_state; - uint64_t start, end, period; - unsigned int count; - - /* Enable output */ - gpcr.raw = 0; - gpcr.fields.dir = IOC4_GPCR_DIR_0; - gpcr.fields.int_out_en = 1; - writel(gpcr.raw, &idd->idd_misc_regs->gpcr_s.raw); - - /* Reset to power-on state */ - writel(0, &idd->idd_misc_regs->int_out.raw); - - /* Set up square wave */ - int_out.raw = 0; - int_out.fields.count = IOC4_CALIBRATE_COUNT; - int_out.fields.mode = IOC4_INT_OUT_MODE_TOGGLE; - int_out.fields.diag = 0; - writel(int_out.raw, &idd->idd_misc_regs->int_out.raw); - - /* Check square wave period averaged over some number of cycles */ - start = ktime_get_ns(); - state = 1; /* make sure the first read isn't a rising edge */ - for (count = 0; count <= IOC4_CALIBRATE_END; count++) { - do { /* wait for a rising edge */ - last_state = state; - int_out.raw = readl(&idd->idd_misc_regs->int_out.raw); - state = int_out.fields.int_out; - } while (last_state || !state); - - /* discard the first few cycles */ - if (count == IOC4_CALIBRATE_DISCARD) - start = ktime_get_ns(); - } - end = ktime_get_ns(); - - /* Calculation rearranged to preserve intermediate precision. - * Logically: - * 1. "end - start" gives us the measurement period over all - * the square wave cycles. - * 2. Divide by number of square wave cycles to get the period - * of a square wave cycle. - * 3. Divide by 2*(int_out.fields.count+1), which is the formula - * by which the IOC4 generates the square wave, to get the - * period of an IOC4 INT_OUT count. - */ - period = (end - start) / - (IOC4_CALIBRATE_CYCLES * 2 * (IOC4_CALIBRATE_COUNT + 1)); - - /* Bounds check the result. */ - if (period > IOC4_CALIBRATE_LOW_LIMIT || - period < IOC4_CALIBRATE_HIGH_LIMIT) { - printk(KERN_INFO - "IOC4 %s: Clock calibration failed. Assuming" - "PCI clock is %d ns.\n", - pci_name(idd->idd_pdev), - IOC4_CALIBRATE_DEFAULT / IOC4_EXTINT_COUNT_DIVISOR); - period = IOC4_CALIBRATE_DEFAULT; - } else { - u64 ns = period; - - do_div(ns, IOC4_EXTINT_COUNT_DIVISOR); - printk(KERN_DEBUG - "IOC4 %s: PCI clock is %llu ns.\n", - pci_name(idd->idd_pdev), (unsigned long long)ns); - } - - /* Remember results. We store the extint clock period rather - * than the PCI clock period so that greater precision is - * retained. Divide by IOC4_EXTINT_COUNT_DIVISOR to get - * PCI clock period. - */ - idd->count_period = period; -} - -/* There are three variants of IOC4 cards: IO9, IO10, and PCI-RT. - * Each brings out different combinations of IOC4 signals, thus. - * the IOC4 subdrivers need to know to which we're attached. - * - * We look for the presence of a SCSI (IO9) or SATA (IO10) controller - * on the same PCI bus at slot number 3 to differentiate IO9 from IO10. - * If neither is present, it's a PCI-RT. - */ -static unsigned int -ioc4_variant(struct ioc4_driver_data *idd) -{ - struct pci_dev *pdev = NULL; - int found = 0; - - /* IO9: Look for a QLogic ISP 12160 at the same bus and slot 3. */ - do { - pdev = pci_get_device(PCI_VENDOR_ID_QLOGIC, - PCI_DEVICE_ID_QLOGIC_ISP12160, pdev); - if (pdev && - idd->idd_pdev->bus->number == pdev->bus->number && - 3 == PCI_SLOT(pdev->devfn)) - found = 1; - } while (pdev && !found); - if (NULL != pdev) { - pci_dev_put(pdev); - return IOC4_VARIANT_IO9; - } - - /* IO10: Look for a Vitesse VSC 7174 at the same bus and slot 3. */ - pdev = NULL; - do { - pdev = pci_get_device(PCI_VENDOR_ID_VITESSE, - PCI_DEVICE_ID_VITESSE_VSC7174, pdev); - if (pdev && - idd->idd_pdev->bus->number == pdev->bus->number && - 3 == PCI_SLOT(pdev->devfn)) - found = 1; - } while (pdev && !found); - if (NULL != pdev) { - pci_dev_put(pdev); - return IOC4_VARIANT_IO10; - } - - /* PCI-RT: No SCSI/SATA controller will be present */ - return IOC4_VARIANT_PCI_RT; -} - -static void -ioc4_load_modules(struct work_struct *work) -{ - request_module("sgiioc4"); -} - -static DECLARE_WORK(ioc4_load_modules_work, ioc4_load_modules); - -/* Adds a new instance of an IOC4 card */ -static int -ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) -{ - struct ioc4_driver_data *idd; - struct ioc4_submodule *is; - uint32_t pcmd; - int ret; - - /* Enable IOC4 and take ownership of it */ - if ((ret = pci_enable_device(pdev))) { - printk(KERN_WARNING - "%s: Failed to enable IOC4 device for pci_dev %s.\n", - __func__, pci_name(pdev)); - goto out; - } - pci_set_master(pdev); - - /* Set up per-IOC4 data */ - idd = kmalloc(sizeof(struct ioc4_driver_data), GFP_KERNEL); - if (!idd) { - printk(KERN_WARNING - "%s: Failed to allocate IOC4 data for pci_dev %s.\n", - __func__, pci_name(pdev)); - ret = -ENODEV; - goto out_idd; - } - idd->idd_pdev = pdev; - idd->idd_pci_id = pci_id; - - /* Map IOC4 misc registers. These are shared between subdevices - * so the main IOC4 module manages them. - */ - idd->idd_bar0 = pci_resource_start(idd->idd_pdev, 0); - if (!idd->idd_bar0) { - printk(KERN_WARNING - "%s: Unable to find IOC4 misc resource " - "for pci_dev %s.\n", - __func__, pci_name(idd->idd_pdev)); - ret = -ENODEV; - goto out_pci; - } - if (!request_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs), - "ioc4_misc")) { - printk(KERN_WARNING - "%s: Unable to request IOC4 misc region " - "for pci_dev %s.\n", - __func__, pci_name(idd->idd_pdev)); - ret = -ENODEV; - goto out_pci; - } - idd->idd_misc_regs = ioremap(idd->idd_bar0, - sizeof(struct ioc4_misc_regs)); - if (!idd->idd_misc_regs) { - printk(KERN_WARNING - "%s: Unable to remap IOC4 misc region " - "for pci_dev %s.\n", - __func__, pci_name(idd->idd_pdev)); - ret = -ENODEV; - goto out_misc_region; - } - - /* Failsafe portion of per-IOC4 initialization */ - - /* Detect card variant */ - idd->idd_variant = ioc4_variant(idd); - printk(KERN_INFO "IOC4 %s: %s card detected.\n", pci_name(pdev), - idd->idd_variant == IOC4_VARIANT_IO9 ? "IO9" : - idd->idd_variant == IOC4_VARIANT_PCI_RT ? "PCI-RT" : - idd->idd_variant == IOC4_VARIANT_IO10 ? "IO10" : "unknown"); - - /* Initialize IOC4 */ - pci_read_config_dword(idd->idd_pdev, PCI_COMMAND, &pcmd); - pci_write_config_dword(idd->idd_pdev, PCI_COMMAND, - pcmd | PCI_COMMAND_PARITY | PCI_COMMAND_SERR); - - /* Determine PCI clock */ - ioc4_clock_calibrate(idd); - - /* Disable/clear all interrupts. Need to do this here lest - * one submodule request the shared IOC4 IRQ, but interrupt - * is generated by a different subdevice. - */ - /* Disable */ - writel(~0, &idd->idd_misc_regs->other_iec.raw); - writel(~0, &idd->idd_misc_regs->sio_iec); - /* Clear (i.e. acknowledge) */ - writel(~0, &idd->idd_misc_regs->other_ir.raw); - writel(~0, &idd->idd_misc_regs->sio_ir); - - /* Track PCI-device specific data */ - idd->idd_serial_data = NULL; - pci_set_drvdata(idd->idd_pdev, idd); - - mutex_lock(&ioc4_mutex); - list_add_tail(&idd->idd_list, &ioc4_devices); - - /* Add this IOC4 to all submodules */ - list_for_each_entry(is, &ioc4_submodules, is_list) { - if (is->is_probe && is->is_probe(idd)) { - printk(KERN_WARNING - "%s: IOC4 submodule 0x%s probe failed " - "for pci_dev %s.\n", - __func__, module_name(is->is_owner), - pci_name(idd->idd_pdev)); - } - } - mutex_unlock(&ioc4_mutex); - - /* Request sgiioc4 IDE driver on boards that bring that functionality - * off of IOC4. The root filesystem may be hosted on a drive connected - * to IOC4, so we need to make sure the sgiioc4 driver is loaded as it - * won't be picked up by modprobes due to the ioc4 module owning the - * PCI device. - */ - if (idd->idd_variant != IOC4_VARIANT_PCI_RT) { - /* Request the module from a work procedure as the modprobe - * goes out to a userland helper and that will hang if done - * directly from ioc4_probe(). - */ - printk(KERN_INFO "IOC4 loading sgiioc4 submodule\n"); - schedule_work(&ioc4_load_modules_work); - } - - return 0; - -out_misc_region: - release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs)); -out_pci: - kfree(idd); -out_idd: - pci_disable_device(pdev); -out: - return ret; -} - -/* Removes a particular instance of an IOC4 card. */ -static void -ioc4_remove(struct pci_dev *pdev) -{ - struct ioc4_submodule *is; - struct ioc4_driver_data *idd; - - idd = pci_get_drvdata(pdev); - - /* Remove this IOC4 from all submodules */ - mutex_lock(&ioc4_mutex); - list_for_each_entry(is, &ioc4_submodules, is_list) { - if (is->is_remove && is->is_remove(idd)) { - printk(KERN_WARNING - "%s: IOC4 submodule 0x%s remove failed " - "for pci_dev %s.\n", - __func__, module_name(is->is_owner), - pci_name(idd->idd_pdev)); - } - } - mutex_unlock(&ioc4_mutex); - - /* Release resources */ - iounmap(idd->idd_misc_regs); - if (!idd->idd_bar0) { - printk(KERN_WARNING - "%s: Unable to get IOC4 misc mapping for pci_dev %s. " - "Device removal may be incomplete.\n", - __func__, pci_name(idd->idd_pdev)); - } - release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs)); - - /* Disable IOC4 and relinquish */ - pci_disable_device(pdev); - - /* Remove and free driver data */ - mutex_lock(&ioc4_mutex); - list_del(&idd->idd_list); - mutex_unlock(&ioc4_mutex); - kfree(idd); -} - -static const struct pci_device_id ioc4_id_table[] = { - {PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC4, PCI_ANY_ID, - PCI_ANY_ID, 0x0b4000, 0xFFFFFF}, - {0} -}; - -static struct pci_driver ioc4_driver = { - .name = "IOC4", - .id_table = ioc4_id_table, - .probe = ioc4_probe, - .remove = ioc4_remove, -}; - -MODULE_DEVICE_TABLE(pci, ioc4_id_table); - -/********************* - * Module management * - *********************/ - -/* Module load */ -static int __init -ioc4_init(void) -{ - return pci_register_driver(&ioc4_driver); -} - -/* Module unload */ -static void __exit -ioc4_exit(void) -{ - /* Ensure ioc4_load_modules() has completed before exiting */ - flush_work(&ioc4_load_modules_work); - pci_unregister_driver(&ioc4_driver); -} - -module_init(ioc4_init); -module_exit(ioc4_exit); - -MODULE_AUTHOR("Brent Casavant - Silicon Graphics, Inc. <bcasavan@sgi.com>"); -MODULE_DESCRIPTION("PCI driver master module for SGI IOC4 Base-IO Card"); -MODULE_LICENSE("GPL"); - -EXPORT_SYMBOL(ioc4_register_submodule); -EXPORT_SYMBOL(ioc4_unregister_submodule); diff --git a/drivers/misc/isl29020.c b/drivers/misc/isl29020.c index b6125620eb8f..fc5ff2805b94 100644 --- a/drivers/misc/isl29020.c +++ b/drivers/misc/isl29020.c @@ -173,6 +173,7 @@ static int isl29020_probe(struct i2c_client *client, static int isl29020_remove(struct i2c_client *client) { + pm_runtime_disable(&client->dev); sysfs_remove_group(&client->dev.kobj, &m_als_gr); return 0; } diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c index 057d7bbde402..dd65cedf3b12 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -16,7 +16,7 @@ #include <linux/types.h> #include <linux/platform_device.h> #include <linux/interrupt.h> -#include <linux/input-polldev.h> +#include <linux/input.h> #include <linux/delay.h> #include <linux/wait.h> #include <linux/poll.h> @@ -434,23 +434,23 @@ int lis3lv02d_poweron(struct lis3lv02d *lis3) EXPORT_SYMBOL_GPL(lis3lv02d_poweron); -static void lis3lv02d_joystick_poll(struct input_polled_dev *pidev) +static void lis3lv02d_joystick_poll(struct input_dev *input) { - struct lis3lv02d *lis3 = pidev->private; + struct lis3lv02d *lis3 = input_get_drvdata(input); int x, y, z; mutex_lock(&lis3->mutex); lis3lv02d_get_xyz(lis3, &x, &y, &z); - input_report_abs(pidev->input, ABS_X, x); - input_report_abs(pidev->input, ABS_Y, y); - input_report_abs(pidev->input, ABS_Z, z); - input_sync(pidev->input); + input_report_abs(input, ABS_X, x); + input_report_abs(input, ABS_Y, y); + input_report_abs(input, ABS_Z, z); + input_sync(input); mutex_unlock(&lis3->mutex); } -static void lis3lv02d_joystick_open(struct input_polled_dev *pidev) +static int lis3lv02d_joystick_open(struct input_dev *input) { - struct lis3lv02d *lis3 = pidev->private; + struct lis3lv02d *lis3 = input_get_drvdata(input); if (lis3->pm_dev) pm_runtime_get_sync(lis3->pm_dev); @@ -461,12 +461,14 @@ static void lis3lv02d_joystick_open(struct input_polled_dev *pidev) * Update coordinates for the case where poll interval is 0 and * the chip in running purely under interrupt control */ - lis3lv02d_joystick_poll(pidev); + lis3lv02d_joystick_poll(input); + + return 0; } -static void lis3lv02d_joystick_close(struct input_polled_dev *pidev) +static void lis3lv02d_joystick_close(struct input_dev *input) { - struct lis3lv02d *lis3 = pidev->private; + struct lis3lv02d *lis3 = input_get_drvdata(input); atomic_set(&lis3->wake_thread, 0); if (lis3->pm_dev) @@ -497,7 +499,7 @@ out: static void lis302dl_interrupt_handle_click(struct lis3lv02d *lis3) { - struct input_dev *dev = lis3->idev->input; + struct input_dev *dev = lis3->idev; u8 click_src; mutex_lock(&lis3->mutex); @@ -677,26 +679,19 @@ int lis3lv02d_joystick_enable(struct lis3lv02d *lis3) if (lis3->idev) return -EINVAL; - lis3->idev = input_allocate_polled_device(); - if (!lis3->idev) + input_dev = input_allocate_device(); + if (!input_dev) return -ENOMEM; - lis3->idev->poll = lis3lv02d_joystick_poll; - lis3->idev->open = lis3lv02d_joystick_open; - lis3->idev->close = lis3lv02d_joystick_close; - lis3->idev->poll_interval = MDPS_POLL_INTERVAL; - lis3->idev->poll_interval_min = MDPS_POLL_MIN; - lis3->idev->poll_interval_max = MDPS_POLL_MAX; - lis3->idev->private = lis3; - input_dev = lis3->idev->input; - input_dev->name = "ST LIS3LV02DL Accelerometer"; input_dev->phys = DRIVER_NAME "/input0"; input_dev->id.bustype = BUS_HOST; input_dev->id.vendor = 0; input_dev->dev.parent = &lis3->pdev->dev; - set_bit(EV_ABS, input_dev->evbit); + input_dev->open = lis3lv02d_joystick_open; + input_dev->close = lis3lv02d_joystick_close; + max_val = (lis3->mdps_max_val * lis3->scale) / LIS3_ACCURACY; if (lis3->whoami == WAI_12B) { fuzz = LIS3_DEFAULT_FUZZ_12B; @@ -712,17 +707,32 @@ int lis3lv02d_joystick_enable(struct lis3lv02d *lis3) input_set_abs_params(input_dev, ABS_Y, -max_val, max_val, fuzz, flat); input_set_abs_params(input_dev, ABS_Z, -max_val, max_val, fuzz, flat); + input_set_drvdata(input_dev, lis3); + lis3->idev = input_dev; + + err = input_setup_polling(input_dev, lis3lv02d_joystick_poll); + if (err) + goto err_free_input; + + input_set_poll_interval(input_dev, MDPS_POLL_INTERVAL); + input_set_min_poll_interval(input_dev, MDPS_POLL_MIN); + input_set_max_poll_interval(input_dev, MDPS_POLL_MAX); + lis3->mapped_btns[0] = lis3lv02d_get_axis(abs(lis3->ac.x), btns); lis3->mapped_btns[1] = lis3lv02d_get_axis(abs(lis3->ac.y), btns); lis3->mapped_btns[2] = lis3lv02d_get_axis(abs(lis3->ac.z), btns); - err = input_register_polled_device(lis3->idev); - if (err) { - input_free_polled_device(lis3->idev); - lis3->idev = NULL; - } + err = input_register_device(lis3->idev); + if (err) + goto err_free_input; + return 0; + +err_free_input: + input_free_device(input_dev); + lis3->idev = NULL; return err; + } EXPORT_SYMBOL_GPL(lis3lv02d_joystick_enable); @@ -738,8 +748,7 @@ void lis3lv02d_joystick_disable(struct lis3lv02d *lis3) if (lis3->irq) misc_deregister(&lis3->miscdev); - input_unregister_polled_device(lis3->idev); - input_free_polled_device(lis3->idev); + input_unregister_device(lis3->idev); lis3->idev = NULL; } EXPORT_SYMBOL_GPL(lis3lv02d_joystick_disable); @@ -895,10 +904,9 @@ static void lis3lv02d_8b_configure(struct lis3lv02d *lis3, (p->click_thresh_y << 4)); if (lis3->idev) { - struct input_dev *input_dev = lis3->idev->input; - input_set_capability(input_dev, EV_KEY, BTN_X); - input_set_capability(input_dev, EV_KEY, BTN_Y); - input_set_capability(input_dev, EV_KEY, BTN_Z); + input_set_capability(lis3->idev, EV_KEY, BTN_X); + input_set_capability(lis3->idev, EV_KEY, BTN_Y); + input_set_capability(lis3->idev, EV_KEY, BTN_Z); } } diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h index 1b0c99883c57..c394c0b08519 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.h +++ b/drivers/misc/lis3lv02d/lis3lv02d.h @@ -6,7 +6,7 @@ * Copyright (C) 2008-2009 Eric Piel */ #include <linux/platform_device.h> -#include <linux/input-polldev.h> +#include <linux/input.h> #include <linux/regulator/consumer.h> #include <linux/miscdevice.h> @@ -281,7 +281,7 @@ struct lis3lv02d { * (1/1000th of earth gravity) */ - struct input_polled_dev *idev; /* input device */ + struct input_dev *idev; /* input device */ struct platform_device *pdev; /* platform device */ struct regulator_bulk_data regulators[2]; atomic_t count; /* interrupt count after last read */ diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile index fb10eafe9bde..c70b3822013f 100644 --- a/drivers/misc/lkdtm/Makefile +++ b/drivers/misc/lkdtm/Makefile @@ -9,6 +9,7 @@ lkdtm-$(CONFIG_LKDTM) += refcount.o lkdtm-$(CONFIG_LKDTM) += rodata_objcopy.o lkdtm-$(CONFIG_LKDTM) += usercopy.o lkdtm-$(CONFIG_LKDTM) += stackleak.o +lkdtm-$(CONFIG_LKDTM) += cfi.o KASAN_SANITIZE_stackleak.o := n KCOV_INSTRUMENT_rodata.o := n diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c index 1606658b9b7e..de87693cf557 100644 --- a/drivers/misc/lkdtm/bugs.c +++ b/drivers/misc/lkdtm/bugs.c @@ -12,6 +12,10 @@ #include <linux/sched/task_stack.h> #include <linux/uaccess.h> +#ifdef CONFIG_X86_32 +#include <asm/desc.h> +#endif + struct lkdtm_list { struct list_head node; }; @@ -22,7 +26,7 @@ struct lkdtm_list { * recurse past the end of THREAD_SIZE by default. */ #if defined(CONFIG_FRAME_WARN) && (CONFIG_FRAME_WARN > 0) -#define REC_STACK_SIZE (CONFIG_FRAME_WARN / 2) +#define REC_STACK_SIZE (_AC(CONFIG_FRAME_WARN, UL) / 2) #else #define REC_STACK_SIZE (THREAD_SIZE / 8) #endif @@ -75,7 +79,12 @@ static int warn_counter; void lkdtm_WARNING(void) { - WARN(1, "Warning message trigger count: %d\n", warn_counter++); + WARN_ON(++warn_counter); +} + +void lkdtm_WARNING_MESSAGE(void) +{ + WARN(1, "Warning message trigger count: %d\n", ++warn_counter); } void lkdtm_EXCEPTION(void) @@ -91,7 +100,7 @@ void lkdtm_LOOP(void) void lkdtm_EXHAUST_STACK(void) { - pr_info("Calling function with %d frame size to depth %d ...\n", + pr_info("Calling function with %lu frame size to depth %d ...\n", REC_STACK_SIZE, recur_count); recursive_loop(recur_count); pr_info("FAIL: survived without exhausting stack?!\n"); @@ -269,7 +278,7 @@ void lkdtm_STACK_GUARD_PAGE_TRAILING(void) void lkdtm_UNSET_SMEP(void) { -#ifdef CONFIG_X86_64 +#if IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_UML) #define MOV_CR4_DEPTH 64 void (*direct_write_cr4)(unsigned long val); unsigned char *insn; @@ -329,6 +338,43 @@ void lkdtm_UNSET_SMEP(void) native_write_cr4(cr4); } #else - pr_err("FAIL: this test is x86_64-only\n"); + pr_err("XFAIL: this test is x86_64-only\n"); +#endif +} + +void lkdtm_DOUBLE_FAULT(void) +{ +#ifdef CONFIG_X86_32 + /* + * Trigger #DF by setting the stack limit to zero. This clobbers + * a GDT TLS slot, which is okay because the current task will die + * anyway due to the double fault. + */ + struct desc_struct d = { + .type = 3, /* expand-up, writable, accessed data */ + .p = 1, /* present */ + .d = 1, /* 32-bit */ + .g = 0, /* limit in bytes */ + .s = 1, /* not system */ + }; + + local_irq_disable(); + write_gdt_entry(get_cpu_gdt_rw(smp_processor_id()), + GDT_ENTRY_TLS_MIN, &d, DESCTYPE_S); + + /* + * Put our zero-limit segment in SS and then trigger a fault. The + * 4-byte access to (%esp) will fault with #SS, and the attempt to + * deliver the fault will recursively cause #SS and result in #DF. + * This whole process happens while NMIs and MCEs are blocked by the + * MOV SS window. This is nice because an NMI with an invalid SS + * would also double-fault, resulting in the NMI or MCE being lost. + */ + asm volatile ("movw %0, %%ss; addl $0, (%%esp)" :: + "r" ((unsigned short)(GDT_ENTRY_TLS_MIN << 3))); + + pr_err("FAIL: tried to double fault but didn't die\n"); +#else + pr_err("XFAIL: this test is ia32-only\n"); #endif } diff --git a/drivers/misc/lkdtm/cfi.c b/drivers/misc/lkdtm/cfi.c new file mode 100644 index 000000000000..e73ebdbfa806 --- /dev/null +++ b/drivers/misc/lkdtm/cfi.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This is for all the tests relating directly to Control Flow Integrity. + */ +#include "lkdtm.h" + +static int called_count; + +/* Function taking one argument, without a return value. */ +static noinline void lkdtm_increment_void(int *counter) +{ + (*counter)++; +} + +/* Function taking one argument, returning int. */ +static noinline int lkdtm_increment_int(int *counter) +{ + (*counter)++; + + return *counter; +} +/* + * This tries to call an indirect function with a mismatched prototype. + */ +void lkdtm_CFI_FORWARD_PROTO(void) +{ + /* + * Matches lkdtm_increment_void()'s prototype, but not + * lkdtm_increment_int()'s prototype. + */ + void (*func)(int *); + + pr_info("Calling matched prototype ...\n"); + func = lkdtm_increment_void; + func(&called_count); + + pr_info("Calling mismatched prototype ...\n"); + func = (void *)lkdtm_increment_int; + func(&called_count); + + pr_info("Fail: survived mismatched prototype function call!\n"); +} diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index 66ae6b2a6950..ee0d6e721441 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -104,6 +104,7 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(PANIC), CRASHTYPE(BUG), CRASHTYPE(WARNING), + CRASHTYPE(WARNING_MESSAGE), CRASHTYPE(EXCEPTION), CRASHTYPE(LOOP), CRASHTYPE(EXHAUST_STACK), @@ -169,6 +170,10 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(USERCOPY_KERNEL), CRASHTYPE(USERCOPY_KERNEL_DS), CRASHTYPE(STACKLEAK_ERASING), + CRASHTYPE(CFI_FORWARD_PROTO), +#ifdef CONFIG_X86_32 + CRASHTYPE(DOUBLE_FAULT), +#endif }; diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h index 6a284a87a037..c56d23e37643 100644 --- a/drivers/misc/lkdtm/lkdtm.h +++ b/drivers/misc/lkdtm/lkdtm.h @@ -11,6 +11,7 @@ void __init lkdtm_bugs_init(int *recur_param); void lkdtm_PANIC(void); void lkdtm_BUG(void); void lkdtm_WARNING(void); +void lkdtm_WARNING_MESSAGE(void); void lkdtm_EXCEPTION(void); void lkdtm_LOOP(void); void lkdtm_EXHAUST_STACK(void); @@ -27,6 +28,9 @@ void lkdtm_CORRUPT_USER_DS(void); void lkdtm_STACK_GUARD_PAGE_LEADING(void); void lkdtm_STACK_GUARD_PAGE_TRAILING(void); void lkdtm_UNSET_SMEP(void); +#ifdef CONFIG_X86_32 +void lkdtm_DOUBLE_FAULT(void); +#endif /* lkdtm_heap.c */ void __init lkdtm_heap_init(void); @@ -95,4 +99,7 @@ void lkdtm_USERCOPY_KERNEL_DS(void); /* lkdtm_stackleak.c */ void lkdtm_STACKLEAK_ERASING(void); +/* cfi.c */ +void lkdtm_CFI_FORWARD_PROTO(void); + #endif diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c index 0a146b32da13..de7c5ab528d9 100644 --- a/drivers/misc/lkdtm/refcount.c +++ b/drivers/misc/lkdtm/refcount.c @@ -6,14 +6,6 @@ #include "lkdtm.h" #include <linux/refcount.h> -#ifdef CONFIG_REFCOUNT_FULL -#define REFCOUNT_MAX (UINT_MAX - 1) -#define REFCOUNT_SATURATED UINT_MAX -#else -#define REFCOUNT_MAX INT_MAX -#define REFCOUNT_SATURATED (INT_MIN / 2) -#endif - static void overflow_check(refcount_t *ref) { switch (refcount_read(ref)) { @@ -127,7 +119,7 @@ void lkdtm_REFCOUNT_DEC_ZERO(void) static void check_negative(refcount_t *ref, int start) { /* - * CONFIG_REFCOUNT_FULL refuses to move a refcount at all on an + * refcount_t refuses to move a refcount at all on an * over-sub, so we have to track our starting position instead of * looking only at zero-pinning. */ @@ -210,7 +202,6 @@ static void check_from_zero(refcount_t *ref) /* * A refcount_inc() from zero should pin to zero or saturate and may WARN. - * Only CONFIG_REFCOUNT_FULL provides this protection currently. */ void lkdtm_REFCOUNT_INC_ZERO(void) { diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 32e9b1aed2ca..9ad9c01ddf41 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -46,8 +46,6 @@ static const uuid_le mei_nfc_info_guid = MEI_UUID_NFC_INFO; */ static void number_of_connections(struct mei_cl_device *cldev) { - dev_dbg(&cldev->dev, "running hook %s\n", __func__); - if (cldev->me_cl->props.max_number_of_connections > 1) cldev->do_match = 0; } @@ -59,8 +57,6 @@ static void number_of_connections(struct mei_cl_device *cldev) */ static void blacklist(struct mei_cl_device *cldev) { - dev_dbg(&cldev->dev, "running hook %s\n", __func__); - cldev->do_match = 0; } @@ -71,8 +67,6 @@ static void blacklist(struct mei_cl_device *cldev) */ static void whitelist(struct mei_cl_device *cldev) { - dev_dbg(&cldev->dev, "running hook %s\n", __func__); - cldev->do_match = 1; } @@ -218,13 +212,21 @@ static void mei_mkhi_fix(struct mei_cl_device *cldev) { int ret; + /* No need to enable the client if nothing is needed from it */ + if (!cldev->bus->fw_f_fw_ver_supported && + !cldev->bus->hbm_f_os_supported) + return; + ret = mei_cldev_enable(cldev); if (ret) return; - ret = mei_fwver(cldev); - if (ret < 0) - dev_err(&cldev->dev, "FW version command failed %d\n", ret); + if (cldev->bus->fw_f_fw_ver_supported) { + ret = mei_fwver(cldev); + if (ret < 0) + dev_err(&cldev->dev, "FW version command failed %d\n", + ret); + } if (cldev->bus->hbm_f_os_supported) { ret = mei_osver(cldev); @@ -248,7 +250,6 @@ static void mei_wd(struct mei_cl_device *cldev) { struct pci_dev *pdev = to_pci_dev(cldev->dev.parent); - dev_dbg(&cldev->dev, "running hook %s\n", __func__); if (pdev->device == MEI_DEV_ID_WPT_LP || pdev->device == MEI_DEV_ID_SPT || pdev->device == MEI_DEV_ID_SPT_H) @@ -402,8 +403,6 @@ static void mei_nfc(struct mei_cl_device *cldev) bus = cldev->bus; - dev_dbg(&cldev->dev, "running hook %s\n", __func__); - mutex_lock(&bus->device_lock); /* we need to connect to INFO GUID */ cl = mei_cl_alloc_linked(bus); diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 985bd4fd3328..8d468e0a950a 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -765,7 +765,7 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *a, struct mei_cl_device *cldev = to_mei_cl_device(dev); const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); - return scnprintf(buf, PAGE_SIZE, "%pUl", uuid); + return sprintf(buf, "%pUl", uuid); } static DEVICE_ATTR_RO(uuid); @@ -775,7 +775,7 @@ static ssize_t version_show(struct device *dev, struct device_attribute *a, struct mei_cl_device *cldev = to_mei_cl_device(dev); u8 version = mei_me_cl_ver(cldev->me_cl); - return scnprintf(buf, PAGE_SIZE, "%02X", version); + return sprintf(buf, "%02X", version); } static DEVICE_ATTR_RO(version); @@ -791,11 +791,44 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a, } static DEVICE_ATTR_RO(modalias); +static ssize_t max_conn_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u8 maxconn = mei_me_cl_max_conn(cldev->me_cl); + + return sprintf(buf, "%d", maxconn); +} +static DEVICE_ATTR_RO(max_conn); + +static ssize_t fixed_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u8 fixed = mei_me_cl_fixed(cldev->me_cl); + + return sprintf(buf, "%d", fixed); +} +static DEVICE_ATTR_RO(fixed); + +static ssize_t max_len_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u32 maxlen = mei_me_cl_max_len(cldev->me_cl); + + return sprintf(buf, "%u", maxlen); +} +static DEVICE_ATTR_RO(max_len); + static struct attribute *mei_cldev_attrs[] = { &dev_attr_name.attr, &dev_attr_uuid.attr, &dev_attr_version.attr, &dev_attr_modalias.attr, + &dev_attr_max_conn.attr, + &dev_attr_fixed.attr, + &dev_attr_max_len.attr, NULL, }; ATTRIBUTE_GROUPS(mei_cldev); @@ -873,15 +906,16 @@ static const struct device_type mei_cl_device_type = { /** * mei_cl_bus_set_name - set device name for me client device + * <controller>-<client device> + * Example: 0000:00:16.0-55213584-9a29-4916-badf-0fb7ed682aeb * * @cldev: me client device */ static inline void mei_cl_bus_set_name(struct mei_cl_device *cldev) { - dev_set_name(&cldev->dev, "mei:%s:%pUl:%02X", - cldev->name, - mei_me_cl_uuid(cldev->me_cl), - mei_me_cl_ver(cldev->me_cl)); + dev_set_name(&cldev->dev, "%s-%pUl", + dev_name(cldev->bus->dev), + mei_me_cl_uuid(cldev->me_cl)); } /** diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h index c1f9e810cf81..2f8954def591 100644 --- a/drivers/misc/mei/client.h +++ b/drivers/misc/mei/client.h @@ -69,6 +69,42 @@ static inline u8 mei_me_cl_ver(const struct mei_me_client *me_cl) return me_cl->props.protocol_version; } +/** + * mei_me_cl_max_conn - return me client max number of connections + * + * @me_cl: me client + * + * Return: me client max number of connections + */ +static inline u8 mei_me_cl_max_conn(const struct mei_me_client *me_cl) +{ + return me_cl->props.max_number_of_connections; +} + +/** + * mei_me_cl_fixed - return me client fixed address, if any + * + * @me_cl: me client + * + * Return: me client fixed address + */ +static inline u8 mei_me_cl_fixed(const struct mei_me_client *me_cl) +{ + return me_cl->props.fixed_address; +} + +/** + * mei_me_cl_max_len - return me client max msg length + * + * @me_cl: me client + * + * Return: me client max msg length + */ +static inline u32 mei_me_cl_max_len(const struct mei_me_client *me_cl) +{ + return me_cl->props.max_msg_length; +} + /* * MEI IO Functions */ diff --git a/drivers/misc/mei/hdcp/mei_hdcp.c b/drivers/misc/mei/hdcp/mei_hdcp.c index c681f6fab342..4c596c646ac0 100644 --- a/drivers/misc/mei/hdcp/mei_hdcp.c +++ b/drivers/misc/mei/hdcp/mei_hdcp.c @@ -27,18 +27,6 @@ #include "mei_hdcp.h" -static inline u8 mei_get_ddi_index(enum port port) -{ - switch (port) { - case PORT_A: - return MEI_DDI_A; - case PORT_B ... PORT_F: - return (u8)port; - default: - return MEI_DDI_INVALID_PORT; - } -} - /** * mei_hdcp_initiate_session() - Initiate a Wired HDCP2.2 Tx Session in ME FW * @dev: device corresponding to the mei_cl_device @@ -69,7 +57,8 @@ mei_hdcp_initiate_session(struct device *dev, struct hdcp_port_data *data, WIRED_CMD_BUF_LEN_INITIATE_HDCP2_SESSION_IN; session_init_in.port.integrated_port_type = data->port_type; - session_init_in.port.physical_port = mei_get_ddi_index(data->port); + session_init_in.port.physical_port = (u8)data->fw_ddi; + session_init_in.port.attached_transcoder = (u8)data->fw_tc; session_init_in.protocol = data->protocol; byte = mei_cldev_send(cldev, (u8 *)&session_init_in, @@ -138,7 +127,8 @@ mei_hdcp_verify_receiver_cert_prepare_km(struct device *dev, WIRED_CMD_BUF_LEN_VERIFY_RECEIVER_CERT_IN; verify_rxcert_in.port.integrated_port_type = data->port_type; - verify_rxcert_in.port.physical_port = mei_get_ddi_index(data->port); + verify_rxcert_in.port.physical_port = (u8)data->fw_ddi; + verify_rxcert_in.port.attached_transcoder = (u8)data->fw_tc; verify_rxcert_in.cert_rx = rx_cert->cert_rx; memcpy(verify_rxcert_in.r_rx, &rx_cert->r_rx, HDCP_2_2_RRX_LEN); @@ -208,7 +198,8 @@ mei_hdcp_verify_hprime(struct device *dev, struct hdcp_port_data *data, send_hprime_in.header.buffer_len = WIRED_CMD_BUF_LEN_AKE_SEND_HPRIME_IN; send_hprime_in.port.integrated_port_type = data->port_type; - send_hprime_in.port.physical_port = mei_get_ddi_index(data->port); + send_hprime_in.port.physical_port = (u8)data->fw_ddi; + send_hprime_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(send_hprime_in.h_prime, rx_hprime->h_prime, HDCP_2_2_H_PRIME_LEN); @@ -265,7 +256,8 @@ mei_hdcp_store_pairing_info(struct device *dev, struct hdcp_port_data *data, WIRED_CMD_BUF_LEN_SEND_PAIRING_INFO_IN; pairing_info_in.port.integrated_port_type = data->port_type; - pairing_info_in.port.physical_port = mei_get_ddi_index(data->port); + pairing_info_in.port.physical_port = (u8)data->fw_ddi; + pairing_info_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(pairing_info_in.e_kh_km, pairing_info->e_kh_km, HDCP_2_2_E_KH_KM_LEN); @@ -323,7 +315,8 @@ mei_hdcp_initiate_locality_check(struct device *dev, lc_init_in.header.buffer_len = WIRED_CMD_BUF_LEN_INIT_LOCALITY_CHECK_IN; lc_init_in.port.integrated_port_type = data->port_type; - lc_init_in.port.physical_port = mei_get_ddi_index(data->port); + lc_init_in.port.physical_port = (u8)data->fw_ddi; + lc_init_in.port.attached_transcoder = (u8)data->fw_tc; byte = mei_cldev_send(cldev, (u8 *)&lc_init_in, sizeof(lc_init_in)); if (byte < 0) { @@ -378,7 +371,8 @@ mei_hdcp_verify_lprime(struct device *dev, struct hdcp_port_data *data, WIRED_CMD_BUF_LEN_VALIDATE_LOCALITY_IN; verify_lprime_in.port.integrated_port_type = data->port_type; - verify_lprime_in.port.physical_port = mei_get_ddi_index(data->port); + verify_lprime_in.port.physical_port = (u8)data->fw_ddi; + verify_lprime_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(verify_lprime_in.l_prime, rx_lprime->l_prime, HDCP_2_2_L_PRIME_LEN); @@ -435,7 +429,8 @@ static int mei_hdcp_get_session_key(struct device *dev, get_skey_in.header.buffer_len = WIRED_CMD_BUF_LEN_GET_SESSION_KEY_IN; get_skey_in.port.integrated_port_type = data->port_type; - get_skey_in.port.physical_port = mei_get_ddi_index(data->port); + get_skey_in.port.physical_port = (u8)data->fw_ddi; + get_skey_in.port.attached_transcoder = (u8)data->fw_tc; byte = mei_cldev_send(cldev, (u8 *)&get_skey_in, sizeof(get_skey_in)); if (byte < 0) { @@ -499,7 +494,8 @@ mei_hdcp_repeater_check_flow_prepare_ack(struct device *dev, WIRED_CMD_BUF_LEN_VERIFY_REPEATER_IN; verify_repeater_in.port.integrated_port_type = data->port_type; - verify_repeater_in.port.physical_port = mei_get_ddi_index(data->port); + verify_repeater_in.port.physical_port = (u8)data->fw_ddi; + verify_repeater_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(verify_repeater_in.rx_info, rep_topology->rx_info, HDCP_2_2_RXINFO_LEN); @@ -569,7 +565,8 @@ static int mei_hdcp_verify_mprime(struct device *dev, WIRED_CMD_BUF_LEN_REPEATER_AUTH_STREAM_REQ_MIN_IN; verify_mprime_in.port.integrated_port_type = data->port_type; - verify_mprime_in.port.physical_port = mei_get_ddi_index(data->port); + verify_mprime_in.port.physical_port = (u8)data->fw_ddi; + verify_mprime_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(verify_mprime_in.m_prime, stream_ready->m_prime, HDCP_2_2_MPRIME_LEN); @@ -630,7 +627,8 @@ static int mei_hdcp_enable_authentication(struct device *dev, enable_auth_in.header.buffer_len = WIRED_CMD_BUF_LEN_ENABLE_AUTH_IN; enable_auth_in.port.integrated_port_type = data->port_type; - enable_auth_in.port.physical_port = mei_get_ddi_index(data->port); + enable_auth_in.port.physical_port = (u8)data->fw_ddi; + enable_auth_in.port.attached_transcoder = (u8)data->fw_tc; enable_auth_in.stream_type = data->streams[0].stream_type; byte = mei_cldev_send(cldev, (u8 *)&enable_auth_in, @@ -684,7 +682,8 @@ mei_hdcp_close_session(struct device *dev, struct hdcp_port_data *data) WIRED_CMD_BUF_LEN_CLOSE_SESSION_IN; session_close_in.port.integrated_port_type = data->port_type; - session_close_in.port.physical_port = mei_get_ddi_index(data->port); + session_close_in.port.physical_port = (u8)data->fw_ddi; + session_close_in.port.attached_transcoder = (u8)data->fw_tc; byte = mei_cldev_send(cldev, (u8 *)&session_close_in, sizeof(session_close_in)); @@ -758,11 +757,38 @@ static const struct component_master_ops mei_component_master_ops = { .unbind = mei_component_master_unbind, }; +/** + * mei_hdcp_component_match - compare function for matching mei hdcp. + * + * The function checks if the driver is i915, the subcomponent is HDCP + * and the grand parent of hdcp and the parent of i915 are the same + * PCH device. + * + * @dev: master device + * @subcomponent: subcomponent to match (I915_COMPONENT_HDCP) + * @data: compare data (mei hdcp device) + * + * Return: + * * 1 - if components match + * * 0 - otherwise + */ static int mei_hdcp_component_match(struct device *dev, int subcomponent, void *data) { - return !strcmp(dev->driver->name, "i915") && - subcomponent == I915_COMPONENT_HDCP; + struct device *base = data; + + if (strcmp(dev->driver->name, "i915") || + subcomponent != I915_COMPONENT_HDCP) + return 0; + + base = base->parent; + if (!base) + return 0; + + base = base->parent; + dev = dev->parent; + + return (base && dev && dev == base); } static int mei_hdcp_probe(struct mei_cl_device *cldev, @@ -786,7 +812,7 @@ static int mei_hdcp_probe(struct mei_cl_device *cldev, master_match = NULL; component_match_add_typed(&cldev->dev, &master_match, - mei_hdcp_component_match, comp_master); + mei_hdcp_component_match, &cldev->dev); if (IS_ERR_OR_NULL(master_match)) { ret = -ENOMEM; goto err_exit; diff --git a/drivers/misc/mei/hdcp/mei_hdcp.h b/drivers/misc/mei/hdcp/mei_hdcp.h index e4b1cd54c853..18ffc773fa18 100644 --- a/drivers/misc/mei/hdcp/mei_hdcp.h +++ b/drivers/misc/mei/hdcp/mei_hdcp.h @@ -184,8 +184,11 @@ struct hdcp_cmd_no_data { /* Uniquely identifies the hdcp port being addressed for a given command. */ struct hdcp_port_id { u8 integrated_port_type; + /* physical_port is used until Gen11.5. Must be zero for Gen11.5+ */ u8 physical_port; - u16 reserved; + /* attached_transcoder is for Gen11.5+. Set to zero for <Gen11.5 */ + u8 attached_transcoder; + u8 reserved; } __packed; /* @@ -362,16 +365,4 @@ struct wired_cmd_repeater_auth_stream_req_out { struct hdcp_cmd_header header; struct hdcp_port_id port; } __packed; - -enum mei_fw_ddi { - MEI_DDI_INVALID_PORT = 0x0, - - MEI_DDI_B = 1, - MEI_DDI_C, - MEI_DDI_D, - MEI_DDI_E, - MEI_DDI_F, - MEI_DDI_A = 7, - MEI_DDI_RANGE_END = MEI_DDI_A, -}; #endif /* __MEI_HDCP_H__ */ diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 6c0173772162..87a0201ba6b3 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -79,8 +79,20 @@ #define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */ #define MEI_DEV_ID_CNP_H_4 0xA364 /* Cannon Point H 4 (iTouch) */ +#define MEI_DEV_ID_CMP_LP 0x02e0 /* Comet Point LP */ +#define MEI_DEV_ID_CMP_LP_3 0x02e4 /* Comet Point LP 3 (iTouch) */ + +#define MEI_DEV_ID_CMP_V 0xA3BA /* Comet Point Lake V */ + +#define MEI_DEV_ID_CMP_H 0x06e0 /* Comet Lake H */ +#define MEI_DEV_ID_CMP_H_3 0x06e4 /* Comet Lake H 3 (iTouch) */ + #define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ +#define MEI_DEV_ID_JSP_N 0x4DE0 /* Jasper Lake Point N */ + +#define MEI_DEV_ID_TGP_LP 0xA0E0 /* Tiger Lake Point LP */ + #define MEI_DEV_ID_MCC 0x4B70 /* Mule Creek Canyon (EHL) */ #define MEI_DEV_ID_MCC_4 0x4B75 /* Mule Creek Canyon 4 (EHL) */ @@ -157,7 +169,8 @@ access to ME_CBD */ #define ME_IS_HRA 0x00000002 /* ME Interrupt Enable HRA - host read only access to ME_IE */ #define ME_IE_HRA 0x00000001 - +/* TRC control shadow register */ +#define ME_TRC 0x00000030 /* H_HPG_CSR register bits */ #define H_HPG_CSR_PGIHEXR 0x00000001 diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index abe1b1f4362f..668418d7ea77 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2003-2018, Intel Corporation. All rights reserved. + * Copyright (c) 2003-2019, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -173,6 +173,27 @@ static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg) } /** + * mei_me_trc_status - read trc status register + * + * @dev: mei device + * @trc: trc status register value + * + * Return: 0 on success, error otherwise + */ +static int mei_me_trc_status(struct mei_device *dev, u32 *trc) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (!hw->cfg->hw_trc_supported) + return -EOPNOTSUPP; + + *trc = mei_me_reg_read(hw, ME_TRC); + trace_mei_reg_read(dev->dev, "ME_TRC", ME_TRC, *trc); + + return 0; +} + +/** * mei_me_fw_status - read fw status register from pci config space * * @dev: mei device @@ -183,20 +204,19 @@ static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg) static int mei_me_fw_status(struct mei_device *dev, struct mei_fw_status *fw_status) { - struct pci_dev *pdev = to_pci_dev(dev->dev); struct mei_me_hw *hw = to_me_hw(dev); const struct mei_fw_status *fw_src = &hw->cfg->fw_status; int ret; int i; - if (!fw_status) + if (!fw_status || !hw->read_fws) return -EINVAL; fw_status->count = fw_src->count; for (i = 0; i < fw_src->count && i < MEI_FW_STATUS_MAX; i++) { - ret = pci_read_config_dword(pdev, fw_src->status[i], - &fw_status->status[i]); - trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HSF_X", + ret = hw->read_fws(dev, fw_src->status[i], + &fw_status->status[i]); + trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HFS_X", fw_src->status[i], fw_status->status[i]); if (ret) @@ -210,19 +230,26 @@ static int mei_me_fw_status(struct mei_device *dev, * mei_me_hw_config - configure hw dependent settings * * @dev: mei device + * + * Return: + * * -EINVAL when read_fws is not set + * * 0 on success + * */ -static void mei_me_hw_config(struct mei_device *dev) +static int mei_me_hw_config(struct mei_device *dev) { - struct pci_dev *pdev = to_pci_dev(dev->dev); struct mei_me_hw *hw = to_me_hw(dev); u32 hcsr, reg; + if (WARN_ON(!hw->read_fws)) + return -EINVAL; + /* Doesn't change in runtime */ hcsr = mei_hcsr_read(dev); hw->hbuf_depth = (hcsr & H_CBD) >> 24; reg = 0; - pci_read_config_dword(pdev, PCI_CFG_HFS_1, ®); + hw->read_fws(dev, PCI_CFG_HFS_1, ®); trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg); hw->d0i3_supported = ((reg & PCI_CFG_HFS_1_D0I3_MSK) == PCI_CFG_HFS_1_D0I3_MSK); @@ -233,6 +260,8 @@ static void mei_me_hw_config(struct mei_device *dev) if (reg & H_D0I3C_I3) hw->pg_state = MEI_PG_ON; } + + return 0; } /** @@ -269,7 +298,7 @@ static inline void me_intr_disable(struct mei_device *dev, u32 hcsr) } /** - * mei_me_intr_clear - clear and stop interrupts + * me_intr_clear - clear and stop interrupts * * @dev: the device structure * @hcsr: supplied hcsr register value @@ -323,9 +352,9 @@ static void mei_me_intr_disable(struct mei_device *dev) */ static void mei_me_synchronize_irq(struct mei_device *dev) { - struct pci_dev *pdev = to_pci_dev(dev->dev); + struct mei_me_hw *hw = to_me_hw(dev); - synchronize_irq(pdev->irq); + synchronize_irq(hw->irq); } /** @@ -1294,6 +1323,7 @@ end: static const struct mei_hw_ops mei_me_hw_ops = { + .trc_status = mei_me_trc_status, .fw_status = mei_me_fw_status, .pg_state = mei_me_pg_state, @@ -1355,6 +1385,8 @@ static bool mei_me_fw_type_sps(struct pci_dev *pdev) #define MEI_CFG_FW_SPS \ .quirk_probe = mei_me_fw_type_sps +#define MEI_CFG_FW_VER_SUPP \ + .fw_ver_supported = 1 #define MEI_CFG_ICH_HFS \ .fw_status.count = 0 @@ -1382,6 +1414,9 @@ static bool mei_me_fw_type_sps(struct pci_dev *pdev) .dma_size[DMA_DSCR_DEVICE] = SZ_128K, \ .dma_size[DMA_DSCR_CTRL] = PAGE_SIZE +#define MEI_CFG_TRC \ + .hw_trc_supported = 1 + /* ICH Legacy devices */ static const struct mei_cfg mei_me_ich_cfg = { MEI_CFG_ICH_HFS, @@ -1392,34 +1427,52 @@ static const struct mei_cfg mei_me_ich10_cfg = { MEI_CFG_ICH10_HFS, }; -/* PCH devices */ -static const struct mei_cfg mei_me_pch_cfg = { +/* PCH6 devices */ +static const struct mei_cfg mei_me_pch6_cfg = { MEI_CFG_PCH_HFS, }; +/* PCH7 devices */ +static const struct mei_cfg mei_me_pch7_cfg = { + MEI_CFG_PCH_HFS, + MEI_CFG_FW_VER_SUPP, +}; + /* PCH Cougar Point and Patsburg with quirk for Node Manager exclusion */ static const struct mei_cfg mei_me_pch_cpt_pbg_cfg = { MEI_CFG_PCH_HFS, + MEI_CFG_FW_VER_SUPP, MEI_CFG_FW_NM, }; /* PCH8 Lynx Point and newer devices */ static const struct mei_cfg mei_me_pch8_cfg = { MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, }; /* PCH8 Lynx Point with quirk for SPS Firmware exclusion */ static const struct mei_cfg mei_me_pch8_sps_cfg = { MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, MEI_CFG_FW_SPS, }; /* Cannon Lake and newer devices */ static const struct mei_cfg mei_me_pch12_cfg = { MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, MEI_CFG_DMA_128, }; +/* Tiger Lake and newer devices */ +static const struct mei_cfg mei_me_pch15_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_DMA_128, + MEI_CFG_TRC, +}; + /* * mei_cfg_list - A list of platform platform specific configurations. * Note: has to be synchronized with enum mei_cfg_idx. @@ -1428,11 +1481,13 @@ static const struct mei_cfg *const mei_cfg_list[] = { [MEI_ME_UNDEF_CFG] = NULL, [MEI_ME_ICH_CFG] = &mei_me_ich_cfg, [MEI_ME_ICH10_CFG] = &mei_me_ich10_cfg, - [MEI_ME_PCH_CFG] = &mei_me_pch_cfg, + [MEI_ME_PCH6_CFG] = &mei_me_pch6_cfg, + [MEI_ME_PCH7_CFG] = &mei_me_pch7_cfg, [MEI_ME_PCH_CPT_PBG_CFG] = &mei_me_pch_cpt_pbg_cfg, [MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg, [MEI_ME_PCH8_SPS_CFG] = &mei_me_pch8_sps_cfg, [MEI_ME_PCH12_CFG] = &mei_me_pch12_cfg, + [MEI_ME_PCH15_CFG] = &mei_me_pch15_cfg, }; const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx) @@ -1448,19 +1503,19 @@ const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx) /** * mei_me_dev_init - allocates and initializes the mei device structure * - * @pdev: The pci device structure + * @parent: device associated with physical device (pci/platform) * @cfg: per device generation config * * Return: The mei_device pointer on success, NULL on failure. */ -struct mei_device *mei_me_dev_init(struct pci_dev *pdev, +struct mei_device *mei_me_dev_init(struct device *parent, const struct mei_cfg *cfg) { struct mei_device *dev; struct mei_me_hw *hw; int i; - dev = devm_kzalloc(&pdev->dev, sizeof(struct mei_device) + + dev = devm_kzalloc(parent, sizeof(struct mei_device) + sizeof(struct mei_me_hw), GFP_KERNEL); if (!dev) return NULL; @@ -1470,9 +1525,11 @@ struct mei_device *mei_me_dev_init(struct pci_dev *pdev, for (i = 0; i < DMA_DSCR_NUM; i++) dev->dr_dscr[i].size = cfg->dma_size[i]; - mei_device_init(dev, &pdev->dev, &mei_me_hw_ops); + mei_device_init(dev, parent, &mei_me_hw_ops); hw->cfg = cfg; + dev->fw_f_fw_ver_supported = cfg->fw_ver_supported; + return dev; } diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h index 08c84a0de4a8..4a8d4dcd5a91 100644 --- a/drivers/misc/mei/hw-me.h +++ b/drivers/misc/mei/hw-me.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (c) 2012-2018, Intel Corporation. All rights reserved. + * Copyright (c) 2012-2019, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -20,11 +20,15 @@ * @fw_status: FW status * @quirk_probe: device exclusion quirk * @dma_size: device DMA buffers size + * @fw_ver_supported: is fw version retrievable from FW + * @hw_trc_supported: does the hw support trc register */ struct mei_cfg { const struct mei_fw_status fw_status; bool (*quirk_probe)(struct pci_dev *pdev); size_t dma_size[DMA_DSCR_NUM]; + u32 fw_ver_supported:1; + u32 hw_trc_supported:1; }; @@ -40,16 +44,20 @@ struct mei_cfg { * * @cfg: per device generation config and ops * @mem_addr: io memory address + * @irq: irq number * @pg_state: power gating state * @d0i3_supported: di03 support * @hbuf_depth: depth of hardware host/write buffer in slots + * @read_fws: read FW status register handler */ struct mei_me_hw { const struct mei_cfg *cfg; void __iomem *mem_addr; + int irq; enum mei_pg_state pg_state; bool d0i3_supported; u8 hbuf_depth; + int (*read_fws)(const struct mei_device *dev, int where, u32 *val); }; #define to_me_hw(dev) (struct mei_me_hw *)((dev)->hw) @@ -62,7 +70,8 @@ struct mei_me_hw { * @MEI_ME_UNDEF_CFG: Lower sentinel. * @MEI_ME_ICH_CFG: I/O Controller Hub legacy devices. * @MEI_ME_ICH10_CFG: I/O Controller Hub platforms Gen10 - * @MEI_ME_PCH_CFG: Platform Controller Hub platforms (Up to Gen8). + * @MEI_ME_PCH6_CFG: Platform Controller Hub platforms (Gen6). + * @MEI_ME_PCH7_CFG: Platform Controller Hub platforms (Gen7). * @MEI_ME_PCH_CPT_PBG_CFG:Platform Controller Hub workstations * with quirk for Node Manager exclusion. * @MEI_ME_PCH8_CFG: Platform Controller Hub Gen8 and newer @@ -71,23 +80,26 @@ struct mei_me_hw { * servers platforms with quirk for * SPS firmware exclusion. * @MEI_ME_PCH12_CFG: Platform Controller Hub Gen12 and newer + * @MEI_ME_PCH15_CFG: Platform Controller Hub Gen15 and newer * @MEI_ME_NUM_CFG: Upper Sentinel. */ enum mei_cfg_idx { MEI_ME_UNDEF_CFG, MEI_ME_ICH_CFG, MEI_ME_ICH10_CFG, - MEI_ME_PCH_CFG, + MEI_ME_PCH6_CFG, + MEI_ME_PCH7_CFG, MEI_ME_PCH_CPT_PBG_CFG, MEI_ME_PCH8_CFG, MEI_ME_PCH8_SPS_CFG, MEI_ME_PCH12_CFG, + MEI_ME_PCH15_CFG, MEI_ME_NUM_CFG, }; const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx); -struct mei_device *mei_me_dev_init(struct pci_dev *pdev, +struct mei_device *mei_me_dev_init(struct device *parent, const struct mei_cfg *cfg); int mei_me_pg_enter_sync(struct mei_device *dev); diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c index 5e58656b8e19..785b260b3ae9 100644 --- a/drivers/misc/mei/hw-txe.c +++ b/drivers/misc/mei/hw-txe.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2013-2014, Intel Corporation. All rights reserved. + * Copyright (c) 2013-2019, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -660,14 +660,16 @@ static int mei_txe_fw_status(struct mei_device *dev, } /** - * mei_txe_hw_config - configure hardware at the start of the devices + * mei_txe_hw_config - configure hardware at the start of the devices * * @dev: the device structure * * Configure hardware at the start of the device should be done only * once at the device probe time + * + * Return: always 0 */ -static void mei_txe_hw_config(struct mei_device *dev) +static int mei_txe_hw_config(struct mei_device *dev) { struct mei_txe_hw *hw = to_txe_hw(dev); @@ -677,6 +679,8 @@ static void mei_txe_hw_config(struct mei_device *dev) dev_dbg(dev->dev, "aliveness_resp = 0x%08x, readiness = 0x%08x.\n", hw->aliveness, hw->readiness); + + return 0; } /** diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index b9fef773e71b..bcee77768b91 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2012-2018, Intel Corporation. All rights reserved. + * Copyright (c) 2012-2019, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -190,7 +190,9 @@ int mei_start(struct mei_device *dev) /* acknowledge interrupt and stop interrupts */ mei_clear_interrupts(dev); - mei_hw_config(dev); + ret = mei_hw_config(dev); + if (ret) + goto err; dev_dbg(dev->dev, "reset in start the mei device.\n"); diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index f894d1f8a53e..f17297f2943d 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -533,24 +533,6 @@ out: } /** - * mei_compat_ioctl - the compat IOCTL function - * - * @file: pointer to file structure - * @cmd: ioctl command - * @data: pointer to mei message structure - * - * Return: 0 on success , <0 on error - */ -#ifdef CONFIG_COMPAT -static long mei_compat_ioctl(struct file *file, - unsigned int cmd, unsigned long data) -{ - return mei_ioctl(file, cmd, (unsigned long)compat_ptr(data)); -} -#endif - - -/** * mei_poll - the poll function * * @file: pointer to file structure @@ -701,6 +683,29 @@ static int mei_fasync(int fd, struct file *file, int band) } /** + * trc_show - mei device trc attribute show method + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t trc_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + u32 trc; + int ret; + + ret = mei_trc_status(dev, &trc); + if (ret) + return ret; + return sprintf(buf, "%08X\n", trc); +} +static DEVICE_ATTR_RO(trc); + +/** * fw_status_show - mei device fw_status attribute show method * * @device: device pointer @@ -858,13 +863,6 @@ static ssize_t dev_state_show(struct device *device, } static DEVICE_ATTR_RO(dev_state); -static int match_devt(struct device *dev, const void *data) -{ - const dev_t *devt = data; - - return dev->devt == *devt; -} - /** * dev_set_devstate: set to new device state and notify sysfs file. * @@ -880,7 +878,7 @@ void mei_set_devstate(struct mei_device *dev, enum mei_dev_state state) dev->dev_state = state; - clsdev = class_find_device(mei_class, NULL, &dev->cdev.dev, match_devt); + clsdev = class_find_device_by_devt(mei_class, dev->cdev.dev); if (clsdev) { sysfs_notify(&clsdev->kobj, NULL, "dev_state"); put_device(clsdev); @@ -894,6 +892,7 @@ static struct attribute *mei_attrs[] = { &dev_attr_tx_queue_limit.attr, &dev_attr_fw_ver.attr, &dev_attr_dev_state.attr, + &dev_attr_trc.attr, NULL }; ATTRIBUTE_GROUPS(mei); @@ -905,9 +904,7 @@ static const struct file_operations mei_fops = { .owner = THIS_MODULE, .read = mei_read, .unlocked_ioctl = mei_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = mei_compat_ioctl, -#endif + .compat_ioctl = compat_ptr_ioctl, .open = mei_open, .release = mei_release, .write = mei_write, diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index f71a023aed3c..76f8ff5ff974 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (c) 2003-2018, Intel Corporation. All rights reserved. + * Copyright (c) 2003-2019, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -260,6 +260,7 @@ struct mei_cl { * @hw_config : configure hw * * @fw_status : get fw status registers + * @trc_status : get trc status register * @pg_state : power gating state of the device * @pg_in_transition : is device now in pg transition * @pg_is_enabled : is power gating enabled @@ -287,9 +288,11 @@ struct mei_hw_ops { bool (*hw_is_ready)(struct mei_device *dev); int (*hw_reset)(struct mei_device *dev, bool enable); int (*hw_start)(struct mei_device *dev); - void (*hw_config)(struct mei_device *dev); + int (*hw_config)(struct mei_device *dev); int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts); + int (*trc_status)(struct mei_device *dev, u32 *trc); + enum mei_pg_state (*pg_state)(struct mei_device *dev); bool (*pg_in_transition)(struct mei_device *dev); bool (*pg_is_enabled)(struct mei_device *dev); @@ -426,6 +429,8 @@ struct mei_fw_version { * * @fw_ver : FW versions * + * @fw_f_fw_ver_supported : fw feature: fw version supported + * * @me_clients_rwsem: rw lock over me_clients list * @me_clients : list of FW clients * @me_clients_map : FW clients bit map @@ -506,6 +511,8 @@ struct mei_device { struct mei_fw_version fw_ver[MEI_MAX_FW_VER_BLOCKS]; + unsigned int fw_f_fw_ver_supported:1; + struct rw_semaphore me_clients_rwsem; struct list_head me_clients; DECLARE_BITMAP(me_clients_map, MEI_CLIENTS_MAX); @@ -610,9 +617,9 @@ void mei_irq_compl_handler(struct mei_device *dev, struct list_head *cmpl_list); */ -static inline void mei_hw_config(struct mei_device *dev) +static inline int mei_hw_config(struct mei_device *dev) { - dev->ops->hw_config(dev); + return dev->ops->hw_config(dev); } static inline enum mei_pg_state mei_pg_state(struct mei_device *dev) @@ -707,6 +714,13 @@ static inline int mei_count_full_read_slots(struct mei_device *dev) return dev->ops->rdbuf_full_slots(dev); } +static inline int mei_trc_status(struct mei_device *dev, u32 *trc) +{ + if (dev->ops->trc_status) + return dev->ops->trc_status(dev, trc); + return -EOPNOTSUPP; +} + static inline int mei_fw_status(struct mei_device *dev, struct mei_fw_status *fw_status) { diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 57cb68f5cc64..2711451b3d87 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -61,13 +61,13 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_3, MEI_ME_ICH10_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_4, MEI_ME_ICH10_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_1, MEI_ME_PCH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_2, MEI_ME_PCH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_1, MEI_ME_PCH6_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_2, MEI_ME_PCH6_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CPT_1, MEI_ME_PCH_CPT_PBG_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_PBG_1, MEI_ME_PCH_CPT_PBG_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, MEI_ME_PCH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, MEI_ME_PCH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, MEI_ME_PCH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, MEI_ME_PCH7_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, MEI_ME_PCH7_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, MEI_ME_PCH7_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_H, MEI_ME_PCH8_SPS_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_W, MEI_ME_PCH8_SPS_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_LP, MEI_ME_PCH8_CFG)}, @@ -96,9 +96,19 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_V, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H_3, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_JSP_N, MEI_ME_PCH15_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_MCC_4, MEI_ME_PCH8_CFG)}, /* required last entry */ @@ -115,6 +125,13 @@ static inline void mei_me_set_pm_domain(struct mei_device *dev) {} static inline void mei_me_unset_pm_domain(struct mei_device *dev) {} #endif /* CONFIG_PM */ +static int mei_me_read_fws(const struct mei_device *dev, int where, u32 *val) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + + return pci_read_config_dword(pdev, where, val); +} + /** * mei_me_quirk_probe - probe for devices that doesn't valid ME interface * @@ -186,13 +203,15 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* allocates and initializes the mei dev structure */ - dev = mei_me_dev_init(pdev, cfg); + dev = mei_me_dev_init(&pdev->dev, cfg); if (!dev) { err = -ENOMEM; goto end; } hw = to_me_hw(dev); hw->mem_addr = pcim_iomap_table(pdev)[0]; + hw->irq = pdev->irq; + hw->read_fws = mei_me_read_fws; pci_enable_msi(pdev); @@ -381,12 +400,11 @@ static int mei_me_pci_resume(struct device *device) #ifdef CONFIG_PM static int mei_me_pm_runtime_idle(struct device *device) { - struct pci_dev *pdev = to_pci_dev(device); struct mei_device *dev; - dev_dbg(&pdev->dev, "rpm: me: runtime_idle\n"); + dev_dbg(device, "rpm: me: runtime_idle\n"); - dev = pci_get_drvdata(pdev); + dev = dev_get_drvdata(device); if (!dev) return -ENODEV; if (mei_write_is_idle(dev)) @@ -397,13 +415,12 @@ static int mei_me_pm_runtime_idle(struct device *device) static int mei_me_pm_runtime_suspend(struct device *device) { - struct pci_dev *pdev = to_pci_dev(device); struct mei_device *dev; int ret; - dev_dbg(&pdev->dev, "rpm: me: runtime suspend\n"); + dev_dbg(device, "rpm: me: runtime suspend\n"); - dev = pci_get_drvdata(pdev); + dev = dev_get_drvdata(device); if (!dev) return -ENODEV; @@ -416,7 +433,7 @@ static int mei_me_pm_runtime_suspend(struct device *device) mutex_unlock(&dev->device_lock); - dev_dbg(&pdev->dev, "rpm: me: runtime suspend ret=%d\n", ret); + dev_dbg(device, "rpm: me: runtime suspend ret=%d\n", ret); if (ret && ret != -EAGAIN) schedule_work(&dev->reset_work); @@ -426,13 +443,12 @@ static int mei_me_pm_runtime_suspend(struct device *device) static int mei_me_pm_runtime_resume(struct device *device) { - struct pci_dev *pdev = to_pci_dev(device); struct mei_device *dev; int ret; - dev_dbg(&pdev->dev, "rpm: me: runtime resume\n"); + dev_dbg(device, "rpm: me: runtime resume\n"); - dev = pci_get_drvdata(pdev); + dev = dev_get_drvdata(device); if (!dev) return -ENODEV; @@ -442,7 +458,7 @@ static int mei_me_pm_runtime_resume(struct device *device) mutex_unlock(&dev->device_lock); - dev_dbg(&pdev->dev, "rpm: me: runtime resume ret = %d\n", ret); + dev_dbg(device, "rpm: me: runtime resume ret = %d\n", ret); if (ret) schedule_work(&dev->reset_work); diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c index 2e37fc2e0fa8..f1c16a587495 100644 --- a/drivers/misc/mei/pci-txe.c +++ b/drivers/misc/mei/pci-txe.c @@ -276,12 +276,11 @@ static int mei_txe_pci_resume(struct device *device) #ifdef CONFIG_PM static int mei_txe_pm_runtime_idle(struct device *device) { - struct pci_dev *pdev = to_pci_dev(device); struct mei_device *dev; - dev_dbg(&pdev->dev, "rpm: txe: runtime_idle\n"); + dev_dbg(device, "rpm: txe: runtime_idle\n"); - dev = pci_get_drvdata(pdev); + dev = dev_get_drvdata(device); if (!dev) return -ENODEV; if (mei_write_is_idle(dev)) @@ -291,13 +290,12 @@ static int mei_txe_pm_runtime_idle(struct device *device) } static int mei_txe_pm_runtime_suspend(struct device *device) { - struct pci_dev *pdev = to_pci_dev(device); struct mei_device *dev; int ret; - dev_dbg(&pdev->dev, "rpm: txe: runtime suspend\n"); + dev_dbg(device, "rpm: txe: runtime suspend\n"); - dev = pci_get_drvdata(pdev); + dev = dev_get_drvdata(device); if (!dev) return -ENODEV; @@ -310,7 +308,7 @@ static int mei_txe_pm_runtime_suspend(struct device *device) /* keep irq on we are staying in D0 */ - dev_dbg(&pdev->dev, "rpm: txe: runtime suspend ret=%d\n", ret); + dev_dbg(device, "rpm: txe: runtime suspend ret=%d\n", ret); mutex_unlock(&dev->device_lock); @@ -322,13 +320,12 @@ static int mei_txe_pm_runtime_suspend(struct device *device) static int mei_txe_pm_runtime_resume(struct device *device) { - struct pci_dev *pdev = to_pci_dev(device); struct mei_device *dev; int ret; - dev_dbg(&pdev->dev, "rpm: txe: runtime resume\n"); + dev_dbg(device, "rpm: txe: runtime resume\n"); - dev = pci_get_drvdata(pdev); + dev = dev_get_drvdata(device); if (!dev) return -ENODEV; @@ -340,7 +337,7 @@ static int mei_txe_pm_runtime_resume(struct device *device) mutex_unlock(&dev->device_lock); - dev_dbg(&pdev->dev, "rpm: txe: runtime resume ret = %d\n", ret); + dev_dbg(device, "rpm: txe: runtime resume ret = %d\n", ret); if (ret) schedule_work(&dev->reset_work); diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index 948f45bbf135..b6841ba6d922 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only menu "Intel MIC & related support" -comment "Intel MIC Bus Driver" - config INTEL_MIC_BUS tristate "Intel MIC Bus Driver" depends on 64BIT && PCI && X86 @@ -18,8 +16,6 @@ config INTEL_MIC_BUS OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. -comment "SCIF Bus Driver" - config SCIF_BUS tristate "SCIF Bus Driver" depends on 64BIT && PCI && X86 @@ -35,8 +31,6 @@ config SCIF_BUS OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. -comment "VOP Bus Driver" - config VOP_BUS tristate "VOP Bus Driver" help @@ -51,8 +45,6 @@ config VOP_BUS OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. -comment "Intel MIC Host Driver" - config INTEL_MIC_HOST tristate "Intel MIC Host Driver" depends on 64BIT && PCI && X86 @@ -71,8 +63,6 @@ config INTEL_MIC_HOST OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. -comment "Intel MIC Card Driver" - config INTEL_MIC_CARD tristate "Intel MIC Card Driver" depends on 64BIT && X86 @@ -90,8 +80,6 @@ config INTEL_MIC_CARD For more information see <http://software.intel.com/en-us/mic-developer>. -comment "SCIF Driver" - config SCIF tristate "SCIF Driver" depends on 64BIT && PCI && X86 && SCIF_BUS && IOMMU_SUPPORT @@ -110,8 +98,6 @@ config SCIF OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. -comment "Intel MIC Coprocessor State Management (COSM) Drivers" - config MIC_COSM tristate "Intel MIC Coprocessor State Management (COSM) Drivers" depends on 64BIT && PCI && X86 && SCIF @@ -128,8 +114,6 @@ config MIC_COSM OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. -comment "VOP Driver" - config VOP tristate "VOP Driver" depends on VOP_BUS diff --git a/drivers/misc/mic/card/mic_debugfs.c b/drivers/misc/mic/card/mic_debugfs.c index 3ee3d2402634..b58608829b18 100644 --- a/drivers/misc/mic/card/mic_debugfs.c +++ b/drivers/misc/mic/card/mic_debugfs.c @@ -65,9 +65,6 @@ void __init mic_create_card_debug_dir(struct mic_driver *mdrv) */ void mic_delete_card_debug_dir(struct mic_driver *mdrv) { - if (!mdrv->dbg_dir) - return; - debugfs_remove_recursive(mdrv->dbg_dir); } diff --git a/drivers/misc/mic/card/mic_x100.c b/drivers/misc/mic/card/mic_x100.c index 266ffb6f6c44..c8bff2916d3d 100644 --- a/drivers/misc/mic/card/mic_x100.c +++ b/drivers/misc/mic/card/mic_x100.c @@ -237,6 +237,9 @@ static int __init mic_probe(struct platform_device *pdev) mdrv->dev = &pdev->dev; snprintf(mdrv->name, sizeof(mic_driver_name), mic_driver_name); + /* FIXME: use dma_set_mask_and_coherent() and check result */ + dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + mdev->mmio.pa = MIC_X100_MMIO_BASE; mdev->mmio.len = MIC_X100_MMIO_LEN; mdev->mmio.va = devm_ioremap(&pdev->dev, MIC_X100_MMIO_BASE, @@ -282,18 +285,6 @@ static void mic_platform_shutdown(struct platform_device *pdev) mic_remove(pdev); } -static u64 mic_dma_mask = DMA_BIT_MASK(64); - -static struct platform_device mic_platform_dev = { - .name = mic_driver_name, - .id = 0, - .num_resources = 0, - .dev = { - .dma_mask = &mic_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), - }, -}; - static struct platform_driver __refdata mic_platform_driver = { .probe = mic_probe, .remove = mic_remove, @@ -303,6 +294,8 @@ static struct platform_driver __refdata mic_platform_driver = { }, }; +static struct platform_device *mic_platform_dev; + static int __init mic_init(void) { int ret; @@ -316,9 +309,12 @@ static int __init mic_init(void) request_module("mic_x100_dma"); mic_init_card_debugfs(); - ret = platform_device_register(&mic_platform_dev); + + mic_platform_dev = platform_device_register_simple(mic_driver_name, + 0, NULL, 0); + ret = PTR_ERR_OR_ZERO(mic_platform_dev); if (ret) { - pr_err("platform_device_register ret %d\n", ret); + pr_err("platform_device_register_full ret %d\n", ret); goto cleanup_debugfs; } ret = platform_driver_register(&mic_platform_driver); @@ -329,7 +325,7 @@ static int __init mic_init(void) return ret; device_unregister: - platform_device_unregister(&mic_platform_dev); + platform_device_unregister(mic_platform_dev); cleanup_debugfs: mic_exit_card_debugfs(); done: @@ -339,7 +335,7 @@ done: static void __exit mic_exit(void) { platform_driver_unregister(&mic_platform_driver); - platform_device_unregister(&mic_platform_dev); + platform_device_unregister(mic_platform_dev); mic_exit_card_debugfs(); } diff --git a/drivers/misc/mic/cosm/cosm_debugfs.c b/drivers/misc/mic/cosm/cosm_debugfs.c index 2fc9f4bf7001..68a731fd86de 100644 --- a/drivers/misc/mic/cosm/cosm_debugfs.c +++ b/drivers/misc/mic/cosm/cosm_debugfs.c @@ -102,9 +102,6 @@ void cosm_create_debug_dir(struct cosm_device *cdev) void cosm_delete_debug_dir(struct cosm_device *cdev) { - if (!cdev->dbg_dir) - return; - debugfs_remove_recursive(cdev->dbg_dir); } diff --git a/drivers/misc/mic/host/mic_debugfs.c b/drivers/misc/mic/host/mic_debugfs.c index 8a8e41677501..ab0db7a2ac8c 100644 --- a/drivers/misc/mic/host/mic_debugfs.c +++ b/drivers/misc/mic/host/mic_debugfs.c @@ -129,9 +129,6 @@ void mic_create_debug_dir(struct mic_device *mdev) */ void mic_delete_debug_dir(struct mic_device *mdev) { - if (!mdev->dbg_dir) - return; - debugfs_remove_recursive(mdev->dbg_dir); } diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h index d3837f8a5ba0..0b9dfe1cc06c 100644 --- a/drivers/misc/mic/scif/scif_epd.h +++ b/drivers/misc/mic/scif/scif_epd.h @@ -156,9 +156,8 @@ static inline int scif_verify_epd(struct scif_endpt *ep) static inline int scif_anon_inode_getfile(scif_epd_t epd) { epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0); - if (IS_ERR(epd->anon)) - return PTR_ERR(epd->anon); - return 0; + + return PTR_ERR_OR_ZERO(epd->anon); } static inline void scif_anon_inode_fput(scif_epd_t epd) diff --git a/drivers/misc/mic/scif/scif_nodeqp.c b/drivers/misc/mic/scif/scif_nodeqp.c index c25fd40f3bd0..fcd999f50d14 100644 --- a/drivers/misc/mic/scif/scif_nodeqp.c +++ b/drivers/misc/mic/scif/scif_nodeqp.c @@ -788,7 +788,7 @@ scif_node_add(struct scif_dev *scifdev, struct scifmsg *msg) "failed to setup interrupts for %d\n", msg->src.node); goto interrupt_setup_error; } - newdev->mmio.va = ioremap_nocache(msg->payload[1], sdev->mmio->len); + newdev->mmio.va = ioremap(msg->payload[1], sdev->mmio->len); if (!newdev->mmio.va) { dev_err(&scifdev->sdev->dev, "failed to map mmio for %d\n", msg->src.node); diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig index 1916fa65f2f2..2d2266c1439e 100644 --- a/drivers/misc/ocxl/Kconfig +++ b/drivers/misc/ocxl/Kconfig @@ -11,6 +11,7 @@ config OCXL tristate "OpenCAPI coherent accelerator support" depends on PPC_POWERNV && PCI && EEH select OCXL_BASE + select HOTPLUG_PCI_POWERNV default m help Select this option to enable the ocxl driver for Open diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c index 994563a078eb..de8a66b9d76b 100644 --- a/drivers/misc/ocxl/context.c +++ b/drivers/misc/ocxl/context.c @@ -10,18 +10,17 @@ int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu, int pasid; struct ocxl_context *ctx; - *context = kzalloc(sizeof(struct ocxl_context), GFP_KERNEL); - if (!*context) + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) return -ENOMEM; - ctx = *context; - ctx->afu = afu; mutex_lock(&afu->contexts_lock); pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base, afu->pasid_base + afu->pasid_max, GFP_KERNEL); if (pasid < 0) { mutex_unlock(&afu->contexts_lock); + kfree(ctx); return pasid; } afu->pasid_count++; @@ -43,6 +42,7 @@ int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu, * duration of the life of the context */ ocxl_afu_get(afu); + *context = ctx; return 0; } EXPORT_SYMBOL_GPL(ocxl_context_alloc); diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index 2870c25da166..4d1b44de1492 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -18,18 +18,15 @@ static struct class *ocxl_class; static struct mutex minors_idr_lock; static struct idr minors_idr; -static struct ocxl_file_info *find_file_info(dev_t devno) +static struct ocxl_file_info *find_and_get_file_info(dev_t devno) { struct ocxl_file_info *info; - /* - * We don't declare an RCU critical section here, as our AFU - * is protected by a reference counter on the device. By the time the - * info reference is removed from the idr, the ref count of - * the device is already at 0, so no user API will access that AFU and - * this function can't return it. - */ + mutex_lock(&minors_idr_lock); info = idr_find(&minors_idr, MINOR(devno)); + if (info) + get_device(&info->dev); + mutex_unlock(&minors_idr_lock); return info; } @@ -58,14 +55,16 @@ static int afu_open(struct inode *inode, struct file *file) pr_debug("%s for device %x\n", __func__, inode->i_rdev); - info = find_file_info(inode->i_rdev); + info = find_and_get_file_info(inode->i_rdev); if (!info) return -ENODEV; rc = ocxl_context_alloc(&ctx, info->afu, inode->i_mapping); - if (rc) + if (rc) { + put_device(&info->dev); return rc; - + } + put_device(&info->dev); file->private_data = ctx; return 0; } @@ -487,7 +486,6 @@ static void info_release(struct device *dev) { struct ocxl_file_info *info = container_of(dev, struct ocxl_file_info, dev); - free_minor(info); ocxl_afu_put(info->afu); kfree(info); } @@ -577,6 +575,7 @@ void ocxl_file_unregister_afu(struct ocxl_afu *afu) ocxl_file_make_invisible(info); ocxl_sysfs_unregister_afu(info); + free_minor(info); device_unregister(&info->dev); } diff --git a/drivers/misc/ocxl/main.c b/drivers/misc/ocxl/main.c index 7210d9e059be..ef73cf35dda2 100644 --- a/drivers/misc/ocxl/main.c +++ b/drivers/misc/ocxl/main.c @@ -2,12 +2,16 @@ // Copyright 2017 IBM Corp. #include <linux/module.h> #include <linux/pci.h> +#include <asm/mmu.h> #include "ocxl_internal.h" static int __init init_ocxl(void) { int rc = 0; + if (!tlbie_capable) + return -EINVAL; + rc = ocxl_file_init(); if (rc) return rc; diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h index 97415afd79f3..345bf843a38e 100644 --- a/drivers/misc/ocxl/ocxl_internal.h +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright 2017 IBM Corp. #ifndef _OCXL_INTERNAL_H_ #define _OCXL_INTERNAL_H_ diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h index 024f417e7e01..17e21cb2addd 100644 --- a/drivers/misc/ocxl/trace.h +++ b/drivers/misc/ocxl/trace.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright 2017 IBM Corp. #undef TRACE_SYSTEM #define TRACE_SYSTEM ocxl diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 6e208a060a58..a5e317073d95 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -94,7 +94,7 @@ enum pci_barno { struct pci_endpoint_test { struct pci_dev *pdev; void __iomem *base; - void __iomem *bar[6]; + void __iomem *bar[PCI_STD_NUM_BARS]; struct completion irq_raised; int last_irq; int num_irqs; @@ -687,7 +687,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, if (!pci_endpoint_test_request_irq(test)) goto err_disable_irq; - for (bar = BAR_0; bar <= BAR_5; bar++) { + for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) { base = pci_ioremap_bar(pdev, bar); if (!base) { @@ -740,7 +740,7 @@ err_ida_remove: ida_simple_remove(&pci_endpoint_test_ida, id); err_iounmap: - for (bar = BAR_0; bar <= BAR_5; bar++) { + for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { if (test->bar[bar]) pci_iounmap(pdev, test->bar[bar]); } @@ -771,7 +771,7 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) misc_deregister(&test->miscdev); kfree(misc_device->name); ida_simple_remove(&pci_endpoint_test_ida, id); - for (bar = BAR_0; bar <= BAR_5; bar++) { + for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { if (test->bar[bar]) pci_iounmap(pdev, test->bar[bar]); } diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c index 359c5bab45ac..b7f510676cd6 100644 --- a/drivers/misc/pti.c +++ b/drivers/misc/pti.c @@ -792,7 +792,7 @@ static int pti_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { unsigned int a; - int retval = -EINVAL; + int retval; int pci_bar = 1; dev_dbg(&pdev->dev, "%s %s(%d): PTI PCI ID %04x:%04x\n", __FILE__, @@ -834,7 +834,7 @@ static int pti_pci_probe(struct pci_dev *pdev, } drv_data->aperture_base = drv_data->pti_addr+APERTURE_14; drv_data->pti_ioaddr = - ioremap_nocache((u32)drv_data->aperture_base, + ioremap((u32)drv_data->aperture_base, APERTURE_LEN); if (!drv_data->pti_ioaddr) { retval = -ENOMEM; @@ -910,7 +910,7 @@ static struct pci_driver pti_pci_driver = { */ static int __init pti_init(void) { - int retval = -EINVAL; + int retval; /* First register module as tty device */ diff --git a/drivers/misc/pvpanic.c b/drivers/misc/pvpanic.c index 95ff7c5a1dfb..a6e1a8983e1f 100644 --- a/drivers/misc/pvpanic.c +++ b/drivers/misc/pvpanic.c @@ -10,16 +10,16 @@ #include <linux/acpi.h> #include <linux/kernel.h> +#include <linux/kexec.h> #include <linux/module.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/platform_device.h> #include <linux/types.h> +#include <uapi/misc/pvpanic.h> static void __iomem *base; -#define PVPANIC_PANICKED (1 << 0) - MODULE_AUTHOR("Hu Tao <hutao@cn.fujitsu.com>"); MODULE_DESCRIPTION("pvpanic device driver"); MODULE_LICENSE("GPL"); @@ -34,7 +34,13 @@ static int pvpanic_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) { - pvpanic_send_event(PVPANIC_PANICKED); + unsigned int event = PVPANIC_PANICKED; + + if (kexec_crash_loaded()) + event = PVPANIC_CRASH_LOADED; + + pvpanic_send_event(event); + return NOTIFY_DONE; } diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index a2a142ae087b..9d042310214f 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -573,6 +573,7 @@ static void __exit gru_exit(void) gru_free_tables(); misc_deregister(&gru_miscdev); gru_proc_exit(); + mmu_notifier_synchronize(); } static const struct file_operations gru_fops = { diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c index 3a8d76d1ccae..97b8b38ab47d 100644 --- a/drivers/misc/sgi-gru/gruprocfs.c +++ b/drivers/misc/sgi-gru/gruprocfs.c @@ -119,7 +119,7 @@ static int mcs_statistics_show(struct seq_file *s, void *p) "cch_interrupt_sync", "cch_deallocate", "tfh_write_only", "tfh_write_restart", "tgh_invalidate"}; - seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks"); + seq_puts(s, "#id count aver-clks max-clks\n"); for (op = 0; op < mcsop_last; op++) { count = atomic_long_read(&mcs_op_statistics[op].count); total = atomic_long_read(&mcs_op_statistics[op].total); @@ -165,8 +165,7 @@ static int cch_seq_show(struct seq_file *file, void *data) const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" }; if (gid == 0) - seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid", - "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode"); + seq_puts(file, "# gid bid ctx# asid pid cbrs dsbytes mode\n"); if (gru) for (i = 0; i < GRU_NUM_CCH; i++) { ts = gru->gs_gts[i]; @@ -191,10 +190,8 @@ static int gru_seq_show(struct seq_file *file, void *data) struct gru_state *gru = GID_TO_GRU(gid); if (gid == 0) { - seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid", - "ctx", "cbr", "dsr", "ctx", "cbr", "dsr"); - seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy", - "busy", "busy", "free", "free", "free"); + seq_puts(file, "# gid nid ctx cbr dsr ctx cbr dsr\n"); + seq_puts(file, "# busy busy busy free free free\n"); } if (gru) { ctxfree = GRU_NUM_CCH - gru->gs_active_contexts; @@ -258,28 +255,28 @@ static int options_open(struct inode *inode, struct file *file) } /* *INDENT-OFF* */ -static const struct file_operations statistics_fops = { - .open = statistics_open, - .read = seq_read, - .write = statistics_write, - .llseek = seq_lseek, - .release = single_release, +static const struct proc_ops statistics_proc_ops = { + .proc_open = statistics_open, + .proc_read = seq_read, + .proc_write = statistics_write, + .proc_lseek = seq_lseek, + .proc_release = single_release, }; -static const struct file_operations mcs_statistics_fops = { - .open = mcs_statistics_open, - .read = seq_read, - .write = mcs_statistics_write, - .llseek = seq_lseek, - .release = single_release, +static const struct proc_ops mcs_statistics_proc_ops = { + .proc_open = mcs_statistics_open, + .proc_read = seq_read, + .proc_write = mcs_statistics_write, + .proc_lseek = seq_lseek, + .proc_release = single_release, }; -static const struct file_operations options_fops = { - .open = options_open, - .read = seq_read, - .write = options_write, - .llseek = seq_lseek, - .release = single_release, +static const struct proc_ops options_proc_ops = { + .proc_open = options_open, + .proc_read = seq_read, + .proc_write = options_write, + .proc_lseek = seq_lseek, + .proc_release = single_release, }; static struct proc_dir_entry *proc_gru __read_mostly; @@ -289,11 +286,11 @@ int gru_proc_init(void) proc_gru = proc_mkdir("sgi_uv/gru", NULL); if (!proc_gru) return -1; - if (!proc_create("statistics", 0644, proc_gru, &statistics_fops)) + if (!proc_create("statistics", 0644, proc_gru, &statistics_proc_ops)) goto err; - if (!proc_create("mcs_statistics", 0644, proc_gru, &mcs_statistics_fops)) + if (!proc_create("mcs_statistics", 0644, proc_gru, &mcs_statistics_proc_ops)) goto err; - if (!proc_create("debug_options", 0644, proc_gru, &options_fops)) + if (!proc_create("debug_options", 0644, proc_gru, &options_proc_ops)) goto err; if (!proc_create_seq("cch_status", 0444, proc_gru, &cch_seq_ops)) goto err; diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index 438191c22057..a7e44b2eb413 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h @@ -307,10 +307,8 @@ struct gru_mm_tracker { /* pack to reduce size */ struct gru_mm_struct { struct mmu_notifier ms_notifier; - atomic_t ms_refcnt; spinlock_t ms_asid_lock; /* protects ASID assignment */ atomic_t ms_range_active;/* num range_invals active */ - char ms_released; wait_queue_head_t ms_wait_queue; DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS); struct gru_mm_tracker ms_asids[GRU_MAX_GRUS]; diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c index 59ba0adf23ce..10921cd2608d 100644 --- a/drivers/misc/sgi-gru/grutlbpurge.c +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -235,83 +235,47 @@ static void gru_invalidate_range_end(struct mmu_notifier *mn, gms, range->start, range->end); } -static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm) +static struct mmu_notifier *gru_alloc_notifier(struct mm_struct *mm) { - struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, - ms_notifier); + struct gru_mm_struct *gms; + + gms = kzalloc(sizeof(*gms), GFP_KERNEL); + if (!gms) + return ERR_PTR(-ENOMEM); + STAT(gms_alloc); + spin_lock_init(&gms->ms_asid_lock); + init_waitqueue_head(&gms->ms_wait_queue); - gms->ms_released = 1; - gru_dbg(grudev, "gms %p\n", gms); + return &gms->ms_notifier; } +static void gru_free_notifier(struct mmu_notifier *mn) +{ + kfree(container_of(mn, struct gru_mm_struct, ms_notifier)); + STAT(gms_free); +} static const struct mmu_notifier_ops gru_mmuops = { .invalidate_range_start = gru_invalidate_range_start, .invalidate_range_end = gru_invalidate_range_end, - .release = gru_release, + .alloc_notifier = gru_alloc_notifier, + .free_notifier = gru_free_notifier, }; -/* Move this to the basic mmu_notifier file. But for now... */ -static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm, - const struct mmu_notifier_ops *ops) -{ - struct mmu_notifier *mn, *gru_mn = NULL; - - if (mm->mmu_notifier_mm) { - rcu_read_lock(); - hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, - hlist) - if (mn->ops == ops) { - gru_mn = mn; - break; - } - rcu_read_unlock(); - } - return gru_mn; -} - struct gru_mm_struct *gru_register_mmu_notifier(void) { - struct gru_mm_struct *gms; struct mmu_notifier *mn; - int err; - - mn = mmu_find_ops(current->mm, &gru_mmuops); - if (mn) { - gms = container_of(mn, struct gru_mm_struct, ms_notifier); - atomic_inc(&gms->ms_refcnt); - } else { - gms = kzalloc(sizeof(*gms), GFP_KERNEL); - if (!gms) - return ERR_PTR(-ENOMEM); - STAT(gms_alloc); - spin_lock_init(&gms->ms_asid_lock); - gms->ms_notifier.ops = &gru_mmuops; - atomic_set(&gms->ms_refcnt, 1); - init_waitqueue_head(&gms->ms_wait_queue); - err = __mmu_notifier_register(&gms->ms_notifier, current->mm); - if (err) - goto error; - } - if (gms) - gru_dbg(grudev, "gms %p, refcnt %d\n", gms, - atomic_read(&gms->ms_refcnt)); - return gms; -error: - kfree(gms); - return ERR_PTR(err); + + mn = mmu_notifier_get_locked(&gru_mmuops, current->mm); + if (IS_ERR(mn)) + return ERR_CAST(mn); + + return container_of(mn, struct gru_mm_struct, ms_notifier); } void gru_drop_mmu_notifier(struct gru_mm_struct *gms) { - gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms, - atomic_read(&gms->ms_refcnt), gms->ms_released); - if (atomic_dec_return(&gms->ms_refcnt) == 0) { - if (!gms->ms_released) - mmu_notifier_unregister(&gms->ms_notifier, current->mm); - kfree(gms); - STAT(gms_free); - } + mmu_notifier_put(&gms->ms_notifier); } /* diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile index bbb622c19c06..34c55a4045af 100644 --- a/drivers/misc/sgi-xp/Makefile +++ b/drivers/misc/sgi-xp/Makefile @@ -4,17 +4,10 @@ # obj-$(CONFIG_SGI_XP) += xp.o -xp-y := xp_main.o -xp-$(CONFIG_IA64_SGI_SN2) += xp_sn2.o xp_nofault.o -xp-$(CONFIG_IA64_GENERIC) += xp_sn2.o xp_nofault.o -xp-$(CONFIG_IA64_SGI_UV) += xp_uv.o -xp-$(CONFIG_X86_64) += xp_uv.o +xp-y := xp_main.o xp_uv.o obj-$(CONFIG_SGI_XP) += xpc.o -xpc-y := xpc_main.o xpc_channel.o xpc_partition.o -xpc-$(CONFIG_IA64_SGI_SN2) += xpc_sn2.o -xpc-$(CONFIG_IA64_GENERIC) += xpc_sn2.o -xpc-$(CONFIG_IA64_SGI_UV) += xpc_uv.o -xpc-$(CONFIG_X86_64) += xpc_uv.o +xpc-y := xpc_main.o xpc_channel.o xpc_partition.o \ + xpc_uv.o obj-$(CONFIG_SGI_XP) += xpnet.o diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h index b8069eec18cb..06469b12aced 100644 --- a/drivers/misc/sgi-xp/xp.h +++ b/drivers/misc/sgi-xp/xp.h @@ -24,23 +24,6 @@ #define is_uv() 0 #endif -#if defined CONFIG_IA64 -#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */ -#define is_shub() ia64_platform_is("sn2") -#endif - -#ifndef is_shub1 -#define is_shub1() 0 -#endif - -#ifndef is_shub2 -#define is_shub2() 0 -#endif - -#ifndef is_shub -#define is_shub() 0 -#endif - #ifdef USE_DBUG_ON #define DBUG_ON(condition) BUG_ON(condition) #else @@ -360,9 +343,7 @@ extern int xp_nofault_PIOR(void *); extern int xp_error_PIOR(void); extern struct device *xp; -extern enum xp_retval xp_init_sn2(void); extern enum xp_retval xp_init_uv(void); -extern void xp_exit_sn2(void); extern void xp_exit_uv(void); #endif /* _DRIVERS_MISC_SGIXP_XP_H */ diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c index 6d7f557fd1c1..5fd94d836070 100644 --- a/drivers/misc/sgi-xp/xp_main.c +++ b/drivers/misc/sgi-xp/xp_main.c @@ -233,9 +233,7 @@ xp_init(void) for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) mutex_init(&xpc_registrations[ch_number].mutex); - if (is_shub()) - ret = xp_init_sn2(); - else if (is_uv()) + if (is_uv()) ret = xp_init_uv(); else ret = 0; @@ -251,9 +249,7 @@ module_init(xp_init); void __exit xp_exit(void) { - if (is_shub()) - xp_exit_sn2(); - else if (is_uv()) + if (is_uv()) xp_exit_uv(); } diff --git a/drivers/misc/sgi-xp/xp_nofault.S b/drivers/misc/sgi-xp/xp_nofault.S deleted file mode 100644 index e38d43319429..000000000000 --- a/drivers/misc/sgi-xp/xp_nofault.S +++ /dev/null @@ -1,35 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. - */ - -/* - * The xp_nofault_PIOR function takes a pointer to a remote PIO register - * and attempts to load and consume a value from it. This function - * will be registered as a nofault code block. In the event that the - * PIO read fails, the MCA handler will force the error to look - * corrected and vector to the xp_error_PIOR which will return an error. - * - * The definition of "consumption" and the time it takes for an MCA - * to surface is processor implementation specific. This code - * is sufficient on Itanium through the Montvale processor family. - * It may need to be adjusted for future processor implementations. - * - * extern int xp_nofault_PIOR(void *remote_register); - */ - - .global xp_nofault_PIOR -xp_nofault_PIOR: - mov r8=r0 // Stage a success return value - ld8.acq r9=[r32];; // PIO Read the specified register - adds r9=1,r9;; // Add to force consumption - srlz.i;; // Allow time for MCA to surface - br.ret.sptk.many b0;; // Return success - - .global xp_error_PIOR -xp_error_PIOR: - mov r8=1 // Return value of 1 - br.ret.sptk.many b0;; // Return failure diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c deleted file mode 100644 index d8e463f87241..000000000000 --- a/drivers/misc/sgi-xp/xp_sn2.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. - */ - -/* - * Cross Partition (XP) sn2-based functions. - * - * Architecture specific implementation of common functions. - */ - -#include <linux/module.h> -#include <linux/device.h> -#include <asm/sn/bte.h> -#include <asm/sn/sn_sal.h> -#include "xp.h" - -/* - * The export of xp_nofault_PIOR needs to happen here since it is defined - * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is - * defined here. - */ -EXPORT_SYMBOL_GPL(xp_nofault_PIOR); - -u64 xp_nofault_PIOR_target; -EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target); - -/* - * Register a nofault code region which performs a cross-partition PIO read. - * If the PIO read times out, the MCA handler will consume the error and - * return to a kernel-provided instruction to indicate an error. This PIO read - * exists because it is guaranteed to timeout if the destination is down - * (amo operations do not timeout on at least some CPUs on Shubs <= v1.2, - * which unfortunately we have to work around). - */ -static enum xp_retval -xp_register_nofault_code_sn2(void) -{ - int ret; - u64 func_addr; - u64 err_func_addr; - - func_addr = *(u64 *)xp_nofault_PIOR; - err_func_addr = *(u64 *)xp_error_PIOR; - ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr, - 1, 1); - if (ret != 0) { - dev_err(xp, "can't register nofault code, error=%d\n", ret); - return xpSalError; - } - /* - * Setup the nofault PIO read target. (There is no special reason why - * SH_IPI_ACCESS was selected.) - */ - if (is_shub1()) - xp_nofault_PIOR_target = SH1_IPI_ACCESS; - else if (is_shub2()) - xp_nofault_PIOR_target = SH2_IPI_ACCESS0; - - return xpSuccess; -} - -static void -xp_unregister_nofault_code_sn2(void) -{ - u64 func_addr = *(u64 *)xp_nofault_PIOR; - u64 err_func_addr = *(u64 *)xp_error_PIOR; - - /* unregister the PIO read nofault code region */ - (void)sn_register_nofault_code(func_addr, err_func_addr, - err_func_addr, 1, 0); -} - -/* - * Convert a virtual memory address to a physical memory address. - */ -static unsigned long -xp_pa_sn2(void *addr) -{ - return __pa(addr); -} - -/* - * Convert a global physical to a socket physical address. - */ -static unsigned long -xp_socket_pa_sn2(unsigned long gpa) -{ - return gpa; -} - -/* - * Wrapper for bte_copy(). - * - * dst_pa - physical address of the destination of the transfer. - * src_pa - physical address of the source of the transfer. - * len - number of bytes to transfer from source to destination. - * - * Note: xp_remote_memcpy_sn2() should never be called while holding a spinlock. - */ -static enum xp_retval -xp_remote_memcpy_sn2(unsigned long dst_pa, const unsigned long src_pa, - size_t len) -{ - bte_result_t ret; - - ret = bte_copy(src_pa, dst_pa, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL); - if (ret == BTE_SUCCESS) - return xpSuccess; - - if (is_shub2()) { - dev_err(xp, "bte_copy() on shub2 failed, error=0x%x dst_pa=" - "0x%016lx src_pa=0x%016lx len=%ld\\n", ret, dst_pa, - src_pa, len); - } else { - dev_err(xp, "bte_copy() failed, error=%d dst_pa=0x%016lx " - "src_pa=0x%016lx len=%ld\\n", ret, dst_pa, src_pa, len); - } - - return xpBteCopyError; -} - -static int -xp_cpu_to_nasid_sn2(int cpuid) -{ - return cpuid_to_nasid(cpuid); -} - -static enum xp_retval -xp_expand_memprotect_sn2(unsigned long phys_addr, unsigned long size) -{ - u64 nasid_array = 0; - int ret; - - ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1, - &nasid_array); - if (ret != 0) { - dev_err(xp, "sn_change_memprotect(,, " - "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret); - return xpSalError; - } - return xpSuccess; -} - -static enum xp_retval -xp_restrict_memprotect_sn2(unsigned long phys_addr, unsigned long size) -{ - u64 nasid_array = 0; - int ret; - - ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0, - &nasid_array); - if (ret != 0) { - dev_err(xp, "sn_change_memprotect(,, " - "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret); - return xpSalError; - } - return xpSuccess; -} - -enum xp_retval -xp_init_sn2(void) -{ - BUG_ON(!is_shub()); - - xp_max_npartitions = XP_MAX_NPARTITIONS_SN2; - xp_partition_id = sn_partition_id; - xp_region_size = sn_region_size; - - xp_pa = xp_pa_sn2; - xp_socket_pa = xp_socket_pa_sn2; - xp_remote_memcpy = xp_remote_memcpy_sn2; - xp_cpu_to_nasid = xp_cpu_to_nasid_sn2; - xp_expand_memprotect = xp_expand_memprotect_sn2; - xp_restrict_memprotect = xp_restrict_memprotect_sn2; - - return xp_register_nofault_code_sn2(); -} - -void -xp_exit_sn2(void) -{ - BUG_ON(!is_shub()); - - xp_unregister_nofault_code_sn2(); -} - diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c index a0d093274dc0..f15a9f2ac1dd 100644 --- a/drivers/misc/sgi-xp/xp_uv.c +++ b/drivers/misc/sgi-xp/xp_uv.c @@ -17,7 +17,7 @@ #include <asm/uv/uv_hub.h> #if defined CONFIG_X86_64 #include <asm/uv/bios.h> -#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#elif defined CONFIG_IA64_SGI_UV #include <asm/sn/sn_sal.h> #endif #include "../sgi-gru/grukservices.h" @@ -99,7 +99,7 @@ xp_expand_memprotect_uv(unsigned long phys_addr, unsigned long size) return xpBiosError; } -#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#elif defined CONFIG_IA64_SGI_UV u64 nasid_array; ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1, @@ -129,7 +129,7 @@ xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned long size) return xpBiosError; } -#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#elif defined CONFIG_IA64_SGI_UV u64 nasid_array; ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0, @@ -151,9 +151,10 @@ xp_init_uv(void) BUG_ON(!is_uv()); xp_max_npartitions = XP_MAX_NPARTITIONS_UV; +#ifdef CONFIG_X86 xp_partition_id = sn_partition_id; xp_region_size = sn_region_size; - +#endif xp_pa = xp_pa_uv; xp_socket_pa = xp_socket_pa_uv; xp_remote_memcpy = xp_remote_memcpy_uv; diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h index b94d5f767703..71db60edff65 100644 --- a/drivers/misc/sgi-xp/xpc.h +++ b/drivers/misc/sgi-xp/xpc.h @@ -71,14 +71,10 @@ * 'SAL_nasids_size'. (Local partition's mask pointers are xpc_part_nasids * and xpc_mach_nasids.) * - * vars (ia64-sn2 only) - * vars part (ia64-sn2 only) - * * Immediately following the mach_nasids mask are the XPC variables * required by other partitions. First are those that are generic to all * partitions (vars), followed on the next available cacheline by those * which are partition specific (vars part). These are setup by XPC. - * (Local partition's vars pointers are xpc_vars and xpc_vars_part.) * * Note: Until 'ts_jiffies' is set non-zero, the partition XPC code has not been * initialized. @@ -93,9 +89,6 @@ struct xpc_rsvd_page { unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ union { struct { - unsigned long vars_pa; /* phys addr */ - } sn2; - struct { unsigned long heartbeat_gpa; /* phys addr */ unsigned long activate_gru_mq_desc_gpa; /* phys addr */ } uv; @@ -106,84 +99,14 @@ struct xpc_rsvd_page { #define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */ -/* - * Define the structures by which XPC variables can be exported to other - * partitions. (There are two: struct xpc_vars and struct xpc_vars_part) - */ - -/* - * The following structure describes the partition generic variables - * needed by other partitions in order to properly initialize. - * - * struct xpc_vars version number also applies to struct xpc_vars_part. - * Changes to either structure and/or related functionality should be - * reflected by incrementing either the major or minor version numbers - * of struct xpc_vars. - */ -struct xpc_vars_sn2 { - u8 version; - u64 heartbeat; - DECLARE_BITMAP(heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2); - u64 heartbeat_offline; /* if 0, heartbeat should be changing */ - int activate_IRQ_nasid; - int activate_IRQ_phys_cpuid; - unsigned long vars_part_pa; - unsigned long amos_page_pa;/* paddr of page of amos from MSPEC driver */ - struct amo *amos_page; /* vaddr of page of amos from MSPEC driver */ -}; - -#define XPC_V_VERSION _XPC_VERSION(3, 1) /* version 3.1 of the cross vars */ - -/* - * The following structure describes the per partition specific variables. - * - * An array of these structures, one per partition, will be defined. As a - * partition becomes active XPC will copy the array entry corresponding to - * itself from that partition. It is desirable that the size of this structure - * evenly divides into a 128-byte cacheline, such that none of the entries in - * this array crosses a 128-byte cacheline boundary. As it is now, each entry - * occupies 64-bytes. - */ -struct xpc_vars_part_sn2 { - u64 magic; - - unsigned long openclose_args_pa; /* phys addr of open and close args */ - unsigned long GPs_pa; /* physical address of Get/Put values */ - - unsigned long chctl_amo_pa; /* physical address of chctl flags' amo */ - - int notify_IRQ_nasid; /* nasid of where to send notify IRQs */ - int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */ - - u8 nchannels; /* #of defined channels supported */ - - u8 reserved[23]; /* pad to a full 64 bytes */ -}; - -/* - * The vars_part MAGIC numbers play a part in the first contact protocol. - * - * MAGIC1 indicates that the per partition specific variables for a remote - * partition have been initialized by this partition. - * - * MAGIC2 indicates that this partition has pulled the remote partititions - * per partition variables that pertain to this partition. - */ -#define XPC_VP_MAGIC1_SN2 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */ -#define XPC_VP_MAGIC2_SN2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */ - /* the reserved page sizes and offsets */ #define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page)) -#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars_sn2)) #define XPC_RP_PART_NASIDS(_rp) ((unsigned long *)((u8 *)(_rp) + \ XPC_RP_HEADER_SIZE)) #define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + \ xpc_nasid_mask_nlongs) -#define XPC_RP_VARS(_rp) ((struct xpc_vars_sn2 *) \ - (XPC_RP_MACH_NASIDS(_rp) + \ - xpc_nasid_mask_nlongs)) /* @@ -298,17 +221,6 @@ struct xpc_activate_mq_msg_chctl_opencomplete_uv { #define XPC_UNPACK_ARG2(_args) ((((u64)_args) >> 32) & 0xffffffff) /* - * Define a Get/Put value pair (pointers) used with a message queue. - */ -struct xpc_gp_sn2 { - s64 get; /* Get value */ - s64 put; /* Put value */ -}; - -#define XPC_GP_SIZE \ - L1_CACHE_ALIGN(sizeof(struct xpc_gp_sn2) * XPC_MAX_NCHANNELS) - -/* * Define a structure that contains arguments associated with opening and * closing a channel. */ @@ -341,30 +253,6 @@ struct xpc_fifo_head_uv { }; /* - * Define a sn2 styled message. - * - * A user-defined message resides in the payload area. The max size of the - * payload is defined by the user via xpc_connect(). - * - * The size of a message entry (within a message queue) must be a 128-byte - * cacheline sized multiple in order to facilitate the BTE transfer of messages - * from one message queue to another. - */ -struct xpc_msg_sn2 { - u8 flags; /* FOR XPC INTERNAL USE ONLY */ - u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */ - s64 number; /* FOR XPC INTERNAL USE ONLY */ - - u64 payload; /* user defined portion of message */ -}; - -/* struct xpc_msg_sn2 flags */ - -#define XPC_M_SN2_DONE 0x01 /* msg has been received/consumed */ -#define XPC_M_SN2_READY 0x02 /* msg is ready to be sent */ -#define XPC_M_SN2_INTERRUPT 0x04 /* send interrupt when msg consumed */ - -/* * The format of a uv XPC notify_mq GRU message is as follows: * * A user-defined message resides in the payload area. The max size of the @@ -390,20 +278,6 @@ struct xpc_notify_mq_msg_uv { unsigned long payload; }; -/* - * Define sn2's notify entry. - * - * This is used to notify a message's sender that their message was received - * and consumed by the intended recipient. - */ -struct xpc_notify_sn2 { - u8 type; /* type of notification */ - - /* the following two fields are only used if type == XPC_N_CALL */ - xpc_notify_func func; /* user's notify function */ - void *key; /* pointer to user's key */ -}; - /* struct xpc_notify_sn2 type of notification */ #define XPC_N_CALL 0x01 /* notify function provided by user */ @@ -431,102 +305,6 @@ struct xpc_send_msg_slot_uv { * of these structures for each potential channel connection to that partition. */ -/* - * The following is sn2 only. - * - * Each channel structure manages two message queues (circular buffers). - * They are allocated at the time a channel connection is made. One of - * these message queues (local_msgqueue) holds the locally created messages - * that are destined for the remote partition. The other of these message - * queues (remote_msgqueue) is a locally cached copy of the remote partition's - * own local_msgqueue. - * - * The following is a description of the Get/Put pointers used to manage these - * two message queues. Consider the local_msgqueue to be on one partition - * and the remote_msgqueue to be its cached copy on another partition. A - * description of what each of the lettered areas contains is included. - * - * - * local_msgqueue remote_msgqueue - * - * |/////////| |/////////| - * w_remote_GP.get --> +---------+ |/////////| - * | F | |/////////| - * remote_GP.get --> +---------+ +---------+ <-- local_GP->get - * | | | | - * | | | E | - * | | | | - * | | +---------+ <-- w_local_GP.get - * | B | |/////////| - * | | |////D////| - * | | |/////////| - * | | +---------+ <-- w_remote_GP.put - * | | |////C////| - * local_GP->put --> +---------+ +---------+ <-- remote_GP.put - * | | |/////////| - * | A | |/////////| - * | | |/////////| - * w_local_GP.put --> +---------+ |/////////| - * |/////////| |/////////| - * - * - * ( remote_GP.[get|put] are cached copies of the remote - * partition's local_GP->[get|put], and thus their values can - * lag behind their counterparts on the remote partition. ) - * - * - * A - Messages that have been allocated, but have not yet been sent to the - * remote partition. - * - * B - Messages that have been sent, but have not yet been acknowledged by the - * remote partition as having been received. - * - * C - Area that needs to be prepared for the copying of sent messages, by - * the clearing of the message flags of any previously received messages. - * - * D - Area into which sent messages are to be copied from the remote - * partition's local_msgqueue and then delivered to their intended - * recipients. [ To allow for a multi-message copy, another pointer - * (next_msg_to_pull) has been added to keep track of the next message - * number needing to be copied (pulled). It chases after w_remote_GP.put. - * Any messages lying between w_local_GP.get and next_msg_to_pull have - * been copied and are ready to be delivered. ] - * - * E - Messages that have been copied and delivered, but have not yet been - * acknowledged by the recipient as having been received. - * - * F - Messages that have been acknowledged, but XPC has not yet notified the - * sender that the message was received by its intended recipient. - * This is also an area that needs to be prepared for the allocating of - * new messages, by the clearing of the message flags of the acknowledged - * messages. - */ - -struct xpc_channel_sn2 { - struct xpc_openclose_args *local_openclose_args; /* args passed on */ - /* opening or closing of channel */ - - void *local_msgqueue_base; /* base address of kmalloc'd space */ - struct xpc_msg_sn2 *local_msgqueue; /* local message queue */ - void *remote_msgqueue_base; /* base address of kmalloc'd space */ - struct xpc_msg_sn2 *remote_msgqueue; /* cached copy of remote */ - /* partition's local message queue */ - unsigned long remote_msgqueue_pa; /* phys addr of remote partition's */ - /* local message queue */ - - struct xpc_notify_sn2 *notify_queue;/* notify queue for messages sent */ - - /* various flavors of local and remote Get/Put values */ - - struct xpc_gp_sn2 *local_GP; /* local Get/Put values */ - struct xpc_gp_sn2 remote_GP; /* remote Get/Put values */ - struct xpc_gp_sn2 w_local_GP; /* working local Get/Put values */ - struct xpc_gp_sn2 w_remote_GP; /* working remote Get/Put values */ - s64 next_msg_to_pull; /* Put value of next msg to pull */ - - struct mutex msg_to_pull_mutex; /* next msg to pull serialization */ -}; - struct xpc_channel_uv { void *cached_notify_gru_mq_desc; /* remote partition's notify mq's */ /* gru mq descriptor */ @@ -579,7 +357,6 @@ struct xpc_channel { wait_queue_head_t idle_wq; /* idle kthread wait queue */ union { - struct xpc_channel_sn2 sn2; struct xpc_channel_uv uv; } sn; @@ -666,43 +443,6 @@ xpc_any_msg_chctl_flags_set(union xpc_channel_ctl_flags *chctl) return 0; } -/* - * Manage channels on a partition basis. There is one of these structures - * for each partition (a partition will never utilize the structure that - * represents itself). - */ - -struct xpc_partition_sn2 { - unsigned long remote_amos_page_pa; /* paddr of partition's amos page */ - int activate_IRQ_nasid; /* active partition's act/deact nasid */ - int activate_IRQ_phys_cpuid; /* active part's act/deact phys cpuid */ - - unsigned long remote_vars_pa; /* phys addr of partition's vars */ - unsigned long remote_vars_part_pa; /* paddr of partition's vars part */ - u8 remote_vars_version; /* version# of partition's vars */ - - void *local_GPs_base; /* base address of kmalloc'd space */ - struct xpc_gp_sn2 *local_GPs; /* local Get/Put values */ - void *remote_GPs_base; /* base address of kmalloc'd space */ - struct xpc_gp_sn2 *remote_GPs; /* copy of remote partition's local */ - /* Get/Put values */ - unsigned long remote_GPs_pa; /* phys addr of remote partition's local */ - /* Get/Put values */ - - void *local_openclose_args_base; /* base address of kmalloc'd space */ - struct xpc_openclose_args *local_openclose_args; /* local's args */ - unsigned long remote_openclose_args_pa; /* phys addr of remote's args */ - - int notify_IRQ_nasid; /* nasid of where to send notify IRQs */ - int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */ - char notify_IRQ_owner[8]; /* notify IRQ's owner's name */ - - struct amo *remote_chctl_amo_va; /* addr of remote chctl flags' amo */ - struct amo *local_chctl_amo_va; /* address of chctl flags' amo */ - - struct timer_list dropped_notify_IRQ_timer; /* dropped IRQ timer */ -}; - struct xpc_partition_uv { unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */ struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */ @@ -774,7 +514,6 @@ struct xpc_partition { wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */ union { - struct xpc_partition_sn2 sn2; struct xpc_partition_uv uv; } sn; @@ -854,14 +593,6 @@ struct xpc_arch_operations { #define XPC_P_SS_WTEARDOWN 0x02 /* waiting to teardown infrastructure */ #define XPC_P_SS_TORNDOWN 0x03 /* infrastructure is torndown */ -/* - * struct xpc_partition_sn2's dropped notify IRQ timer is set to wait the - * following interval #of seconds before checking for dropped notify IRQs. - * These can occur whenever an IRQ's associated amo write doesn't complete - * until after the IRQ was received. - */ -#define XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL (0.25 * HZ) - /* number of seconds to wait for other partitions to disengage */ #define XPC_DISENGAGE_DEFAULT_TIMELIMIT 90 @@ -888,10 +619,6 @@ extern void xpc_activate_kthreads(struct xpc_channel *, int); extern void xpc_create_kthreads(struct xpc_channel *, int, int); extern void xpc_disconnect_wait(int); -/* found in xpc_sn2.c */ -extern int xpc_init_sn2(void); -extern void xpc_exit_sn2(void); - /* found in xpc_uv.c */ extern int xpc_init_uv(void); extern void xpc_exit_uv(void); diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 83fc748a91a7..79a963105983 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -279,13 +279,6 @@ xpc_hb_checker(void *ignore) dev_dbg(xpc_part, "checking remote heartbeats\n"); xpc_check_remote_hb(); - - /* - * On sn2 we need to periodically recheck to ensure no - * IRQ/amo pairs have been missed. - */ - if (is_shub()) - force_IRQ = 1; } /* check for outstanding IRQs */ @@ -1050,9 +1043,7 @@ xpc_do_exit(enum xp_retval reason) xpc_teardown_partitions(); - if (is_shub()) - xpc_exit_sn2(); - else if (is_uv()) + if (is_uv()) xpc_exit_uv(); } @@ -1235,21 +1226,7 @@ xpc_init(void) dev_set_name(xpc_part, "part"); dev_set_name(xpc_chan, "chan"); - if (is_shub()) { - /* - * The ia64-sn2 architecture supports at most 64 partitions. - * And the inability to unregister remote amos restricts us - * further to only support exactly 64 partitions on this - * architecture, no less. - */ - if (xp_max_npartitions != 64) { - dev_err(xpc_part, "max #of partitions not set to 64\n"); - ret = -EINVAL; - } else { - ret = xpc_init_sn2(); - } - - } else if (is_uv()) { + if (is_uv()) { ret = xpc_init_uv(); } else { @@ -1335,9 +1312,7 @@ out_2: xpc_teardown_partitions(); out_1: - if (is_shub()) - xpc_exit_sn2(); - else if (is_uv()) + if (is_uv()) xpc_exit_uv(); return ret; } diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c index 782ce95d3f17..21a04bc97d40 100644 --- a/drivers/misc/sgi-xp/xpc_partition.c +++ b/drivers/misc/sgi-xp/xpc_partition.c @@ -93,10 +93,6 @@ xpc_get_rsvd_page_pa(int nasid) if (ret != xpNeedMoreInfo) break; - /* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */ - if (is_shub()) - len = L1_CACHE_ALIGN(len); - if (len > buf_len) { kfree(buf_base); buf_len = L1_CACHE_ALIGN(len); @@ -452,7 +448,6 @@ xpc_discovery(void) case 32: max_regions *= 2; region_size = 16; - DBUG_ON(!is_shub2()); } } diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c deleted file mode 100644 index 0ae69b9390ce..000000000000 --- a/drivers/misc/sgi-xp/xpc_sn2.c +++ /dev/null @@ -1,2459 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. - */ - -/* - * Cross Partition Communication (XPC) sn2-based functions. - * - * Architecture specific implementation of common functions. - * - */ - -#include <linux/delay.h> -#include <linux/slab.h> -#include <asm/uncached.h> -#include <asm/sn/mspec.h> -#include <asm/sn/sn_sal.h> -#include "xpc.h" - -/* - * Define the number of u64s required to represent all the C-brick nasids - * as a bitmap. The cross-partition kernel modules deal only with - * C-brick nasids, thus the need for bitmaps which don't account for - * odd-numbered (non C-brick) nasids. - */ -#define XPC_MAX_PHYSNODES_SN2 (MAX_NUMALINK_NODES / 2) -#define XP_NASID_MASK_BYTES_SN2 ((XPC_MAX_PHYSNODES_SN2 + 7) / 8) -#define XP_NASID_MASK_WORDS_SN2 ((XPC_MAX_PHYSNODES_SN2 + 63) / 64) - -/* - * Memory for XPC's amo variables is allocated by the MSPEC driver. These - * pages are located in the lowest granule. The lowest granule uses 4k pages - * for cached references and an alternate TLB handler to never provide a - * cacheable mapping for the entire region. This will prevent speculative - * reading of cached copies of our lines from being issued which will cause - * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64 - * amo variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of - * NOTIFY IRQs, 128 amo variables (based on XP_NASID_MASK_WORDS_SN2) to identify - * the senders of ACTIVATE IRQs, 1 amo variable to identify which remote - * partitions (i.e., XPCs) consider themselves currently engaged with the - * local XPC and 1 amo variable to request partition deactivation. - */ -#define XPC_NOTIFY_IRQ_AMOS_SN2 0 -#define XPC_ACTIVATE_IRQ_AMOS_SN2 (XPC_NOTIFY_IRQ_AMOS_SN2 + \ - XP_MAX_NPARTITIONS_SN2) -#define XPC_ENGAGED_PARTITIONS_AMO_SN2 (XPC_ACTIVATE_IRQ_AMOS_SN2 + \ - XP_NASID_MASK_WORDS_SN2) -#define XPC_DEACTIVATE_REQUEST_AMO_SN2 (XPC_ENGAGED_PARTITIONS_AMO_SN2 + 1) - -/* - * Buffer used to store a local copy of portions of a remote partition's - * reserved page (either its header and part_nasids mask, or its vars). - */ -static void *xpc_remote_copy_buffer_base_sn2; -static char *xpc_remote_copy_buffer_sn2; - -static struct xpc_vars_sn2 *xpc_vars_sn2; -static struct xpc_vars_part_sn2 *xpc_vars_part_sn2; - -static int -xpc_setup_partitions_sn2(void) -{ - /* nothing needs to be done */ - return 0; -} - -static void -xpc_teardown_partitions_sn2(void) -{ - /* nothing needs to be done */ -} - -/* SH_IPI_ACCESS shub register value on startup */ -static u64 xpc_sh1_IPI_access_sn2; -static u64 xpc_sh2_IPI_access0_sn2; -static u64 xpc_sh2_IPI_access1_sn2; -static u64 xpc_sh2_IPI_access2_sn2; -static u64 xpc_sh2_IPI_access3_sn2; - -/* - * Change protections to allow IPI operations. - */ -static void -xpc_allow_IPI_ops_sn2(void) -{ - int node; - int nasid; - - /* !!! The following should get moved into SAL. */ - if (is_shub2()) { - xpc_sh2_IPI_access0_sn2 = - (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0)); - xpc_sh2_IPI_access1_sn2 = - (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1)); - xpc_sh2_IPI_access2_sn2 = - (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2)); - xpc_sh2_IPI_access3_sn2 = - (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3)); - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), - -1UL); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), - -1UL); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), - -1UL); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), - -1UL); - } - } else { - xpc_sh1_IPI_access_sn2 = - (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS)); - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), - -1UL); - } - } -} - -/* - * Restrict protections to disallow IPI operations. - */ -static void -xpc_disallow_IPI_ops_sn2(void) -{ - int node; - int nasid; - - /* !!! The following should get moved into SAL. */ - if (is_shub2()) { - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), - xpc_sh2_IPI_access0_sn2); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), - xpc_sh2_IPI_access1_sn2); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), - xpc_sh2_IPI_access2_sn2); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), - xpc_sh2_IPI_access3_sn2); - } - } else { - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), - xpc_sh1_IPI_access_sn2); - } - } -} - -/* - * The following set of functions are used for the sending and receiving of - * IRQs (also known as IPIs). There are two flavors of IRQs, one that is - * associated with partition activity (SGI_XPC_ACTIVATE) and the other that - * is associated with channel activity (SGI_XPC_NOTIFY). - */ - -static u64 -xpc_receive_IRQ_amo_sn2(struct amo *amo) -{ - return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR); -} - -static enum xp_retval -xpc_send_IRQ_sn2(struct amo *amo, u64 flag, int nasid, int phys_cpuid, - int vector) -{ - int ret = 0; - unsigned long irq_flags; - - local_irq_save(irq_flags); - - FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag); - sn_send_IPI_phys(nasid, phys_cpuid, vector, 0); - - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IRQs and amos to it until the heartbeat times out. - */ - ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable), - xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); - - return (ret == 0) ? xpSuccess : xpPioReadError; -} - -static struct amo * -xpc_init_IRQ_amo_sn2(int index) -{ - struct amo *amo = xpc_vars_sn2->amos_page + index; - - (void)xpc_receive_IRQ_amo_sn2(amo); /* clear amo variable */ - return amo; -} - -/* - * Functions associated with SGI_XPC_ACTIVATE IRQ. - */ - -/* - * Notify the heartbeat check thread that an activate IRQ has been received. - */ -static irqreturn_t -xpc_handle_activate_IRQ_sn2(int irq, void *dev_id) -{ - unsigned long irq_flags; - - spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); - xpc_activate_IRQ_rcvd++; - spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); - - wake_up_interruptible(&xpc_activate_IRQ_wq); - return IRQ_HANDLED; -} - -/* - * Flag the appropriate amo variable and send an IRQ to the specified node. - */ -static void -xpc_send_activate_IRQ_sn2(unsigned long amos_page_pa, int from_nasid, - int to_nasid, int to_phys_cpuid) -{ - struct amo *amos = (struct amo *)__va(amos_page_pa + - (XPC_ACTIVATE_IRQ_AMOS_SN2 * - sizeof(struct amo))); - - (void)xpc_send_IRQ_sn2(&amos[BIT_WORD(from_nasid / 2)], - BIT_MASK(from_nasid / 2), to_nasid, - to_phys_cpuid, SGI_XPC_ACTIVATE); -} - -static void -xpc_send_local_activate_IRQ_sn2(int from_nasid) -{ - unsigned long irq_flags; - struct amo *amos = (struct amo *)__va(xpc_vars_sn2->amos_page_pa + - (XPC_ACTIVATE_IRQ_AMOS_SN2 * - sizeof(struct amo))); - - /* fake the sending and receipt of an activate IRQ from remote nasid */ - FETCHOP_STORE_OP(TO_AMO((u64)&amos[BIT_WORD(from_nasid / 2)].variable), - FETCHOP_OR, BIT_MASK(from_nasid / 2)); - - spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); - xpc_activate_IRQ_rcvd++; - spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); - - wake_up_interruptible(&xpc_activate_IRQ_wq); -} - -/* - * Functions associated with SGI_XPC_NOTIFY IRQ. - */ - -/* - * Check to see if any chctl flags were sent from the specified partition. - */ -static void -xpc_check_for_sent_chctl_flags_sn2(struct xpc_partition *part) -{ - union xpc_channel_ctl_flags chctl; - unsigned long irq_flags; - - chctl.all_flags = xpc_receive_IRQ_amo_sn2(part->sn.sn2. - local_chctl_amo_va); - if (chctl.all_flags == 0) - return; - - spin_lock_irqsave(&part->chctl_lock, irq_flags); - part->chctl.all_flags |= chctl.all_flags; - spin_unlock_irqrestore(&part->chctl_lock, irq_flags); - - dev_dbg(xpc_chan, "received notify IRQ from partid=%d, chctl.all_flags=" - "0x%llx\n", XPC_PARTID(part), chctl.all_flags); - - xpc_wakeup_channel_mgr(part); -} - -/* - * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified - * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more - * than one partition, we use an amo structure per partition to indicate - * whether a partition has sent an IRQ or not. If it has, then wake up the - * associated kthread to handle it. - * - * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IRQs sent by XPC - * running on other partitions. - * - * Noteworthy Arguments: - * - * irq - Interrupt ReQuest number. NOT USED. - * - * dev_id - partid of IRQ's potential sender. - */ -static irqreturn_t -xpc_handle_notify_IRQ_sn2(int irq, void *dev_id) -{ - short partid = (short)(u64)dev_id; - struct xpc_partition *part = &xpc_partitions[partid]; - - DBUG_ON(partid < 0 || partid >= XP_MAX_NPARTITIONS_SN2); - - if (xpc_part_ref(part)) { - xpc_check_for_sent_chctl_flags_sn2(part); - - xpc_part_deref(part); - } - return IRQ_HANDLED; -} - -/* - * Check to see if xpc_handle_notify_IRQ_sn2() dropped any IRQs on the floor - * because the write to their associated amo variable completed after the IRQ - * was received. - */ -static void -xpc_check_for_dropped_notify_IRQ_sn2(struct timer_list *t) -{ - struct xpc_partition *part = - from_timer(part, t, sn.sn2.dropped_notify_IRQ_timer); - - if (xpc_part_ref(part)) { - xpc_check_for_sent_chctl_flags_sn2(part); - - t->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL; - add_timer(t); - xpc_part_deref(part); - } -} - -/* - * Send a notify IRQ to the remote partition that is associated with the - * specified channel. - */ -static void -xpc_send_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag, - char *chctl_flag_string, unsigned long *irq_flags) -{ - struct xpc_partition *part = &xpc_partitions[ch->partid]; - struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; - union xpc_channel_ctl_flags chctl = { 0 }; - enum xp_retval ret; - - if (likely(part->act_state != XPC_P_AS_DEACTIVATING)) { - chctl.flags[ch->number] = chctl_flag; - ret = xpc_send_IRQ_sn2(part_sn2->remote_chctl_amo_va, - chctl.all_flags, - part_sn2->notify_IRQ_nasid, - part_sn2->notify_IRQ_phys_cpuid, - SGI_XPC_NOTIFY); - dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n", - chctl_flag_string, ch->partid, ch->number, ret); - if (unlikely(ret != xpSuccess)) { - if (irq_flags != NULL) - spin_unlock_irqrestore(&ch->lock, *irq_flags); - XPC_DEACTIVATE_PARTITION(part, ret); - if (irq_flags != NULL) - spin_lock_irqsave(&ch->lock, *irq_flags); - } - } -} - -#define XPC_SEND_NOTIFY_IRQ_SN2(_ch, _ipi_f, _irq_f) \ - xpc_send_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f, _irq_f) - -/* - * Make it look like the remote partition, which is associated with the - * specified channel, sent us a notify IRQ. This faked IRQ will be handled - * by xpc_check_for_dropped_notify_IRQ_sn2(). - */ -static void -xpc_send_local_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag, - char *chctl_flag_string) -{ - struct xpc_partition *part = &xpc_partitions[ch->partid]; - union xpc_channel_ctl_flags chctl = { 0 }; - - chctl.flags[ch->number] = chctl_flag; - FETCHOP_STORE_OP(TO_AMO((u64)&part->sn.sn2.local_chctl_amo_va-> - variable), FETCHOP_OR, chctl.all_flags); - dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n", - chctl_flag_string, ch->partid, ch->number); -} - -#define XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(_ch, _ipi_f) \ - xpc_send_local_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f) - -static void -xpc_send_chctl_closerequest_sn2(struct xpc_channel *ch, - unsigned long *irq_flags) -{ - struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; - - args->reason = ch->reason; - XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREQUEST, irq_flags); -} - -static void -xpc_send_chctl_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags) -{ - XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREPLY, irq_flags); -} - -static void -xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; - - args->entry_size = ch->entry_size; - args->local_nentries = ch->local_nentries; - XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREQUEST, irq_flags); -} - -static void -xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags) -{ - struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; - - args->remote_nentries = ch->remote_nentries; - args->local_nentries = ch->local_nentries; - args->local_msgqueue_pa = xp_pa(ch->sn.sn2.local_msgqueue); - XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags); -} - -static void -xpc_send_chctl_opencomplete_sn2(struct xpc_channel *ch, - unsigned long *irq_flags) -{ - XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENCOMPLETE, irq_flags); -} - -static void -xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch) -{ - XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL); -} - -static void -xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch) -{ - XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST); -} - -static enum xp_retval -xpc_save_remote_msgqueue_pa_sn2(struct xpc_channel *ch, - unsigned long msgqueue_pa) -{ - ch->sn.sn2.remote_msgqueue_pa = msgqueue_pa; - return xpSuccess; -} - -/* - * This next set of functions are used to keep track of when a partition is - * potentially engaged in accessing memory belonging to another partition. - */ - -static void -xpc_indicate_partition_engaged_sn2(struct xpc_partition *part) -{ - unsigned long irq_flags; - struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa + - (XPC_ENGAGED_PARTITIONS_AMO_SN2 * - sizeof(struct amo))); - - local_irq_save(irq_flags); - - /* set bit corresponding to our partid in remote partition's amo */ - FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, - BIT(sn_partition_id)); - - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IRQs and amos to it until the heartbeat times out. - */ - (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), - xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); -} - -static void -xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part) -{ - struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; - unsigned long irq_flags; - struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa + - (XPC_ENGAGED_PARTITIONS_AMO_SN2 * - sizeof(struct amo))); - - local_irq_save(irq_flags); - - /* clear bit corresponding to our partid in remote partition's amo */ - FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, - ~BIT(sn_partition_id)); - - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IRQs and amos to it until the heartbeat times out. - */ - (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), - xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); - - /* - * Send activate IRQ to get other side to see that we've cleared our - * bit in their engaged partitions amo. - */ - xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, - cnodeid_to_nasid(0), - part_sn2->activate_IRQ_nasid, - part_sn2->activate_IRQ_phys_cpuid); -} - -static void -xpc_assume_partition_disengaged_sn2(short partid) -{ - struct amo *amo = xpc_vars_sn2->amos_page + - XPC_ENGAGED_PARTITIONS_AMO_SN2; - - /* clear bit(s) based on partid mask in our partition's amo */ - FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, - ~BIT(partid)); -} - -static int -xpc_partition_engaged_sn2(short partid) -{ - struct amo *amo = xpc_vars_sn2->amos_page + - XPC_ENGAGED_PARTITIONS_AMO_SN2; - - /* our partition's amo variable ANDed with partid mask */ - return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) & - BIT(partid)) != 0; -} - -static int -xpc_any_partition_engaged_sn2(void) -{ - struct amo *amo = xpc_vars_sn2->amos_page + - XPC_ENGAGED_PARTITIONS_AMO_SN2; - - /* our partition's amo variable */ - return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0; -} - -/* original protection values for each node */ -static u64 xpc_prot_vec_sn2[MAX_NUMNODES]; - -/* - * Change protections to allow amo operations on non-Shub 1.1 systems. - */ -static enum xp_retval -xpc_allow_amo_ops_sn2(struct amo *amos_page) -{ - enum xp_retval ret = xpSuccess; - - /* - * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST - * collides with memory operations. On those systems we call - * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead. - */ - if (!enable_shub_wars_1_1()) - ret = xp_expand_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE); - - return ret; -} - -/* - * Change protections to allow amo operations on Shub 1.1 systems. - */ -static void -xpc_allow_amo_ops_shub_wars_1_1_sn2(void) -{ - int node; - int nasid; - - if (!enable_shub_wars_1_1()) - return; - - for_each_online_node(node) { - nasid = cnodeid_to_nasid(node); - /* save current protection values */ - xpc_prot_vec_sn2[node] = - (u64)HUB_L((u64 *)GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQLP_MMR_DIR_PRIVEC0)); - /* open up everything */ - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQLP_MMR_DIR_PRIVEC0), - -1UL); - HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, - SH1_MD_DQRP_MMR_DIR_PRIVEC0), - -1UL); - } -} - -static enum xp_retval -xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa, - size_t *len) -{ - s64 status; - enum xp_retval ret; - - status = sn_partition_reserved_page_pa((u64)buf, cookie, - (u64 *)rp_pa, (u64 *)len); - if (status == SALRET_OK) - ret = xpSuccess; - else if (status == SALRET_MORE_PASSES) - ret = xpNeedMoreInfo; - else - ret = xpSalError; - - return ret; -} - - -static int -xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp) -{ - struct amo *amos_page; - int i; - int ret; - - xpc_vars_sn2 = XPC_RP_VARS(rp); - - rp->sn.sn2.vars_pa = xp_pa(xpc_vars_sn2); - - /* vars_part array follows immediately after vars */ - xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) + - XPC_RP_VARS_SIZE); - - /* - * Before clearing xpc_vars_sn2, see if a page of amos had been - * previously allocated. If not we'll need to allocate one and set - * permissions so that cross-partition amos are allowed. - * - * The allocated amo page needs MCA reporting to remain disabled after - * XPC has unloaded. To make this work, we keep a copy of the pointer - * to this page (i.e., amos_page) in the struct xpc_vars_sn2 structure, - * which is pointed to by the reserved page, and re-use that saved copy - * on subsequent loads of XPC. This amo page is never freed, and its - * memory protections are never restricted. - */ - amos_page = xpc_vars_sn2->amos_page; - if (amos_page == NULL) { - amos_page = (struct amo *)TO_AMO(uncached_alloc_page(0, 1)); - if (amos_page == NULL) { - dev_err(xpc_part, "can't allocate page of amos\n"); - return -ENOMEM; - } - - /* - * Open up amo-R/W to cpu. This is done on Shub 1.1 systems - * when xpc_allow_amo_ops_shub_wars_1_1_sn2() is called. - */ - ret = xpc_allow_amo_ops_sn2(amos_page); - if (ret != xpSuccess) { - dev_err(xpc_part, "can't allow amo operations\n"); - uncached_free_page(__IA64_UNCACHED_OFFSET | - TO_PHYS((u64)amos_page), 1); - return -EPERM; - } - } - - /* clear xpc_vars_sn2 */ - memset(xpc_vars_sn2, 0, sizeof(struct xpc_vars_sn2)); - - xpc_vars_sn2->version = XPC_V_VERSION; - xpc_vars_sn2->activate_IRQ_nasid = cpuid_to_nasid(0); - xpc_vars_sn2->activate_IRQ_phys_cpuid = cpu_physical_id(0); - xpc_vars_sn2->vars_part_pa = xp_pa(xpc_vars_part_sn2); - xpc_vars_sn2->amos_page_pa = ia64_tpa((u64)amos_page); - xpc_vars_sn2->amos_page = amos_page; /* save for next load of XPC */ - - /* clear xpc_vars_part_sn2 */ - memset((u64 *)xpc_vars_part_sn2, 0, sizeof(struct xpc_vars_part_sn2) * - XP_MAX_NPARTITIONS_SN2); - - /* initialize the activate IRQ related amo variables */ - for (i = 0; i < xpc_nasid_mask_nlongs; i++) - (void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS_SN2 + i); - - /* initialize the engaged remote partitions related amo variables */ - (void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO_SN2); - (void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO_SN2); - - return 0; -} - -static int -xpc_hb_allowed_sn2(short partid, void *heartbeating_to_mask) -{ - return test_bit(partid, heartbeating_to_mask); -} - -static void -xpc_allow_hb_sn2(short partid) -{ - DBUG_ON(xpc_vars_sn2 == NULL); - set_bit(partid, xpc_vars_sn2->heartbeating_to_mask); -} - -static void -xpc_disallow_hb_sn2(short partid) -{ - DBUG_ON(xpc_vars_sn2 == NULL); - clear_bit(partid, xpc_vars_sn2->heartbeating_to_mask); -} - -static void -xpc_disallow_all_hbs_sn2(void) -{ - DBUG_ON(xpc_vars_sn2 == NULL); - bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, xp_max_npartitions); -} - -static void -xpc_increment_heartbeat_sn2(void) -{ - xpc_vars_sn2->heartbeat++; -} - -static void -xpc_offline_heartbeat_sn2(void) -{ - xpc_increment_heartbeat_sn2(); - xpc_vars_sn2->heartbeat_offline = 1; -} - -static void -xpc_online_heartbeat_sn2(void) -{ - xpc_increment_heartbeat_sn2(); - xpc_vars_sn2->heartbeat_offline = 0; -} - -static void -xpc_heartbeat_init_sn2(void) -{ - DBUG_ON(xpc_vars_sn2 == NULL); - - bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2); - xpc_online_heartbeat_sn2(); -} - -static void -xpc_heartbeat_exit_sn2(void) -{ - xpc_offline_heartbeat_sn2(); -} - -static enum xp_retval -xpc_get_remote_heartbeat_sn2(struct xpc_partition *part) -{ - struct xpc_vars_sn2 *remote_vars; - enum xp_retval ret; - - remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2; - - /* pull the remote vars structure that contains the heartbeat */ - ret = xp_remote_memcpy(xp_pa(remote_vars), - part->sn.sn2.remote_vars_pa, - XPC_RP_VARS_SIZE); - if (ret != xpSuccess) - return ret; - - dev_dbg(xpc_part, "partid=%d, heartbeat=%lld, last_heartbeat=%lld, " - "heartbeat_offline=%lld, HB_mask[0]=0x%lx\n", XPC_PARTID(part), - remote_vars->heartbeat, part->last_heartbeat, - remote_vars->heartbeat_offline, - remote_vars->heartbeating_to_mask[0]); - - if ((remote_vars->heartbeat == part->last_heartbeat && - !remote_vars->heartbeat_offline) || - !xpc_hb_allowed_sn2(sn_partition_id, - remote_vars->heartbeating_to_mask)) { - ret = xpNoHeartbeat; - } else { - part->last_heartbeat = remote_vars->heartbeat; - } - - return ret; -} - -/* - * Get a copy of the remote partition's XPC variables from the reserved page. - * - * remote_vars points to a buffer that is cacheline aligned for BTE copies and - * assumed to be of size XPC_RP_VARS_SIZE. - */ -static enum xp_retval -xpc_get_remote_vars_sn2(unsigned long remote_vars_pa, - struct xpc_vars_sn2 *remote_vars) -{ - enum xp_retval ret; - - if (remote_vars_pa == 0) - return xpVarsNotSet; - - /* pull over the cross partition variables */ - ret = xp_remote_memcpy(xp_pa(remote_vars), remote_vars_pa, - XPC_RP_VARS_SIZE); - if (ret != xpSuccess) - return ret; - - if (XPC_VERSION_MAJOR(remote_vars->version) != - XPC_VERSION_MAJOR(XPC_V_VERSION)) { - return xpBadVersion; - } - - return xpSuccess; -} - -static void -xpc_request_partition_activation_sn2(struct xpc_rsvd_page *remote_rp, - unsigned long remote_rp_pa, int nasid) -{ - xpc_send_local_activate_IRQ_sn2(nasid); -} - -static void -xpc_request_partition_reactivation_sn2(struct xpc_partition *part) -{ - xpc_send_local_activate_IRQ_sn2(part->sn.sn2.activate_IRQ_nasid); -} - -static void -xpc_request_partition_deactivation_sn2(struct xpc_partition *part) -{ - struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; - unsigned long irq_flags; - struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa + - (XPC_DEACTIVATE_REQUEST_AMO_SN2 * - sizeof(struct amo))); - - local_irq_save(irq_flags); - - /* set bit corresponding to our partid in remote partition's amo */ - FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, - BIT(sn_partition_id)); - - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IRQs and amos to it until the heartbeat times out. - */ - (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), - xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); - - /* - * Send activate IRQ to get other side to see that we've set our - * bit in their deactivate request amo. - */ - xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, - cnodeid_to_nasid(0), - part_sn2->activate_IRQ_nasid, - part_sn2->activate_IRQ_phys_cpuid); -} - -static void -xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part) -{ - unsigned long irq_flags; - struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa + - (XPC_DEACTIVATE_REQUEST_AMO_SN2 * - sizeof(struct amo))); - - local_irq_save(irq_flags); - - /* clear bit corresponding to our partid in remote partition's amo */ - FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, - ~BIT(sn_partition_id)); - - /* - * We must always use the nofault function regardless of whether we - * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we - * didn't, we'd never know that the other partition is down and would - * keep sending IRQs and amos to it until the heartbeat times out. - */ - (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> - variable), - xp_nofault_PIOR_target)); - - local_irq_restore(irq_flags); -} - -static int -xpc_partition_deactivation_requested_sn2(short partid) -{ - struct amo *amo = xpc_vars_sn2->amos_page + - XPC_DEACTIVATE_REQUEST_AMO_SN2; - - /* our partition's amo variable ANDed with partid mask */ - return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) & - BIT(partid)) != 0; -} - -/* - * Update the remote partition's info. - */ -static void -xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version, - unsigned long *remote_rp_ts_jiffies, - unsigned long remote_rp_pa, - unsigned long remote_vars_pa, - struct xpc_vars_sn2 *remote_vars) -{ - struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; - - part->remote_rp_version = remote_rp_version; - dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n", - part->remote_rp_version); - - part->remote_rp_ts_jiffies = *remote_rp_ts_jiffies; - dev_dbg(xpc_part, " remote_rp_ts_jiffies = 0x%016lx\n", - part->remote_rp_ts_jiffies); - - part->remote_rp_pa = remote_rp_pa; - dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa); - - part_sn2->remote_vars_pa = remote_vars_pa; - dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", - part_sn2->remote_vars_pa); - - part->last_heartbeat = remote_vars->heartbeat - 1; - dev_dbg(xpc_part, " last_heartbeat = 0x%016llx\n", - part->last_heartbeat); - - part_sn2->remote_vars_part_pa = remote_vars->vars_part_pa; - dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", - part_sn2->remote_vars_part_pa); - - part_sn2->activate_IRQ_nasid = remote_vars->activate_IRQ_nasid; - dev_dbg(xpc_part, " activate_IRQ_nasid = 0x%x\n", - part_sn2->activate_IRQ_nasid); - - part_sn2->activate_IRQ_phys_cpuid = - remote_vars->activate_IRQ_phys_cpuid; - dev_dbg(xpc_part, " activate_IRQ_phys_cpuid = 0x%x\n", - part_sn2->activate_IRQ_phys_cpuid); - - part_sn2->remote_amos_page_pa = remote_vars->amos_page_pa; - dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", - part_sn2->remote_amos_page_pa); - - part_sn2->remote_vars_version = remote_vars->version; - dev_dbg(xpc_part, " remote_vars_version = 0x%x\n", - part_sn2->remote_vars_version); -} - -/* - * Prior code has determined the nasid which generated a activate IRQ. - * Inspect that nasid to determine if its partition needs to be activated - * or deactivated. - * - * A partition is considered "awaiting activation" if our partition - * flags indicate it is not active and it has a heartbeat. A - * partition is considered "awaiting deactivation" if our partition - * flags indicate it is active but it has no heartbeat or it is not - * sending its heartbeat to us. - * - * To determine the heartbeat, the remote nasid must have a properly - * initialized reserved page. - */ -static void -xpc_identify_activate_IRQ_req_sn2(int nasid) -{ - struct xpc_rsvd_page *remote_rp; - struct xpc_vars_sn2 *remote_vars; - unsigned long remote_rp_pa; - unsigned long remote_vars_pa; - int remote_rp_version; - int reactivate = 0; - unsigned long remote_rp_ts_jiffies = 0; - short partid; - struct xpc_partition *part; - struct xpc_partition_sn2 *part_sn2; - enum xp_retval ret; - - /* pull over the reserved page structure */ - - remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer_sn2; - - ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa); - if (ret != xpSuccess) { - dev_warn(xpc_part, "unable to get reserved page from nasid %d, " - "which sent interrupt, reason=%d\n", nasid, ret); - return; - } - - remote_vars_pa = remote_rp->sn.sn2.vars_pa; - remote_rp_version = remote_rp->version; - remote_rp_ts_jiffies = remote_rp->ts_jiffies; - - partid = remote_rp->SAL_partid; - part = &xpc_partitions[partid]; - part_sn2 = &part->sn.sn2; - - /* pull over the cross partition variables */ - - remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2; - - ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars); - if (ret != xpSuccess) { - dev_warn(xpc_part, "unable to get XPC variables from nasid %d, " - "which sent interrupt, reason=%d\n", nasid, ret); - - XPC_DEACTIVATE_PARTITION(part, ret); - return; - } - - part->activate_IRQ_rcvd++; - - dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = " - "%lld:0x%lx\n", (int)nasid, (int)partid, - part->activate_IRQ_rcvd, - remote_vars->heartbeat, remote_vars->heartbeating_to_mask[0]); - - if (xpc_partition_disengaged(part) && - part->act_state == XPC_P_AS_INACTIVE) { - - xpc_update_partition_info_sn2(part, remote_rp_version, - &remote_rp_ts_jiffies, - remote_rp_pa, remote_vars_pa, - remote_vars); - - if (xpc_partition_deactivation_requested_sn2(partid)) { - /* - * Other side is waiting on us to deactivate even though - * we already have. - */ - return; - } - - xpc_activate_partition(part); - return; - } - - DBUG_ON(part->remote_rp_version == 0); - DBUG_ON(part_sn2->remote_vars_version == 0); - - if (remote_rp_ts_jiffies != part->remote_rp_ts_jiffies) { - - /* the other side rebooted */ - - DBUG_ON(xpc_partition_engaged_sn2(partid)); - DBUG_ON(xpc_partition_deactivation_requested_sn2(partid)); - - xpc_update_partition_info_sn2(part, remote_rp_version, - &remote_rp_ts_jiffies, - remote_rp_pa, remote_vars_pa, - remote_vars); - reactivate = 1; - } - - if (part->disengage_timeout > 0 && !xpc_partition_disengaged(part)) { - /* still waiting on other side to disengage from us */ - return; - } - - if (reactivate) - XPC_DEACTIVATE_PARTITION(part, xpReactivating); - else if (xpc_partition_deactivation_requested_sn2(partid)) - XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown); -} - -/* - * Loop through the activation amo variables and process any bits - * which are set. Each bit indicates a nasid sending a partition - * activation or deactivation request. - * - * Return #of IRQs detected. - */ -int -xpc_identify_activate_IRQ_sender_sn2(void) -{ - int l; - int b; - unsigned long nasid_mask_long; - u64 nasid; /* remote nasid */ - int n_IRQs_detected = 0; - struct amo *act_amos; - - act_amos = xpc_vars_sn2->amos_page + XPC_ACTIVATE_IRQ_AMOS_SN2; - - /* scan through activate amo variables looking for non-zero entries */ - for (l = 0; l < xpc_nasid_mask_nlongs; l++) { - - if (xpc_exiting) - break; - - nasid_mask_long = xpc_receive_IRQ_amo_sn2(&act_amos[l]); - - b = find_first_bit(&nasid_mask_long, BITS_PER_LONG); - if (b >= BITS_PER_LONG) { - /* no IRQs from nasids in this amo variable */ - continue; - } - - dev_dbg(xpc_part, "amo[%d] gave back 0x%lx\n", l, - nasid_mask_long); - - /* - * If this nasid has been added to the machine since - * our partition was reset, this will retain the - * remote nasid in our reserved pages machine mask. - * This is used in the event of module reload. - */ - xpc_mach_nasids[l] |= nasid_mask_long; - - /* locate the nasid(s) which sent interrupts */ - - do { - n_IRQs_detected++; - nasid = (l * BITS_PER_LONG + b) * 2; - dev_dbg(xpc_part, "interrupt from nasid %lld\n", nasid); - xpc_identify_activate_IRQ_req_sn2(nasid); - - b = find_next_bit(&nasid_mask_long, BITS_PER_LONG, - b + 1); - } while (b < BITS_PER_LONG); - } - return n_IRQs_detected; -} - -static void -xpc_process_activate_IRQ_rcvd_sn2(void) -{ - unsigned long irq_flags; - int n_IRQs_expected; - int n_IRQs_detected; - - spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); - n_IRQs_expected = xpc_activate_IRQ_rcvd; - xpc_activate_IRQ_rcvd = 0; - spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); - - n_IRQs_detected = xpc_identify_activate_IRQ_sender_sn2(); - if (n_IRQs_detected < n_IRQs_expected) { - /* retry once to help avoid missing amo */ - (void)xpc_identify_activate_IRQ_sender_sn2(); - } -} - -/* - * Setup the channel structures that are sn2 specific. - */ -static enum xp_retval -xpc_setup_ch_structures_sn2(struct xpc_partition *part) -{ - struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; - struct xpc_channel_sn2 *ch_sn2; - enum xp_retval retval; - int ret; - int cpuid; - int ch_number; - struct timer_list *timer; - short partid = XPC_PARTID(part); - - /* allocate all the required GET/PUT values */ - - part_sn2->local_GPs = - xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL, - &part_sn2->local_GPs_base); - if (part_sn2->local_GPs == NULL) { - dev_err(xpc_chan, "can't get memory for local get/put " - "values\n"); - return xpNoMemory; - } - - part_sn2->remote_GPs = - xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL, - &part_sn2->remote_GPs_base); - if (part_sn2->remote_GPs == NULL) { - dev_err(xpc_chan, "can't get memory for remote get/put " - "values\n"); - retval = xpNoMemory; - goto out_1; - } - - part_sn2->remote_GPs_pa = 0; - - /* allocate all the required open and close args */ - - part_sn2->local_openclose_args = - xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, - GFP_KERNEL, &part_sn2-> - local_openclose_args_base); - if (part_sn2->local_openclose_args == NULL) { - dev_err(xpc_chan, "can't get memory for local connect args\n"); - retval = xpNoMemory; - goto out_2; - } - - part_sn2->remote_openclose_args_pa = 0; - - part_sn2->local_chctl_amo_va = xpc_init_IRQ_amo_sn2(partid); - - part_sn2->notify_IRQ_nasid = 0; - part_sn2->notify_IRQ_phys_cpuid = 0; - part_sn2->remote_chctl_amo_va = NULL; - - sprintf(part_sn2->notify_IRQ_owner, "xpc%02d", partid); - ret = request_irq(SGI_XPC_NOTIFY, xpc_handle_notify_IRQ_sn2, - IRQF_SHARED, part_sn2->notify_IRQ_owner, - (void *)(u64)partid); - if (ret != 0) { - dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " - "errno=%d\n", -ret); - retval = xpLackOfResources; - goto out_3; - } - - /* Setup a timer to check for dropped notify IRQs */ - timer = &part_sn2->dropped_notify_IRQ_timer; - timer_setup(timer, xpc_check_for_dropped_notify_IRQ_sn2, 0); - timer->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL; - add_timer(timer); - - for (ch_number = 0; ch_number < part->nchannels; ch_number++) { - ch_sn2 = &part->channels[ch_number].sn.sn2; - - ch_sn2->local_GP = &part_sn2->local_GPs[ch_number]; - ch_sn2->local_openclose_args = - &part_sn2->local_openclose_args[ch_number]; - - mutex_init(&ch_sn2->msg_to_pull_mutex); - } - - /* - * Setup the per partition specific variables required by the - * remote partition to establish channel connections with us. - * - * The setting of the magic # indicates that these per partition - * specific variables are ready to be used. - */ - xpc_vars_part_sn2[partid].GPs_pa = xp_pa(part_sn2->local_GPs); - xpc_vars_part_sn2[partid].openclose_args_pa = - xp_pa(part_sn2->local_openclose_args); - xpc_vars_part_sn2[partid].chctl_amo_pa = - xp_pa(part_sn2->local_chctl_amo_va); - cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */ - xpc_vars_part_sn2[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid); - xpc_vars_part_sn2[partid].notify_IRQ_phys_cpuid = - cpu_physical_id(cpuid); - xpc_vars_part_sn2[partid].nchannels = part->nchannels; - xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC1_SN2; - - return xpSuccess; - - /* setup of ch structures failed */ -out_3: - kfree(part_sn2->local_openclose_args_base); - part_sn2->local_openclose_args = NULL; -out_2: - kfree(part_sn2->remote_GPs_base); - part_sn2->remote_GPs = NULL; -out_1: - kfree(part_sn2->local_GPs_base); - part_sn2->local_GPs = NULL; - return retval; -} - -/* - * Teardown the channel structures that are sn2 specific. - */ -static void -xpc_teardown_ch_structures_sn2(struct xpc_partition *part) -{ - struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; - short partid = XPC_PARTID(part); - - /* - * Indicate that the variables specific to the remote partition are no - * longer available for its use. - */ - xpc_vars_part_sn2[partid].magic = 0; - - /* in case we've still got outstanding timers registered... */ - del_timer_sync(&part_sn2->dropped_notify_IRQ_timer); - free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid); - - kfree(part_sn2->local_openclose_args_base); - part_sn2->local_openclose_args = NULL; - kfree(part_sn2->remote_GPs_base); - part_sn2->remote_GPs = NULL; - kfree(part_sn2->local_GPs_base); - part_sn2->local_GPs = NULL; - part_sn2->local_chctl_amo_va = NULL; -} - -/* - * Create a wrapper that hides the underlying mechanism for pulling a cacheline - * (or multiple cachelines) from a remote partition. - * - * src_pa must be a cacheline aligned physical address on the remote partition. - * dst must be a cacheline aligned virtual address on this partition. - * cnt must be cacheline sized - */ -/* ??? Replace this function by call to xp_remote_memcpy() or bte_copy()? */ -static enum xp_retval -xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst, - const unsigned long src_pa, size_t cnt) -{ - enum xp_retval ret; - - DBUG_ON(src_pa != L1_CACHE_ALIGN(src_pa)); - DBUG_ON((unsigned long)dst != L1_CACHE_ALIGN((unsigned long)dst)); - DBUG_ON(cnt != L1_CACHE_ALIGN(cnt)); - - if (part->act_state == XPC_P_AS_DEACTIVATING) - return part->reason; - - ret = xp_remote_memcpy(xp_pa(dst), src_pa, cnt); - if (ret != xpSuccess) { - dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed," - " ret=%d\n", XPC_PARTID(part), ret); - } - return ret; -} - -/* - * Pull the remote per partition specific variables from the specified - * partition. - */ -static enum xp_retval -xpc_pull_remote_vars_part_sn2(struct xpc_partition *part) -{ - struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; - u8 buffer[L1_CACHE_BYTES * 2]; - struct xpc_vars_part_sn2 *pulled_entry_cacheline = - (struct xpc_vars_part_sn2 *)L1_CACHE_ALIGN((u64)buffer); - struct xpc_vars_part_sn2 *pulled_entry; - unsigned long remote_entry_cacheline_pa; - unsigned long remote_entry_pa; - short partid = XPC_PARTID(part); - enum xp_retval ret; - - /* pull the cacheline that contains the variables we're interested in */ - - DBUG_ON(part_sn2->remote_vars_part_pa != - L1_CACHE_ALIGN(part_sn2->remote_vars_part_pa)); - DBUG_ON(sizeof(struct xpc_vars_part_sn2) != L1_CACHE_BYTES / 2); - - remote_entry_pa = part_sn2->remote_vars_part_pa + - sn_partition_id * sizeof(struct xpc_vars_part_sn2); - - remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1)); - - pulled_entry = (struct xpc_vars_part_sn2 *)((u64)pulled_entry_cacheline - + (remote_entry_pa & - (L1_CACHE_BYTES - 1))); - - ret = xpc_pull_remote_cachelines_sn2(part, pulled_entry_cacheline, - remote_entry_cacheline_pa, - L1_CACHE_BYTES); - if (ret != xpSuccess) { - dev_dbg(xpc_chan, "failed to pull XPC vars_part from " - "partition %d, ret=%d\n", partid, ret); - return ret; - } - - /* see if they've been set up yet */ - - if (pulled_entry->magic != XPC_VP_MAGIC1_SN2 && - pulled_entry->magic != XPC_VP_MAGIC2_SN2) { - - if (pulled_entry->magic != 0) { - dev_dbg(xpc_chan, "partition %d's XPC vars_part for " - "partition %d has bad magic value (=0x%llx)\n", - partid, sn_partition_id, pulled_entry->magic); - return xpBadMagic; - } - - /* they've not been initialized yet */ - return xpRetry; - } - - if (xpc_vars_part_sn2[partid].magic == XPC_VP_MAGIC1_SN2) { - - /* validate the variables */ - - if (pulled_entry->GPs_pa == 0 || - pulled_entry->openclose_args_pa == 0 || - pulled_entry->chctl_amo_pa == 0) { - - dev_err(xpc_chan, "partition %d's XPC vars_part for " - "partition %d are not valid\n", partid, - sn_partition_id); - return xpInvalidAddress; - } - - /* the variables we imported look to be valid */ - - part_sn2->remote_GPs_pa = pulled_entry->GPs_pa; - part_sn2->remote_openclose_args_pa = - pulled_entry->openclose_args_pa; - part_sn2->remote_chctl_amo_va = - (struct amo *)__va(pulled_entry->chctl_amo_pa); - part_sn2->notify_IRQ_nasid = pulled_entry->notify_IRQ_nasid; - part_sn2->notify_IRQ_phys_cpuid = - pulled_entry->notify_IRQ_phys_cpuid; - - if (part->nchannels > pulled_entry->nchannels) - part->nchannels = pulled_entry->nchannels; - - /* let the other side know that we've pulled their variables */ - - xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC2_SN2; - } - - if (pulled_entry->magic == XPC_VP_MAGIC1_SN2) - return xpRetry; - - return xpSuccess; -} - -/* - * Establish first contact with the remote partititon. This involves pulling - * the XPC per partition variables from the remote partition and waiting for - * the remote partition to pull ours. - */ -static enum xp_retval -xpc_make_first_contact_sn2(struct xpc_partition *part) -{ - struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; - enum xp_retval ret; - - /* - * Register the remote partition's amos with SAL so it can handle - * and cleanup errors within that address range should the remote - * partition go down. We don't unregister this range because it is - * difficult to tell when outstanding writes to the remote partition - * are finished and thus when it is safe to unregister. This should - * not result in wasted space in the SAL xp_addr_region table because - * we should get the same page for remote_amos_page_pa after module - * reloads and system reboots. - */ - if (sn_register_xp_addr_region(part_sn2->remote_amos_page_pa, - PAGE_SIZE, 1) < 0) { - dev_warn(xpc_part, "xpc_activating(%d) failed to register " - "xp_addr region\n", XPC_PARTID(part)); - - ret = xpPhysAddrRegFailed; - XPC_DEACTIVATE_PARTITION(part, ret); - return ret; - } - - /* - * Send activate IRQ to get other side to activate if they've not - * already begun to do so. - */ - xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, - cnodeid_to_nasid(0), - part_sn2->activate_IRQ_nasid, - part_sn2->activate_IRQ_phys_cpuid); - - while ((ret = xpc_pull_remote_vars_part_sn2(part)) != xpSuccess) { - if (ret != xpRetry) { - XPC_DEACTIVATE_PARTITION(part, ret); - return ret; - } - - dev_dbg(xpc_part, "waiting to make first contact with " - "partition %d\n", XPC_PARTID(part)); - - /* wait a 1/4 of a second or so */ - (void)msleep_interruptible(250); - - if (part->act_state == XPC_P_AS_DEACTIVATING) - return part->reason; - } - - return xpSuccess; -} - -/* - * Get the chctl flags and pull the openclose args and/or remote GPs as needed. - */ -static u64 -xpc_get_chctl_all_flags_sn2(struct xpc_partition *part) -{ - struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; - unsigned long irq_flags; - union xpc_channel_ctl_flags chctl; - enum xp_retval ret; - - /* - * See if there are any chctl flags to be handled. - */ - - spin_lock_irqsave(&part->chctl_lock, irq_flags); - chctl = part->chctl; - if (chctl.all_flags != 0) - part->chctl.all_flags = 0; - - spin_unlock_irqrestore(&part->chctl_lock, irq_flags); - - if (xpc_any_openclose_chctl_flags_set(&chctl)) { - ret = xpc_pull_remote_cachelines_sn2(part, part-> - remote_openclose_args, - part_sn2-> - remote_openclose_args_pa, - XPC_OPENCLOSE_ARGS_SIZE); - if (ret != xpSuccess) { - XPC_DEACTIVATE_PARTITION(part, ret); - - dev_dbg(xpc_chan, "failed to pull openclose args from " - "partition %d, ret=%d\n", XPC_PARTID(part), - ret); - - /* don't bother processing chctl flags anymore */ - chctl.all_flags = 0; - } - } - - if (xpc_any_msg_chctl_flags_set(&chctl)) { - ret = xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs, - part_sn2->remote_GPs_pa, - XPC_GP_SIZE); - if (ret != xpSuccess) { - XPC_DEACTIVATE_PARTITION(part, ret); - - dev_dbg(xpc_chan, "failed to pull GPs from partition " - "%d, ret=%d\n", XPC_PARTID(part), ret); - - /* don't bother processing chctl flags anymore */ - chctl.all_flags = 0; - } - } - - return chctl.all_flags; -} - -/* - * Allocate the local message queue and the notify queue. - */ -static enum xp_retval -xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch) -{ - struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; - unsigned long irq_flags; - int nentries; - size_t nbytes; - - for (nentries = ch->local_nentries; nentries > 0; nentries--) { - - nbytes = nentries * ch->entry_size; - ch_sn2->local_msgqueue = - xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, - &ch_sn2->local_msgqueue_base); - if (ch_sn2->local_msgqueue == NULL) - continue; - - nbytes = nentries * sizeof(struct xpc_notify_sn2); - ch_sn2->notify_queue = kzalloc(nbytes, GFP_KERNEL); - if (ch_sn2->notify_queue == NULL) { - kfree(ch_sn2->local_msgqueue_base); - ch_sn2->local_msgqueue = NULL; - continue; - } - - spin_lock_irqsave(&ch->lock, irq_flags); - if (nentries < ch->local_nentries) { - dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, " - "partid=%d, channel=%d\n", nentries, - ch->local_nentries, ch->partid, ch->number); - - ch->local_nentries = nentries; - } - spin_unlock_irqrestore(&ch->lock, irq_flags); - return xpSuccess; - } - - dev_dbg(xpc_chan, "can't get memory for local message queue and notify " - "queue, partid=%d, channel=%d\n", ch->partid, ch->number); - return xpNoMemory; -} - -/* - * Allocate the cached remote message queue. - */ -static enum xp_retval -xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch) -{ - struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; - unsigned long irq_flags; - int nentries; - size_t nbytes; - - DBUG_ON(ch->remote_nentries <= 0); - - for (nentries = ch->remote_nentries; nentries > 0; nentries--) { - - nbytes = nentries * ch->entry_size; - ch_sn2->remote_msgqueue = - xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, &ch_sn2-> - remote_msgqueue_base); - if (ch_sn2->remote_msgqueue == NULL) - continue; - - spin_lock_irqsave(&ch->lock, irq_flags); - if (nentries < ch->remote_nentries) { - dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, " - "partid=%d, channel=%d\n", nentries, - ch->remote_nentries, ch->partid, ch->number); - - ch->remote_nentries = nentries; - } - spin_unlock_irqrestore(&ch->lock, irq_flags); - return xpSuccess; - } - - dev_dbg(xpc_chan, "can't get memory for cached remote message queue, " - "partid=%d, channel=%d\n", ch->partid, ch->number); - return xpNoMemory; -} - -/* - * Allocate message queues and other stuff associated with a channel. - * - * Note: Assumes all of the channel sizes are filled in. - */ -static enum xp_retval -xpc_setup_msg_structures_sn2(struct xpc_channel *ch) -{ - struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; - enum xp_retval ret; - - DBUG_ON(ch->flags & XPC_C_SETUP); - - ret = xpc_allocate_local_msgqueue_sn2(ch); - if (ret == xpSuccess) { - - ret = xpc_allocate_remote_msgqueue_sn2(ch); - if (ret != xpSuccess) { - kfree(ch_sn2->local_msgqueue_base); - ch_sn2->local_msgqueue = NULL; - kfree(ch_sn2->notify_queue); - ch_sn2->notify_queue = NULL; - } - } - return ret; -} - -/* - * Free up message queues and other stuff that were allocated for the specified - * channel. - */ -static void -xpc_teardown_msg_structures_sn2(struct xpc_channel *ch) -{ - struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; - - lockdep_assert_held(&ch->lock); - - ch_sn2->remote_msgqueue_pa = 0; - - ch_sn2->local_GP->get = 0; - ch_sn2->local_GP->put = 0; - ch_sn2->remote_GP.get = 0; - ch_sn2->remote_GP.put = 0; - ch_sn2->w_local_GP.get = 0; - ch_sn2->w_local_GP.put = 0; - ch_sn2->w_remote_GP.get = 0; - ch_sn2->w_remote_GP.put = 0; - ch_sn2->next_msg_to_pull = 0; - - if (ch->flags & XPC_C_SETUP) { - dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n", - ch->flags, ch->partid, ch->number); - - kfree(ch_sn2->local_msgqueue_base); - ch_sn2->local_msgqueue = NULL; - kfree(ch_sn2->remote_msgqueue_base); - ch_sn2->remote_msgqueue = NULL; - kfree(ch_sn2->notify_queue); - ch_sn2->notify_queue = NULL; - } -} - -/* - * Notify those who wanted to be notified upon delivery of their message. - */ -static void -xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put) -{ - struct xpc_notify_sn2 *notify; - u8 notify_type; - s64 get = ch->sn.sn2.w_remote_GP.get - 1; - - while (++get < put && atomic_read(&ch->n_to_notify) > 0) { - - notify = &ch->sn.sn2.notify_queue[get % ch->local_nentries]; - - /* - * See if the notify entry indicates it was associated with - * a message who's sender wants to be notified. It is possible - * that it is, but someone else is doing or has done the - * notification. - */ - notify_type = notify->type; - if (notify_type == 0 || - cmpxchg(¬ify->type, notify_type, 0) != notify_type) { - continue; - } - - DBUG_ON(notify_type != XPC_N_CALL); - - atomic_dec(&ch->n_to_notify); - - if (notify->func != NULL) { - dev_dbg(xpc_chan, "notify->func() called, notify=0x%p " - "msg_number=%lld partid=%d channel=%d\n", - (void *)notify, get, ch->partid, ch->number); - - notify->func(reason, ch->partid, ch->number, - notify->key); - - dev_dbg(xpc_chan, "notify->func() returned, notify=0x%p" - " msg_number=%lld partid=%d channel=%d\n", - (void *)notify, get, ch->partid, ch->number); - } - } -} - -static void -xpc_notify_senders_of_disconnect_sn2(struct xpc_channel *ch) -{ - xpc_notify_senders_sn2(ch, ch->reason, ch->sn.sn2.w_local_GP.put); -} - -/* - * Clear some of the msg flags in the local message queue. - */ -static inline void -xpc_clear_local_msgqueue_flags_sn2(struct xpc_channel *ch) -{ - struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; - struct xpc_msg_sn2 *msg; - s64 get; - - get = ch_sn2->w_remote_GP.get; - do { - msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue + - (get % ch->local_nentries) * - ch->entry_size); - DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); - msg->flags = 0; - } while (++get < ch_sn2->remote_GP.get); -} - -/* - * Clear some of the msg flags in the remote message queue. - */ -static inline void -xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch) -{ - struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; - struct xpc_msg_sn2 *msg; - s64 put, remote_nentries = ch->remote_nentries; - - /* flags are zeroed when the buffer is allocated */ - if (ch_sn2->remote_GP.put < remote_nentries) - return; - - put = max(ch_sn2->w_remote_GP.put, remote_nentries); - do { - msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + - (put % remote_nentries) * - ch->entry_size); - DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); - DBUG_ON(!(msg->flags & XPC_M_SN2_DONE)); - DBUG_ON(msg->number != put - remote_nentries); - msg->flags = 0; - } while (++put < ch_sn2->remote_GP.put); -} - -static int -xpc_n_of_deliverable_payloads_sn2(struct xpc_channel *ch) -{ - return ch->sn.sn2.w_remote_GP.put - ch->sn.sn2.w_local_GP.get; -} - -static void -xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number) -{ - struct xpc_channel *ch = &part->channels[ch_number]; - struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; - int npayloads_sent; - - ch_sn2->remote_GP = part->sn.sn2.remote_GPs[ch_number]; - - /* See what, if anything, has changed for each connected channel */ - - xpc_msgqueue_ref(ch); - - if (ch_sn2->w_remote_GP.get == ch_sn2->remote_GP.get && - ch_sn2->w_remote_GP.put == ch_sn2->remote_GP.put) { - /* nothing changed since GPs were last pulled */ - xpc_msgqueue_deref(ch); - return; - } - - if (!(ch->flags & XPC_C_CONNECTED)) { - xpc_msgqueue_deref(ch); - return; - } - - /* - * First check to see if messages recently sent by us have been - * received by the other side. (The remote GET value will have - * changed since we last looked at it.) - */ - - if (ch_sn2->w_remote_GP.get != ch_sn2->remote_GP.get) { - - /* - * We need to notify any senders that want to be notified - * that their sent messages have been received by their - * intended recipients. We need to do this before updating - * w_remote_GP.get so that we don't allocate the same message - * queue entries prematurely (see xpc_allocate_msg()). - */ - if (atomic_read(&ch->n_to_notify) > 0) { - /* - * Notify senders that messages sent have been - * received and delivered by the other side. - */ - xpc_notify_senders_sn2(ch, xpMsgDelivered, - ch_sn2->remote_GP.get); - } - - /* - * Clear msg->flags in previously sent messages, so that - * they're ready for xpc_allocate_msg(). - */ - xpc_clear_local_msgqueue_flags_sn2(ch); - - ch_sn2->w_remote_GP.get = ch_sn2->remote_GP.get; - - dev_dbg(xpc_chan, "w_remote_GP.get changed to %lld, partid=%d, " - "channel=%d\n", ch_sn2->w_remote_GP.get, ch->partid, - ch->number); - - /* - * If anyone was waiting for message queue entries to become - * available, wake them up. - */ - if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) - wake_up(&ch->msg_allocate_wq); - } - - /* - * Now check for newly sent messages by the other side. (The remote - * PUT value will have changed since we last looked at it.) - */ - - if (ch_sn2->w_remote_GP.put != ch_sn2->remote_GP.put) { - /* - * Clear msg->flags in previously received messages, so that - * they're ready for xpc_get_deliverable_payload_sn2(). - */ - xpc_clear_remote_msgqueue_flags_sn2(ch); - - smp_wmb(); /* ensure flags have been cleared before bte_copy */ - ch_sn2->w_remote_GP.put = ch_sn2->remote_GP.put; - - dev_dbg(xpc_chan, "w_remote_GP.put changed to %lld, partid=%d, " - "channel=%d\n", ch_sn2->w_remote_GP.put, ch->partid, - ch->number); - - npayloads_sent = xpc_n_of_deliverable_payloads_sn2(ch); - if (npayloads_sent > 0) { - dev_dbg(xpc_chan, "msgs waiting to be copied and " - "delivered=%d, partid=%d, channel=%d\n", - npayloads_sent, ch->partid, ch->number); - - if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) - xpc_activate_kthreads(ch, npayloads_sent); - } - } - - xpc_msgqueue_deref(ch); -} - -static struct xpc_msg_sn2 * -xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get) -{ - struct xpc_partition *part = &xpc_partitions[ch->partid]; - struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; - unsigned long remote_msg_pa; - struct xpc_msg_sn2 *msg; - u32 msg_index; - u32 nmsgs; - u64 msg_offset; - enum xp_retval ret; - - if (mutex_lock_interruptible(&ch_sn2->msg_to_pull_mutex) != 0) { - /* we were interrupted by a signal */ - return NULL; - } - - while (get >= ch_sn2->next_msg_to_pull) { - - /* pull as many messages as are ready and able to be pulled */ - - msg_index = ch_sn2->next_msg_to_pull % ch->remote_nentries; - - DBUG_ON(ch_sn2->next_msg_to_pull >= ch_sn2->w_remote_GP.put); - nmsgs = ch_sn2->w_remote_GP.put - ch_sn2->next_msg_to_pull; - if (msg_index + nmsgs > ch->remote_nentries) { - /* ignore the ones that wrap the msg queue for now */ - nmsgs = ch->remote_nentries - msg_index; - } - - msg_offset = msg_index * ch->entry_size; - msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + - msg_offset); - remote_msg_pa = ch_sn2->remote_msgqueue_pa + msg_offset; - - ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg_pa, - nmsgs * ch->entry_size); - if (ret != xpSuccess) { - - dev_dbg(xpc_chan, "failed to pull %d msgs starting with" - " msg %lld from partition %d, channel=%d, " - "ret=%d\n", nmsgs, ch_sn2->next_msg_to_pull, - ch->partid, ch->number, ret); - - XPC_DEACTIVATE_PARTITION(part, ret); - - mutex_unlock(&ch_sn2->msg_to_pull_mutex); - return NULL; - } - - ch_sn2->next_msg_to_pull += nmsgs; - } - - mutex_unlock(&ch_sn2->msg_to_pull_mutex); - - /* return the message we were looking for */ - msg_offset = (get % ch->remote_nentries) * ch->entry_size; - msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + msg_offset); - - return msg; -} - -/* - * Get the next deliverable message's payload. - */ -static void * -xpc_get_deliverable_payload_sn2(struct xpc_channel *ch) -{ - struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; - struct xpc_msg_sn2 *msg; - void *payload = NULL; - s64 get; - - do { - if (ch->flags & XPC_C_DISCONNECTING) - break; - - get = ch_sn2->w_local_GP.get; - smp_rmb(); /* guarantee that .get loads before .put */ - if (get == ch_sn2->w_remote_GP.put) - break; - - /* There are messages waiting to be pulled and delivered. - * We need to try to secure one for ourselves. We'll do this - * by trying to increment w_local_GP.get and hope that no one - * else beats us to it. If they do, we'll we'll simply have - * to try again for the next one. - */ - - if (cmpxchg(&ch_sn2->w_local_GP.get, get, get + 1) == get) { - /* we got the entry referenced by get */ - - dev_dbg(xpc_chan, "w_local_GP.get changed to %lld, " - "partid=%d, channel=%d\n", get + 1, - ch->partid, ch->number); - - /* pull the message from the remote partition */ - - msg = xpc_pull_remote_msg_sn2(ch, get); - - if (msg != NULL) { - DBUG_ON(msg->number != get); - DBUG_ON(msg->flags & XPC_M_SN2_DONE); - DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); - - payload = &msg->payload; - } - break; - } - - } while (1); - - return payload; -} - -/* - * Now we actually send the messages that are ready to be sent by advancing - * the local message queue's Put value and then send a chctl msgrequest to the - * recipient partition. - */ -static void -xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put) -{ - struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; - struct xpc_msg_sn2 *msg; - s64 put = initial_put + 1; - int send_msgrequest = 0; - - while (1) { - - while (1) { - if (put == ch_sn2->w_local_GP.put) - break; - - msg = (struct xpc_msg_sn2 *)((u64)ch_sn2-> - local_msgqueue + (put % - ch->local_nentries) * - ch->entry_size); - - if (!(msg->flags & XPC_M_SN2_READY)) - break; - - put++; - } - - if (put == initial_put) { - /* nothing's changed */ - break; - } - - if (cmpxchg_rel(&ch_sn2->local_GP->put, initial_put, put) != - initial_put) { - /* someone else beat us to it */ - DBUG_ON(ch_sn2->local_GP->put < initial_put); - break; - } - - /* we just set the new value of local_GP->put */ - - dev_dbg(xpc_chan, "local_GP->put changed to %lld, partid=%d, " - "channel=%d\n", put, ch->partid, ch->number); - - send_msgrequest = 1; - - /* - * We need to ensure that the message referenced by - * local_GP->put is not XPC_M_SN2_READY or that local_GP->put - * equals w_local_GP.put, so we'll go have a look. - */ - initial_put = put; - } - - if (send_msgrequest) - xpc_send_chctl_msgrequest_sn2(ch); -} - -/* - * Allocate an entry for a message from the message queue associated with the - * specified channel. - */ -static enum xp_retval -xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags, - struct xpc_msg_sn2 **address_of_msg) -{ - struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; - struct xpc_msg_sn2 *msg; - enum xp_retval ret; - s64 put; - - /* - * Get the next available message entry from the local message queue. - * If none are available, we'll make sure that we grab the latest - * GP values. - */ - ret = xpTimeout; - - while (1) { - - put = ch_sn2->w_local_GP.put; - smp_rmb(); /* guarantee that .put loads before .get */ - if (put - ch_sn2->w_remote_GP.get < ch->local_nentries) { - - /* There are available message entries. We need to try - * to secure one for ourselves. We'll do this by trying - * to increment w_local_GP.put as long as someone else - * doesn't beat us to it. If they do, we'll have to - * try again. - */ - if (cmpxchg(&ch_sn2->w_local_GP.put, put, put + 1) == - put) { - /* we got the entry referenced by put */ - break; - } - continue; /* try again */ - } - - /* - * There aren't any available msg entries at this time. - * - * In waiting for a message entry to become available, - * we set a timeout in case the other side is not sending - * completion interrupts. This lets us fake a notify IRQ - * that will cause the notify IRQ handler to fetch the latest - * GP values as if an interrupt was sent by the other side. - */ - if (ret == xpTimeout) - xpc_send_chctl_local_msgrequest_sn2(ch); - - if (flags & XPC_NOWAIT) - return xpNoWait; - - ret = xpc_allocate_msg_wait(ch); - if (ret != xpInterrupted && ret != xpTimeout) - return ret; - } - - /* get the message's address and initialize it */ - msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue + - (put % ch->local_nentries) * - ch->entry_size); - - DBUG_ON(msg->flags != 0); - msg->number = put; - - dev_dbg(xpc_chan, "w_local_GP.put changed to %lld; msg=0x%p, " - "msg_number=%lld, partid=%d, channel=%d\n", put + 1, - (void *)msg, msg->number, ch->partid, ch->number); - - *address_of_msg = msg; - return xpSuccess; -} - -/* - * Common code that does the actual sending of the message by advancing the - * local message queue's Put value and sends a chctl msgrequest to the - * partition the message is being sent to. - */ -static enum xp_retval -xpc_send_payload_sn2(struct xpc_channel *ch, u32 flags, void *payload, - u16 payload_size, u8 notify_type, xpc_notify_func func, - void *key) -{ - enum xp_retval ret = xpSuccess; - struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; - struct xpc_msg_sn2 *msg = msg; - struct xpc_notify_sn2 *notify = notify; - s64 msg_number; - s64 put; - - DBUG_ON(notify_type == XPC_N_CALL && func == NULL); - - if (XPC_MSG_SIZE(payload_size) > ch->entry_size) - return xpPayloadTooBig; - - xpc_msgqueue_ref(ch); - - if (ch->flags & XPC_C_DISCONNECTING) { - ret = ch->reason; - goto out_1; - } - if (!(ch->flags & XPC_C_CONNECTED)) { - ret = xpNotConnected; - goto out_1; - } - - ret = xpc_allocate_msg_sn2(ch, flags, &msg); - if (ret != xpSuccess) - goto out_1; - - msg_number = msg->number; - - if (notify_type != 0) { - /* - * Tell the remote side to send an ACK interrupt when the - * message has been delivered. - */ - msg->flags |= XPC_M_SN2_INTERRUPT; - - atomic_inc(&ch->n_to_notify); - - notify = &ch_sn2->notify_queue[msg_number % ch->local_nentries]; - notify->func = func; - notify->key = key; - notify->type = notify_type; - - /* ??? Is a mb() needed here? */ - - if (ch->flags & XPC_C_DISCONNECTING) { - /* - * An error occurred between our last error check and - * this one. We will try to clear the type field from - * the notify entry. If we succeed then - * xpc_disconnect_channel() didn't already process - * the notify entry. - */ - if (cmpxchg(¬ify->type, notify_type, 0) == - notify_type) { - atomic_dec(&ch->n_to_notify); - ret = ch->reason; - } - goto out_1; - } - } - - memcpy(&msg->payload, payload, payload_size); - - msg->flags |= XPC_M_SN2_READY; - - /* - * The preceding store of msg->flags must occur before the following - * load of local_GP->put. - */ - smp_mb(); - - /* see if the message is next in line to be sent, if so send it */ - - put = ch_sn2->local_GP->put; - if (put == msg_number) - xpc_send_msgs_sn2(ch, put); - -out_1: - xpc_msgqueue_deref(ch); - return ret; -} - -/* - * Now we actually acknowledge the messages that have been delivered and ack'd - * by advancing the cached remote message queue's Get value and if requested - * send a chctl msgrequest to the message sender's partition. - * - * If a message has XPC_M_SN2_INTERRUPT set, send an interrupt to the partition - * that sent the message. - */ -static void -xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags) -{ - struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; - struct xpc_msg_sn2 *msg; - s64 get = initial_get + 1; - int send_msgrequest = 0; - - while (1) { - - while (1) { - if (get == ch_sn2->w_local_GP.get) - break; - - msg = (struct xpc_msg_sn2 *)((u64)ch_sn2-> - remote_msgqueue + (get % - ch->remote_nentries) * - ch->entry_size); - - if (!(msg->flags & XPC_M_SN2_DONE)) - break; - - msg_flags |= msg->flags; - get++; - } - - if (get == initial_get) { - /* nothing's changed */ - break; - } - - if (cmpxchg_rel(&ch_sn2->local_GP->get, initial_get, get) != - initial_get) { - /* someone else beat us to it */ - DBUG_ON(ch_sn2->local_GP->get <= initial_get); - break; - } - - /* we just set the new value of local_GP->get */ - - dev_dbg(xpc_chan, "local_GP->get changed to %lld, partid=%d, " - "channel=%d\n", get, ch->partid, ch->number); - - send_msgrequest = (msg_flags & XPC_M_SN2_INTERRUPT); - - /* - * We need to ensure that the message referenced by - * local_GP->get is not XPC_M_SN2_DONE or that local_GP->get - * equals w_local_GP.get, so we'll go have a look. - */ - initial_get = get; - } - - if (send_msgrequest) - xpc_send_chctl_msgrequest_sn2(ch); -} - -static void -xpc_received_payload_sn2(struct xpc_channel *ch, void *payload) -{ - struct xpc_msg_sn2 *msg; - s64 msg_number; - s64 get; - - msg = container_of(payload, struct xpc_msg_sn2, payload); - msg_number = msg->number; - - dev_dbg(xpc_chan, "msg=0x%p, msg_number=%lld, partid=%d, channel=%d\n", - (void *)msg, msg_number, ch->partid, ch->number); - - DBUG_ON((((u64)msg - (u64)ch->sn.sn2.remote_msgqueue) / ch->entry_size) != - msg_number % ch->remote_nentries); - DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); - DBUG_ON(msg->flags & XPC_M_SN2_DONE); - - msg->flags |= XPC_M_SN2_DONE; - - /* - * The preceding store of msg->flags must occur before the following - * load of local_GP->get. - */ - smp_mb(); - - /* - * See if this message is next in line to be acknowledged as having - * been delivered. - */ - get = ch->sn.sn2.local_GP->get; - if (get == msg_number) - xpc_acknowledge_msgs_sn2(ch, get, msg->flags); -} - -static struct xpc_arch_operations xpc_arch_ops_sn2 = { - .setup_partitions = xpc_setup_partitions_sn2, - .teardown_partitions = xpc_teardown_partitions_sn2, - .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2, - .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2, - .setup_rsvd_page = xpc_setup_rsvd_page_sn2, - - .allow_hb = xpc_allow_hb_sn2, - .disallow_hb = xpc_disallow_hb_sn2, - .disallow_all_hbs = xpc_disallow_all_hbs_sn2, - .increment_heartbeat = xpc_increment_heartbeat_sn2, - .offline_heartbeat = xpc_offline_heartbeat_sn2, - .online_heartbeat = xpc_online_heartbeat_sn2, - .heartbeat_init = xpc_heartbeat_init_sn2, - .heartbeat_exit = xpc_heartbeat_exit_sn2, - .get_remote_heartbeat = xpc_get_remote_heartbeat_sn2, - - .request_partition_activation = - xpc_request_partition_activation_sn2, - .request_partition_reactivation = - xpc_request_partition_reactivation_sn2, - .request_partition_deactivation = - xpc_request_partition_deactivation_sn2, - .cancel_partition_deactivation_request = - xpc_cancel_partition_deactivation_request_sn2, - - .setup_ch_structures = xpc_setup_ch_structures_sn2, - .teardown_ch_structures = xpc_teardown_ch_structures_sn2, - - .make_first_contact = xpc_make_first_contact_sn2, - - .get_chctl_all_flags = xpc_get_chctl_all_flags_sn2, - .send_chctl_closerequest = xpc_send_chctl_closerequest_sn2, - .send_chctl_closereply = xpc_send_chctl_closereply_sn2, - .send_chctl_openrequest = xpc_send_chctl_openrequest_sn2, - .send_chctl_openreply = xpc_send_chctl_openreply_sn2, - .send_chctl_opencomplete = xpc_send_chctl_opencomplete_sn2, - .process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2, - - .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2, - - .setup_msg_structures = xpc_setup_msg_structures_sn2, - .teardown_msg_structures = xpc_teardown_msg_structures_sn2, - - .indicate_partition_engaged = xpc_indicate_partition_engaged_sn2, - .indicate_partition_disengaged = xpc_indicate_partition_disengaged_sn2, - .partition_engaged = xpc_partition_engaged_sn2, - .any_partition_engaged = xpc_any_partition_engaged_sn2, - .assume_partition_disengaged = xpc_assume_partition_disengaged_sn2, - - .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2, - .send_payload = xpc_send_payload_sn2, - .get_deliverable_payload = xpc_get_deliverable_payload_sn2, - .received_payload = xpc_received_payload_sn2, - .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2, -}; - -int -xpc_init_sn2(void) -{ - int ret; - size_t buf_size; - - xpc_arch_ops = xpc_arch_ops_sn2; - - if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) { - dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is " - "larger than %d\n", XPC_MSG_HDR_MAX_SIZE); - return -E2BIG; - } - - buf_size = max(XPC_RP_VARS_SIZE, - XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES_SN2); - xpc_remote_copy_buffer_sn2 = xpc_kmalloc_cacheline_aligned(buf_size, - GFP_KERNEL, - &xpc_remote_copy_buffer_base_sn2); - if (xpc_remote_copy_buffer_sn2 == NULL) { - dev_err(xpc_part, "can't get memory for remote copy buffer\n"); - return -ENOMEM; - } - - /* open up protections for IPI and [potentially] amo operations */ - xpc_allow_IPI_ops_sn2(); - xpc_allow_amo_ops_shub_wars_1_1_sn2(); - - /* - * This is safe to do before the xpc_hb_checker thread has started - * because the handler releases a wait queue. If an interrupt is - * received before the thread is waiting, it will not go to sleep, - * but rather immediately process the interrupt. - */ - ret = request_irq(SGI_XPC_ACTIVATE, xpc_handle_activate_IRQ_sn2, 0, - "xpc hb", NULL); - if (ret != 0) { - dev_err(xpc_part, "can't register ACTIVATE IRQ handler, " - "errno=%d\n", -ret); - xpc_disallow_IPI_ops_sn2(); - kfree(xpc_remote_copy_buffer_base_sn2); - } - return ret; -} - -void -xpc_exit_sn2(void) -{ - free_irq(SGI_XPC_ACTIVATE, NULL); - xpc_disallow_IPI_ops_sn2(); - kfree(xpc_remote_copy_buffer_base_sn2); -} diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index 0c6de97dd347..98c60f11b76b 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -27,7 +27,7 @@ #if defined CONFIG_X86_64 #include <asm/uv/bios.h> #include <asm/uv/uv_irq.h> -#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#elif defined CONFIG_IA64_SGI_UV #include <asm/sn/intr.h> #include <asm/sn/sn_sal.h> #endif @@ -35,7 +35,7 @@ #include "../sgi-gru/grukservices.h" #include "xpc.h" -#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#if defined CONFIG_IA64_SGI_UV struct uv_IO_APIC_route_entry { __u64 vector : 8, delivery_mode : 3, @@ -48,6 +48,8 @@ struct uv_IO_APIC_route_entry { __reserved_2 : 15, dest : 32; }; + +#define sn_partition_id 0 #endif static struct xpc_heartbeat_uv *xpc_heartbeat_uv; @@ -119,7 +121,7 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); -#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#elif defined CONFIG_IA64_SGI_UV if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0) mq->irq = SGI_XPC_ACTIVATE; else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0) @@ -142,7 +144,7 @@ xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq) #if defined CONFIG_X86_64 uv_teardown_irq(mq->irq); -#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#elif defined CONFIG_IA64_SGI_UV int mmr_pnode; unsigned long mmr_value; @@ -160,7 +162,7 @@ xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq) { int ret; -#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#if defined CONFIG_IA64_SGI_UV int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); ret = sn_mq_watchlist_alloc(mmr_pnode, (void *)uv_gpa(mq->address), @@ -195,7 +197,7 @@ xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq) #if defined CONFIG_X86_64 ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num); BUG_ON(ret != BIOS_STATUS_SUCCESS); -#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#elif defined CONFIG_IA64_SGI_UV ret = sn_mq_watchlist_free(mmr_pnode, mq->watchlist_num); BUG_ON(ret != SALRET_OK); #else @@ -694,7 +696,7 @@ again: if (gru_mq_desc == NULL) { gru_mq_desc = kmalloc(sizeof(struct gru_message_queue_desc), - GFP_KERNEL); + GFP_ATOMIC); if (gru_mq_desc == NULL) { ret = xpNoMemory; goto done; @@ -794,7 +796,7 @@ xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, else ret = xpBiosError; -#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#elif defined CONFIG_IA64_SGI_UV status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len); if (status == SALRET_OK) ret = xpSuccess; @@ -1678,7 +1680,7 @@ xpc_received_payload_uv(struct xpc_channel *ch, void *payload) XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret); } -static struct xpc_arch_operations xpc_arch_ops_uv = { +static const struct xpc_arch_operations xpc_arch_ops_uv = { .setup_partitions = xpc_setup_partitions_uv, .teardown_partitions = xpc_teardown_partitions_uv, .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv, diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c index 44d750d98bc8..ada94e6a3c91 100644 --- a/drivers/misc/sgi-xp/xpnet.c +++ b/drivers/misc/sgi-xp/xpnet.c @@ -496,7 +496,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) * Deal with transmit timeouts coming from the network layer. */ static void -xpnet_dev_tx_timeout(struct net_device *dev) +xpnet_dev_tx_timeout(struct net_device *dev, unsigned int txqueue) { dev->stats.tx_errors++; } @@ -515,7 +515,7 @@ xpnet_init(void) { int result; - if (!is_shub() && !is_uv()) + if (!is_uv()) return -ENODEV; dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME); diff --git a/drivers/misc/spear13xx_pcie_gadget.c b/drivers/misc/spear13xx_pcie_gadget.c deleted file mode 100644 index ee120dcbb3e6..000000000000 --- a/drivers/misc/spear13xx_pcie_gadget.c +++ /dev/null @@ -1,797 +0,0 @@ -/* - * drivers/misc/spear13xx_pcie_gadget.c - * - * Copyright (C) 2010 ST Microelectronics - * Pratyush Anand<pratyush.anand@gmail.com> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#include <linux/device.h> -#include <linux/clk.h> -#include <linux/slab.h> -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/platform_device.h> -#include <linux/pci_regs.h> -#include <linux/configfs.h> -#include <mach/pcie.h> -#include <mach/misc_regs.h> - -#define IN0_MEM_SIZE (200 * 1024 * 1024 - 1) -/* In current implementation address translation is done using IN0 only. - * So IN1 start address and IN0 end address has been kept same -*/ -#define IN1_MEM_SIZE (0 * 1024 * 1024 - 1) -#define IN_IO_SIZE (20 * 1024 * 1024 - 1) -#define IN_CFG0_SIZE (12 * 1024 * 1024 - 1) -#define IN_CFG1_SIZE (12 * 1024 * 1024 - 1) -#define IN_MSG_SIZE (12 * 1024 * 1024 - 1) -/* Keep default BAR size as 4K*/ -/* AORAM would be mapped by default*/ -#define INBOUND_ADDR_MASK (SPEAR13XX_SYSRAM1_SIZE - 1) - -#define INT_TYPE_NO_INT 0 -#define INT_TYPE_INTX 1 -#define INT_TYPE_MSI 2 -struct spear_pcie_gadget_config { - void __iomem *base; - void __iomem *va_app_base; - void __iomem *va_dbi_base; - char int_type[10]; - ulong requested_msi; - ulong configured_msi; - ulong bar0_size; - ulong bar0_rw_offset; - void __iomem *va_bar0_address; -}; - -struct pcie_gadget_target { - struct configfs_subsystem subsys; - struct spear_pcie_gadget_config config; -}; - -struct pcie_gadget_target_attr { - struct configfs_attribute attr; - ssize_t (*show)(struct spear_pcie_gadget_config *config, - char *buf); - ssize_t (*store)(struct spear_pcie_gadget_config *config, - const char *buf, - size_t count); -}; - -static void enable_dbi_access(struct pcie_app_reg __iomem *app_reg) -{ - /* Enable DBI access */ - writel(readl(&app_reg->slv_armisc) | (1 << AXI_OP_DBI_ACCESS_ID), - &app_reg->slv_armisc); - writel(readl(&app_reg->slv_awmisc) | (1 << AXI_OP_DBI_ACCESS_ID), - &app_reg->slv_awmisc); - -} - -static void disable_dbi_access(struct pcie_app_reg __iomem *app_reg) -{ - /* disable DBI access */ - writel(readl(&app_reg->slv_armisc) & ~(1 << AXI_OP_DBI_ACCESS_ID), - &app_reg->slv_armisc); - writel(readl(&app_reg->slv_awmisc) & ~(1 << AXI_OP_DBI_ACCESS_ID), - &app_reg->slv_awmisc); - -} - -static void spear_dbi_read_reg(struct spear_pcie_gadget_config *config, - int where, int size, u32 *val) -{ - struct pcie_app_reg __iomem *app_reg = config->va_app_base; - ulong va_address; - - /* Enable DBI access */ - enable_dbi_access(app_reg); - - va_address = (ulong)config->va_dbi_base + (where & ~0x3); - - *val = readl(va_address); - - if (size == 1) - *val = (*val >> (8 * (where & 3))) & 0xff; - else if (size == 2) - *val = (*val >> (8 * (where & 3))) & 0xffff; - - /* Disable DBI access */ - disable_dbi_access(app_reg); -} - -static void spear_dbi_write_reg(struct spear_pcie_gadget_config *config, - int where, int size, u32 val) -{ - struct pcie_app_reg __iomem *app_reg = config->va_app_base; - ulong va_address; - - /* Enable DBI access */ - enable_dbi_access(app_reg); - - va_address = (ulong)config->va_dbi_base + (where & ~0x3); - - if (size == 4) - writel(val, va_address); - else if (size == 2) - writew(val, va_address + (where & 2)); - else if (size == 1) - writeb(val, va_address + (where & 3)); - - /* Disable DBI access */ - disable_dbi_access(app_reg); -} - -#define PCI_FIND_CAP_TTL 48 - -static int pci_find_own_next_cap_ttl(struct spear_pcie_gadget_config *config, - u32 pos, int cap, int *ttl) -{ - u32 id; - - while ((*ttl)--) { - spear_dbi_read_reg(config, pos, 1, &pos); - if (pos < 0x40) - break; - pos &= ~3; - spear_dbi_read_reg(config, pos + PCI_CAP_LIST_ID, 1, &id); - if (id == 0xff) - break; - if (id == cap) - return pos; - pos += PCI_CAP_LIST_NEXT; - } - return 0; -} - -static int pci_find_own_next_cap(struct spear_pcie_gadget_config *config, - u32 pos, int cap) -{ - int ttl = PCI_FIND_CAP_TTL; - - return pci_find_own_next_cap_ttl(config, pos, cap, &ttl); -} - -static int pci_find_own_cap_start(struct spear_pcie_gadget_config *config, - u8 hdr_type) -{ - u32 status; - - spear_dbi_read_reg(config, PCI_STATUS, 2, &status); - if (!(status & PCI_STATUS_CAP_LIST)) - return 0; - - switch (hdr_type) { - case PCI_HEADER_TYPE_NORMAL: - case PCI_HEADER_TYPE_BRIDGE: - return PCI_CAPABILITY_LIST; - case PCI_HEADER_TYPE_CARDBUS: - return PCI_CB_CAPABILITY_LIST; - default: - return 0; - } - - return 0; -} - -/* - * Tell if a device supports a given PCI capability. - * Returns the address of the requested capability structure within the - * device's PCI configuration space or 0 in case the device does not - * support it. Possible values for @cap: - * - * %PCI_CAP_ID_PM Power Management - * %PCI_CAP_ID_AGP Accelerated Graphics Port - * %PCI_CAP_ID_VPD Vital Product Data - * %PCI_CAP_ID_SLOTID Slot Identification - * %PCI_CAP_ID_MSI Message Signalled Interrupts - * %PCI_CAP_ID_CHSWP CompactPCI HotSwap - * %PCI_CAP_ID_PCIX PCI-X - * %PCI_CAP_ID_EXP PCI Express - */ -static int pci_find_own_capability(struct spear_pcie_gadget_config *config, - int cap) -{ - u32 pos; - u32 hdr_type; - - spear_dbi_read_reg(config, PCI_HEADER_TYPE, 1, &hdr_type); - - pos = pci_find_own_cap_start(config, hdr_type); - if (pos) - pos = pci_find_own_next_cap(config, pos, cap); - - return pos; -} - -static irqreturn_t spear_pcie_gadget_irq(int irq, void *dev_id) -{ - return 0; -} - -/* - * configfs interfaces show/store functions - */ - -static struct pcie_gadget_target *to_target(struct config_item *item) -{ - return item ? - container_of(to_configfs_subsystem(to_config_group(item)), - struct pcie_gadget_target, subsys) : NULL; -} - -static ssize_t pcie_gadget_link_show(struct config_item *item, char *buf) -{ - struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; - - if (readl(&app_reg->app_status_1) & ((u32)1 << XMLH_LINK_UP_ID)) - return sprintf(buf, "UP"); - else - return sprintf(buf, "DOWN"); -} - -static ssize_t pcie_gadget_link_store(struct config_item *item, - const char *buf, size_t count) -{ - struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; - - if (sysfs_streq(buf, "UP")) - writel(readl(&app_reg->app_ctrl_0) | (1 << APP_LTSSM_ENABLE_ID), - &app_reg->app_ctrl_0); - else if (sysfs_streq(buf, "DOWN")) - writel(readl(&app_reg->app_ctrl_0) - & ~(1 << APP_LTSSM_ENABLE_ID), - &app_reg->app_ctrl_0); - else - return -EINVAL; - return count; -} - -static ssize_t pcie_gadget_int_type_show(struct config_item *item, char *buf) -{ - return sprintf(buf, "%s", to_target(item)->int_type); -} - -static ssize_t pcie_gadget_int_type_store(struct config_item *item, - const char *buf, size_t count) -{ - struct spear_pcie_gadget_config *config = to_target(item) - u32 cap, vec, flags; - ulong vector; - - if (sysfs_streq(buf, "INTA")) - spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 1); - - else if (sysfs_streq(buf, "MSI")) { - vector = config->requested_msi; - vec = 0; - while (vector > 1) { - vector /= 2; - vec++; - } - spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 0); - cap = pci_find_own_capability(config, PCI_CAP_ID_MSI); - spear_dbi_read_reg(config, cap + PCI_MSI_FLAGS, 1, &flags); - flags &= ~PCI_MSI_FLAGS_QMASK; - flags |= vec << 1; - spear_dbi_write_reg(config, cap + PCI_MSI_FLAGS, 1, flags); - } else - return -EINVAL; - - strcpy(config->int_type, buf); - - return count; -} - -static ssize_t pcie_gadget_no_of_msi_show(struct config_item *item, char *buf) -{ - struct spear_pcie_gadget_config *config = to_target(item) - struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; - u32 cap, vec, flags; - ulong vector; - - if ((readl(&app_reg->msg_status) & (1 << CFG_MSI_EN_ID)) - != (1 << CFG_MSI_EN_ID)) - vector = 0; - else { - cap = pci_find_own_capability(config, PCI_CAP_ID_MSI); - spear_dbi_read_reg(config, cap + PCI_MSI_FLAGS, 1, &flags); - flags &= ~PCI_MSI_FLAGS_QSIZE; - vec = flags >> 4; - vector = 1; - while (vec--) - vector *= 2; - } - config->configured_msi = vector; - - return sprintf(buf, "%lu", vector); -} - -static ssize_t pcie_gadget_no_of_msi_store(struct config_item *item, - const char *buf, size_t count) -{ - int ret; - - ret = kstrtoul(buf, 0, &to_target(item)->requested_msi); - if (ret) - return ret; - - if (config->requested_msi > 32) - config->requested_msi = 32; - - return count; -} - -static ssize_t pcie_gadget_inta_store(struct config_item *item, - const char *buf, size_t count) -{ - struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; - ulong en; - int ret; - - ret = kstrtoul(buf, 0, &en); - if (ret) - return ret; - - if (en) - writel(readl(&app_reg->app_ctrl_0) | (1 << SYS_INT_ID), - &app_reg->app_ctrl_0); - else - writel(readl(&app_reg->app_ctrl_0) & ~(1 << SYS_INT_ID), - &app_reg->app_ctrl_0); - - return count; -} - -static ssize_t pcie_gadget_send_msi_store(struct config_item *item, - const char *buf, size_t count) -{ - struct spear_pcie_gadget_config *config = to_target(item) - struct pcie_app_reg __iomem *app_reg = config->va_app_base; - ulong vector; - u32 ven_msi; - int ret; - - ret = kstrtoul(buf, 0, &vector); - if (ret) - return ret; - - if (!config->configured_msi) - return -EINVAL; - - if (vector >= config->configured_msi) - return -EINVAL; - - ven_msi = readl(&app_reg->ven_msi_1); - ven_msi &= ~VEN_MSI_FUN_NUM_MASK; - ven_msi |= 0 << VEN_MSI_FUN_NUM_ID; - ven_msi &= ~VEN_MSI_TC_MASK; - ven_msi |= 0 << VEN_MSI_TC_ID; - ven_msi &= ~VEN_MSI_VECTOR_MASK; - ven_msi |= vector << VEN_MSI_VECTOR_ID; - - /* generating interrupt for msi vector */ - ven_msi |= VEN_MSI_REQ_EN; - writel(ven_msi, &app_reg->ven_msi_1); - udelay(1); - ven_msi &= ~VEN_MSI_REQ_EN; - writel(ven_msi, &app_reg->ven_msi_1); - - return count; -} - -static ssize_t pcie_gadget_vendor_id_show(struct config_item *item, char *buf) -{ - u32 id; - - spear_dbi_read_reg(to_target(item), PCI_VENDOR_ID, 2, &id); - - return sprintf(buf, "%x", id); -} - -static ssize_t pcie_gadget_vendor_id_store(struct config_item *item, - const char *buf, size_t count) -{ - ulong id; - int ret; - - ret = kstrtoul(buf, 0, &id); - if (ret) - return ret; - - spear_dbi_write_reg(to_target(item), PCI_VENDOR_ID, 2, id); - - return count; -} - -static ssize_t pcie_gadget_device_id_show(struct config_item *item, char *buf) -{ - u32 id; - - spear_dbi_read_reg(to_target(item), PCI_DEVICE_ID, 2, &id); - - return sprintf(buf, "%x", id); -} - -static ssize_t pcie_gadget_device_id_store(struct config_item *item, - const char *buf, size_t count) -{ - ulong id; - int ret; - - ret = kstrtoul(buf, 0, &id); - if (ret) - return ret; - - spear_dbi_write_reg(to_target(item), PCI_DEVICE_ID, 2, id); - - return count; -} - -static ssize_t pcie_gadget_bar0_size_show(struct config_item *item, char *buf) -{ - return sprintf(buf, "%lx", to_target(item)->bar0_size); -} - -static ssize_t pcie_gadget_bar0_size_store(struct config_item *item, - const char *buf, size_t count) -{ - struct spear_pcie_gadget_config *config = to_target(item) - ulong size; - u32 pos, pos1; - u32 no_of_bit = 0; - int ret; - - ret = kstrtoul(buf, 0, &size); - if (ret) - return ret; - - /* min bar size is 256 */ - if (size <= 0x100) - size = 0x100; - /* max bar size is 1MB*/ - else if (size >= 0x100000) - size = 0x100000; - else { - pos = 0; - pos1 = 0; - while (pos < 21) { - pos = find_next_bit((ulong *)&size, 21, pos); - if (pos != 21) - pos1 = pos + 1; - pos++; - no_of_bit++; - } - if (no_of_bit == 2) - pos1--; - - size = 1 << pos1; - } - config->bar0_size = size; - spear_dbi_write_reg(config, PCIE_BAR0_MASK_REG, 4, size - 1); - - return count; -} - -static ssize_t pcie_gadget_bar0_address_show(struct config_item *item, - char *buf) -{ - struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; - - u32 address = readl(&app_reg->pim0_mem_addr_start); - - return sprintf(buf, "%x", address); -} - -static ssize_t pcie_gadget_bar0_address_store(struct config_item *item, - const char *buf, size_t count) -{ - struct spear_pcie_gadget_config *config = to_target(item) - struct pcie_app_reg __iomem *app_reg = config->va_app_base; - ulong address; - int ret; - - ret = kstrtoul(buf, 0, &address); - if (ret) - return ret; - - address &= ~(config->bar0_size - 1); - if (config->va_bar0_address) - iounmap(config->va_bar0_address); - config->va_bar0_address = ioremap(address, config->bar0_size); - if (!config->va_bar0_address) - return -ENOMEM; - - writel(address, &app_reg->pim0_mem_addr_start); - - return count; -} - -static ssize_t pcie_gadget_bar0_rw_offset_show(struct config_item *item, - char *buf) -{ - return sprintf(buf, "%lx", to_target(item)->bar0_rw_offset); -} - -static ssize_t pcie_gadget_bar0_rw_offset_store(struct config_item *item, - const char *buf, size_t count) -{ - ulong offset; - int ret; - - ret = kstrtoul(buf, 0, &offset); - if (ret) - return ret; - - if (offset % 4) - return -EINVAL; - - to_target(item)->bar0_rw_offset = offset; - - return count; -} - -static ssize_t pcie_gadget_bar0_data_show(struct config_item *item, char *buf) -{ - struct spear_pcie_gadget_config *config = to_target(item) - ulong data; - - if (!config->va_bar0_address) - return -ENOMEM; - - data = readl((ulong)config->va_bar0_address + config->bar0_rw_offset); - - return sprintf(buf, "%lx", data); -} - -static ssize_t pcie_gadget_bar0_data_store(struct config_item *item, - const char *buf, size_t count) -{ - struct spear_pcie_gadget_config *config = to_target(item) - ulong data; - int ret; - - ret = kstrtoul(buf, 0, &data); - if (ret) - return ret; - - if (!config->va_bar0_address) - return -ENOMEM; - - writel(data, (ulong)config->va_bar0_address + config->bar0_rw_offset); - - return count; -} - -CONFIGFS_ATTR(pcie_gadget_, link); -CONFIGFS_ATTR(pcie_gadget_, int_type); -CONFIGFS_ATTR(pcie_gadget_, no_of_msi); -CONFIGFS_ATTR_WO(pcie_gadget_, inta); -CONFIGFS_ATTR_WO(pcie_gadget_, send_msi); -CONFIGFS_ATTR(pcie_gadget_, vendor_id); -CONFIGFS_ATTR(pcie_gadget_, device_id); -CONFIGFS_ATTR(pcie_gadget_, bar0_size); -CONFIGFS_ATTR(pcie_gadget_, bar0_address); -CONFIGFS_ATTR(pcie_gadget_, bar0_rw_offset); -CONFIGFS_ATTR(pcie_gadget_, bar0_data); - -static struct configfs_attribute *pcie_gadget_target_attrs[] = { - &pcie_gadget_attr_link, - &pcie_gadget_attr_int_type, - &pcie_gadget_attr_no_of_msi, - &pcie_gadget_attr_inta, - &pcie_gadget_attr_send_msi, - &pcie_gadget_attr_vendor_id, - &pcie_gadget_attr_device_id, - &pcie_gadget_attr_bar0_size, - &pcie_gadget_attr_bar0_address, - &pcie_gadget_attr_bar0_rw_offset, - &pcie_gadget_attr_bar0_data, - NULL, -}; - -static struct config_item_type pcie_gadget_target_type = { - .ct_attrs = pcie_gadget_target_attrs, - .ct_owner = THIS_MODULE, -}; - -static void spear13xx_pcie_device_init(struct spear_pcie_gadget_config *config) -{ - struct pcie_app_reg __iomem *app_reg = config->va_app_base; - - /*setup registers for outbound translation */ - - writel(config->base, &app_reg->in0_mem_addr_start); - writel(app_reg->in0_mem_addr_start + IN0_MEM_SIZE, - &app_reg->in0_mem_addr_limit); - writel(app_reg->in0_mem_addr_limit + 1, &app_reg->in1_mem_addr_start); - writel(app_reg->in1_mem_addr_start + IN1_MEM_SIZE, - &app_reg->in1_mem_addr_limit); - writel(app_reg->in1_mem_addr_limit + 1, &app_reg->in_io_addr_start); - writel(app_reg->in_io_addr_start + IN_IO_SIZE, - &app_reg->in_io_addr_limit); - writel(app_reg->in_io_addr_limit + 1, &app_reg->in_cfg0_addr_start); - writel(app_reg->in_cfg0_addr_start + IN_CFG0_SIZE, - &app_reg->in_cfg0_addr_limit); - writel(app_reg->in_cfg0_addr_limit + 1, &app_reg->in_cfg1_addr_start); - writel(app_reg->in_cfg1_addr_start + IN_CFG1_SIZE, - &app_reg->in_cfg1_addr_limit); - writel(app_reg->in_cfg1_addr_limit + 1, &app_reg->in_msg_addr_start); - writel(app_reg->in_msg_addr_start + IN_MSG_SIZE, - &app_reg->in_msg_addr_limit); - - writel(app_reg->in0_mem_addr_start, &app_reg->pom0_mem_addr_start); - writel(app_reg->in1_mem_addr_start, &app_reg->pom1_mem_addr_start); - writel(app_reg->in_io_addr_start, &app_reg->pom_io_addr_start); - - /*setup registers for inbound translation */ - - /* Keep AORAM mapped at BAR0 as default */ - config->bar0_size = INBOUND_ADDR_MASK + 1; - spear_dbi_write_reg(config, PCIE_BAR0_MASK_REG, 4, INBOUND_ADDR_MASK); - spear_dbi_write_reg(config, PCI_BASE_ADDRESS_0, 4, 0xC); - config->va_bar0_address = ioremap(SPEAR13XX_SYSRAM1_BASE, - config->bar0_size); - - writel(SPEAR13XX_SYSRAM1_BASE, &app_reg->pim0_mem_addr_start); - writel(0, &app_reg->pim1_mem_addr_start); - writel(INBOUND_ADDR_MASK + 1, &app_reg->mem0_addr_offset_limit); - - writel(0x0, &app_reg->pim_io_addr_start); - writel(0x0, &app_reg->pim_io_addr_start); - writel(0x0, &app_reg->pim_rom_addr_start); - - writel(DEVICE_TYPE_EP | (1 << MISCTRL_EN_ID) - | ((u32)1 << REG_TRANSLATION_ENABLE), - &app_reg->app_ctrl_0); - /* disable all rx interrupts */ - writel(0, &app_reg->int_mask); - - /* Select INTA as default*/ - spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 1); -} - -static int spear_pcie_gadget_probe(struct platform_device *pdev) -{ - struct resource *res0, *res1; - unsigned int status = 0; - int irq; - struct clk *clk; - static struct pcie_gadget_target *target; - struct spear_pcie_gadget_config *config; - struct config_item *cg_item; - struct configfs_subsystem *subsys; - - target = devm_kzalloc(&pdev->dev, sizeof(*target), GFP_KERNEL); - if (!target) { - dev_err(&pdev->dev, "out of memory\n"); - return -ENOMEM; - } - - cg_item = &target->subsys.su_group.cg_item; - sprintf(cg_item->ci_namebuf, "pcie_gadget.%d", pdev->id); - cg_item->ci_type = &pcie_gadget_target_type; - config = &target->config; - - /* get resource for application registers*/ - res0 = platform_get_resource(pdev, IORESOURCE_MEM, 0); - config->va_app_base = devm_ioremap_resource(&pdev->dev, res0); - if (IS_ERR(config->va_app_base)) { - dev_err(&pdev->dev, "ioremap fail\n"); - return PTR_ERR(config->va_app_base); - } - - /* get resource for dbi registers*/ - res1 = platform_get_resource(pdev, IORESOURCE_MEM, 1); - config->base = (void __iomem *)res1->start; - - config->va_dbi_base = devm_ioremap_resource(&pdev->dev, res1); - if (IS_ERR(config->va_dbi_base)) { - dev_err(&pdev->dev, "ioremap fail\n"); - return PTR_ERR(config->va_dbi_base); - } - - platform_set_drvdata(pdev, target); - - irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "no update irq?\n"); - return irq; - } - - status = devm_request_irq(&pdev->dev, irq, spear_pcie_gadget_irq, - 0, pdev->name, NULL); - if (status) { - dev_err(&pdev->dev, - "pcie gadget interrupt IRQ%d already claimed\n", irq); - return status; - } - - /* Register configfs hooks */ - subsys = &target->subsys; - config_group_init(&subsys->su_group); - mutex_init(&subsys->su_mutex); - status = configfs_register_subsystem(subsys); - if (status) - return status; - - /* - * init basic pcie application registers - * do not enable clock if it is PCIE0.Ideally , all controller should - * have been independent from others with respect to clock. But PCIE1 - * and 2 depends on PCIE0.So PCIE0 clk is provided during board init. - */ - if (pdev->id == 1) { - /* - * Ideally CFG Clock should have been also enabled here. But - * it is done currently during board init routne - */ - clk = clk_get_sys("pcie1", NULL); - if (IS_ERR(clk)) { - pr_err("%s:couldn't get clk for pcie1\n", __func__); - return PTR_ERR(clk); - } - status = clk_enable(clk); - if (status) { - pr_err("%s:couldn't enable clk for pcie1\n", __func__); - return status; - } - } else if (pdev->id == 2) { - /* - * Ideally CFG Clock should have been also enabled here. But - * it is done currently during board init routne - */ - clk = clk_get_sys("pcie2", NULL); - if (IS_ERR(clk)) { - pr_err("%s:couldn't get clk for pcie2\n", __func__); - return PTR_ERR(clk); - } - status = clk_enable(clk); - if (status) { - pr_err("%s:couldn't enable clk for pcie2\n", __func__); - return status; - } - } - spear13xx_pcie_device_init(config); - - return 0; -} - -static int spear_pcie_gadget_remove(struct platform_device *pdev) -{ - static struct pcie_gadget_target *target; - - target = platform_get_drvdata(pdev); - - configfs_unregister_subsystem(&target->subsys); - - return 0; -} - -static void spear_pcie_gadget_shutdown(struct platform_device *pdev) -{ -} - -static struct platform_driver spear_pcie_gadget_driver = { - .probe = spear_pcie_gadget_probe, - .remove = spear_pcie_gadget_remove, - .shutdown = spear_pcie_gadget_shutdown, - .driver = { - .name = "pcie-gadget-spear", - .bus = &platform_bus_type - }, -}; - -module_platform_driver(spear_pcie_gadget_driver); - -MODULE_ALIAS("platform:pcie-gadget-spear"); -MODULE_AUTHOR("Pratyush Anand"); -MODULE_LICENSE("GPL"); diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c index 426ad912b441..cb57ac6ab4c3 100644 --- a/drivers/misc/sram-exec.c +++ b/drivers/misc/sram-exec.c @@ -85,6 +85,7 @@ void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src, unsigned long base; int pages; void *dst_cpy; + int ret; mutex_lock(&exec_pool_list_mutex); list_for_each_entry(p, &exec_pool_list, list) { @@ -96,7 +97,7 @@ void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src, if (!part) return NULL; - if (!addr_in_gen_pool(pool, (unsigned long)dst, size)) + if (!gen_pool_has_addr(pool, (unsigned long)dst, size)) return NULL; base = (unsigned long)part->base; @@ -104,16 +105,28 @@ void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src, mutex_lock(&part->lock); - set_memory_nx((unsigned long)base, pages); - set_memory_rw((unsigned long)base, pages); + ret = set_memory_nx((unsigned long)base, pages); + if (ret) + goto error_out; + ret = set_memory_rw((unsigned long)base, pages); + if (ret) + goto error_out; dst_cpy = fncpy(dst, src, size); - set_memory_ro((unsigned long)base, pages); - set_memory_x((unsigned long)base, pages); + ret = set_memory_ro((unsigned long)base, pages); + if (ret) + goto error_out; + ret = set_memory_x((unsigned long)base, pages); + if (ret) + goto error_out; mutex_unlock(&part->lock); return dst_cpy; + +error_out: + mutex_unlock(&part->lock); + return NULL; } EXPORT_SYMBOL_GPL(sram_exec_copy); diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c index f30448bf3a63..6c1a23cb3e8c 100644 --- a/drivers/misc/sram.c +++ b/drivers/misc/sram.c @@ -340,8 +340,6 @@ static const struct of_device_id sram_dt_ids[] = { static int sram_probe(struct platform_device *pdev) { struct sram_dev *sram; - struct resource *res; - size_t size; int ret; int (*init_func)(void); @@ -351,25 +349,14 @@ static int sram_probe(struct platform_device *pdev) sram->dev = &pdev->dev; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(sram->dev, "found no memory resource\n"); - return -EINVAL; - } - - size = resource_size(res); - - if (!devm_request_mem_region(sram->dev, res->start, size, pdev->name)) { - dev_err(sram->dev, "could not request region for resource\n"); - return -EBUSY; - } - if (of_property_read_bool(pdev->dev.of_node, "no-memory-wc")) - sram->virt_base = devm_ioremap(sram->dev, res->start, size); + sram->virt_base = devm_platform_ioremap_resource(pdev, 0); else - sram->virt_base = devm_ioremap_wc(sram->dev, res->start, size); - if (!sram->virt_base) - return -ENOMEM; + sram->virt_base = devm_platform_ioremap_resource_wc(pdev, 0); + if (IS_ERR(sram->virt_base)) { + dev_err(&pdev->dev, "could not map SRAM registers\n"); + return PTR_ERR(sram->virt_base); + } sram->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY), NUMA_NO_NODE, NULL); @@ -382,7 +369,8 @@ static int sram_probe(struct platform_device *pdev) else clk_prepare_enable(sram->clk); - ret = sram_reserve_regions(sram, res); + ret = sram_reserve_regions(sram, + platform_get_resource(pdev, IORESOURCE_MEM, 0)); if (ret) goto err_disable_clk; diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 7d9e23aa0b92..14136d2cc8f9 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -708,7 +708,6 @@ EXPORT_SYMBOL_GPL(st_unregister); */ static int st_tty_open(struct tty_struct *tty) { - int err = 0; struct st_data_s *st_gdata; pr_info("%s ", __func__); @@ -731,13 +730,14 @@ static int st_tty_open(struct tty_struct *tty) */ st_kim_complete(st_gdata->kim_data); pr_debug("done %s", __func__); - return err; + + return 0; } static void st_tty_close(struct tty_struct *tty) { - unsigned char i = ST_MAX_CHANNELS; - unsigned long flags = 0; + unsigned char i; + unsigned long flags; struct st_data_s *st_gdata = tty->disc_data; pr_info("%s ", __func__); diff --git a/drivers/misc/tsl2550.c b/drivers/misc/tsl2550.c index 09db397df287..6d71865c8042 100644 --- a/drivers/misc/tsl2550.c +++ b/drivers/misc/tsl2550.c @@ -148,16 +148,14 @@ static int tsl2550_calculate_lux(u8 ch0, u8 ch1) u16 c0 = count_lut[ch0]; u16 c1 = count_lut[ch1]; - /* - * Calculate ratio. - * Note: the "128" is a scaling factor - */ - u8 r = 128; - /* Avoid division by 0 and count 1 cannot be greater than count 0 */ if (c1 <= c0) if (c0) { - r = c1 * 128 / c0; + /* + * Calculate ratio. + * Note: the "128" is a scaling factor + */ + u8 r = c1 * 128 / c0; /* Calculate LUX */ lux = ((c0 - c1) * ratio_lut[r]) / 256; diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 8840299420e0..b837e7eba5f7 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -17,6 +17,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> +#include <linux/io.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/vmalloc.h> @@ -691,7 +692,6 @@ static int vmballoon_alloc_page_list(struct vmballoon *b, } if (page) { - vmballoon_mark_page_offline(page, ctl->page_size); /* Success. Add the page to the list and continue. */ list_add(&page->lru, &ctl->pages); continue; @@ -930,7 +930,6 @@ static void vmballoon_release_page_list(struct list_head *page_list, list_for_each_entry_safe(page, tmp, page_list, lru) { list_del(&page->lru); - vmballoon_mark_page_online(page, page_size); __free_pages(page, vmballoon_page_order(page_size)); } @@ -1005,6 +1004,7 @@ static void vmballoon_enqueue_page_list(struct vmballoon *b, enum vmballoon_page_size_type page_size) { unsigned long flags; + struct page *page; if (page_size == VMW_BALLOON_4K_PAGE) { balloon_page_list_enqueue(&b->b_dev_info, pages); @@ -1014,6 +1014,11 @@ static void vmballoon_enqueue_page_list(struct vmballoon *b, * for the balloon compaction mechanism. */ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags); + + list_for_each_entry(page, pages, lru) { + vmballoon_mark_page_offline(page, VMW_BALLOON_2M_PAGE); + } + list_splice_init(pages, &b->huge_pages); __count_vm_events(BALLOON_INFLATE, *n_pages * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE)); @@ -1056,6 +1061,8 @@ static void vmballoon_dequeue_page_list(struct vmballoon *b, /* 2MB pages */ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags); list_for_each_entry_safe(page, tmp, &b->huge_pages, lru) { + vmballoon_mark_page_online(page, VMW_BALLOON_2M_PAGE); + list_move(&page->lru, pages); if (++i == n_req_pages) break; diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c b/drivers/misc/vmw_vmci/vmci_doorbell.c index bad89b6e0802..345addd9306d 100644 --- a/drivers/misc/vmw_vmci/vmci_doorbell.c +++ b/drivers/misc/vmw_vmci/vmci_doorbell.c @@ -310,7 +310,8 @@ int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle) entry = container_of(resource, struct dbell_entry, resource); if (entry->run_delayed) { - schedule_work(&entry->work); + if (!schedule_work(&entry->work)) + vmci_resource_put(resource); } else { entry->notify_cb(entry->client_data); vmci_resource_put(resource); @@ -361,7 +362,8 @@ static void dbell_fire_entries(u32 notify_idx) atomic_read(&dbell->active) == 1) { if (dbell->run_delayed) { vmci_resource_get(&dbell->resource); - schedule_work(&dbell->work); + if (!schedule_work(&dbell->work)) + vmci_resource_put(&dbell->resource); } else { dbell->notify_cb(dbell->client_data); } diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c index 819e35995d32..cbb706dabede 100644 --- a/drivers/misc/vmw_vmci/vmci_driver.c +++ b/drivers/misc/vmw_vmci/vmci_driver.c @@ -28,6 +28,10 @@ MODULE_PARM_DESC(disable_guest, static bool vmci_guest_personality_initialized; static bool vmci_host_personality_initialized; +static DEFINE_MUTEX(vmci_vsock_mutex); /* protects vmci_vsock_transport_cb */ +static vmci_vsock_cb vmci_vsock_transport_cb; +static bool vmci_vsock_cb_host_called; + /* * vmci_get_context_id() - Gets the current context ID. * @@ -45,6 +49,69 @@ u32 vmci_get_context_id(void) } EXPORT_SYMBOL_GPL(vmci_get_context_id); +/* + * vmci_register_vsock_callback() - Register the VSOCK vmci_transport callback. + * + * The callback will be called when the first host or guest becomes active, + * or if they are already active when this function is called. + * To unregister the callback, call this function with NULL parameter. + * + * Returns 0 on success. -EBUSY if a callback is already registered. + */ +int vmci_register_vsock_callback(vmci_vsock_cb callback) +{ + int err = 0; + + mutex_lock(&vmci_vsock_mutex); + + if (vmci_vsock_transport_cb && callback) { + err = -EBUSY; + goto out; + } + + vmci_vsock_transport_cb = callback; + + if (!vmci_vsock_transport_cb) { + vmci_vsock_cb_host_called = false; + goto out; + } + + if (vmci_guest_code_active()) + vmci_vsock_transport_cb(false); + + if (vmci_host_users() > 0) { + vmci_vsock_cb_host_called = true; + vmci_vsock_transport_cb(true); + } + +out: + mutex_unlock(&vmci_vsock_mutex); + return err; +} +EXPORT_SYMBOL_GPL(vmci_register_vsock_callback); + +void vmci_call_vsock_callback(bool is_host) +{ + mutex_lock(&vmci_vsock_mutex); + + if (!vmci_vsock_transport_cb) + goto out; + + /* In the host, this function could be called multiple times, + * but we want to register it only once. + */ + if (is_host) { + if (vmci_vsock_cb_host_called) + goto out; + + vmci_vsock_cb_host_called = true; + } + + vmci_vsock_transport_cb(is_host); +out: + mutex_unlock(&vmci_vsock_mutex); +} + static int __init vmci_drv_init(void) { int vmci_err; diff --git a/drivers/misc/vmw_vmci/vmci_driver.h b/drivers/misc/vmw_vmci/vmci_driver.h index aab81b67670c..990682480bf6 100644 --- a/drivers/misc/vmw_vmci/vmci_driver.h +++ b/drivers/misc/vmw_vmci/vmci_driver.h @@ -36,10 +36,12 @@ extern struct pci_dev *vmci_pdev; u32 vmci_get_context_id(void); int vmci_send_datagram(struct vmci_datagram *dg); +void vmci_call_vsock_callback(bool is_host); int vmci_host_init(void); void vmci_host_exit(void); bool vmci_host_code_active(void); +int vmci_host_users(void); int vmci_guest_init(void); void vmci_guest_exit(void); diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c index 7a84a48c75da..cc8eeb361fcd 100644 --- a/drivers/misc/vmw_vmci/vmci_guest.c +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -637,6 +637,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, vmci_dev->iobase + VMCI_CONTROL_ADDR); pci_set_drvdata(pdev, vmci_dev); + + vmci_call_vsock_callback(false); return 0; err_free_irq: diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c index 833e2bd248a5..ce16d6b99295 100644 --- a/drivers/misc/vmw_vmci/vmci_host.c +++ b/drivers/misc/vmw_vmci/vmci_host.c @@ -108,6 +108,11 @@ bool vmci_host_code_active(void) atomic_read(&vmci_host_active_users) > 0); } +int vmci_host_users(void) +{ + return atomic_read(&vmci_host_active_users); +} + /* * Called on open of /dev/vmci. */ @@ -338,6 +343,8 @@ static int vmci_host_do_init_context(struct vmci_host_dev *vmci_host_dev, vmci_host_dev->ct_type = VMCIOBJ_CONTEXT; atomic_inc(&vmci_host_active_users); + vmci_call_vsock_callback(true); + retval = 0; out: @@ -961,7 +968,7 @@ static const struct file_operations vmuser_fops = { .release = vmci_host_close, .poll = vmci_host_poll, .unlocked_ioctl = vmci_host_unlocked_ioctl, - .compat_ioctl = vmci_host_unlocked_ioctl, + .compat_ioctl = compat_ptr_ioctl, }; static struct miscdevice vmci_host_miscdev = { diff --git a/drivers/misc/xilinx_sdfec.c b/drivers/misc/xilinx_sdfec.c index f257d3812110..71bbaa56bdb5 100644 --- a/drivers/misc/xilinx_sdfec.c +++ b/drivers/misc/xilinx_sdfec.c @@ -19,11 +19,150 @@ #include <linux/poll.h> #include <linux/slab.h> #include <linux/clk.h> +#include <linux/compat.h> +#include <linux/highmem.h> + +#include <uapi/misc/xilinx_sdfec.h> #define DEV_NAME_LEN 12 -static struct idr dev_idr; -static struct mutex dev_idr_lock; +static DEFINE_IDA(dev_nrs); + +/* Xilinx SDFEC Register Map */ +/* CODE_WRI_PROTECT Register */ +#define XSDFEC_CODE_WR_PROTECT_ADDR (0x4) + +/* ACTIVE Register */ +#define XSDFEC_ACTIVE_ADDR (0x8) +#define XSDFEC_IS_ACTIVITY_SET (0x1) + +/* AXIS_WIDTH Register */ +#define XSDFEC_AXIS_WIDTH_ADDR (0xC) +#define XSDFEC_AXIS_DOUT_WORDS_LSB (5) +#define XSDFEC_AXIS_DOUT_WIDTH_LSB (3) +#define XSDFEC_AXIS_DIN_WORDS_LSB (2) +#define XSDFEC_AXIS_DIN_WIDTH_LSB (0) + +/* AXIS_ENABLE Register */ +#define XSDFEC_AXIS_ENABLE_ADDR (0x10) +#define XSDFEC_AXIS_OUT_ENABLE_MASK (0x38) +#define XSDFEC_AXIS_IN_ENABLE_MASK (0x7) +#define XSDFEC_AXIS_ENABLE_MASK \ + (XSDFEC_AXIS_OUT_ENABLE_MASK | XSDFEC_AXIS_IN_ENABLE_MASK) + +/* FEC_CODE Register */ +#define XSDFEC_FEC_CODE_ADDR (0x14) + +/* ORDER Register Map */ +#define XSDFEC_ORDER_ADDR (0x18) + +/* Interrupt Status Register */ +#define XSDFEC_ISR_ADDR (0x1C) +/* Interrupt Status Register Bit Mask */ +#define XSDFEC_ISR_MASK (0x3F) + +/* Write Only - Interrupt Enable Register */ +#define XSDFEC_IER_ADDR (0x20) +/* Write Only - Interrupt Disable Register */ +#define XSDFEC_IDR_ADDR (0x24) +/* Read Only - Interrupt Mask Register */ +#define XSDFEC_IMR_ADDR (0x28) + +/* ECC Interrupt Status Register */ +#define XSDFEC_ECC_ISR_ADDR (0x2C) +/* Single Bit Errors */ +#define XSDFEC_ECC_ISR_SBE_MASK (0x7FF) +/* PL Initialize Single Bit Errors */ +#define XSDFEC_PL_INIT_ECC_ISR_SBE_MASK (0x3C00000) +/* Multi Bit Errors */ +#define XSDFEC_ECC_ISR_MBE_MASK (0x3FF800) +/* PL Initialize Multi Bit Errors */ +#define XSDFEC_PL_INIT_ECC_ISR_MBE_MASK (0x3C000000) +/* Multi Bit Error to Event Shift */ +#define XSDFEC_ECC_ISR_MBE_TO_EVENT_SHIFT (11) +/* PL Initialize Multi Bit Error to Event Shift */ +#define XSDFEC_PL_INIT_ECC_ISR_MBE_TO_EVENT_SHIFT (4) +/* ECC Interrupt Status Bit Mask */ +#define XSDFEC_ECC_ISR_MASK (XSDFEC_ECC_ISR_SBE_MASK | XSDFEC_ECC_ISR_MBE_MASK) +/* ECC Interrupt Status PL Initialize Bit Mask */ +#define XSDFEC_PL_INIT_ECC_ISR_MASK \ + (XSDFEC_PL_INIT_ECC_ISR_SBE_MASK | XSDFEC_PL_INIT_ECC_ISR_MBE_MASK) +/* ECC Interrupt Status All Bit Mask */ +#define XSDFEC_ALL_ECC_ISR_MASK \ + (XSDFEC_ECC_ISR_MASK | XSDFEC_PL_INIT_ECC_ISR_MASK) +/* ECC Interrupt Status Single Bit Errors Mask */ +#define XSDFEC_ALL_ECC_ISR_SBE_MASK \ + (XSDFEC_ECC_ISR_SBE_MASK | XSDFEC_PL_INIT_ECC_ISR_SBE_MASK) +/* ECC Interrupt Status Multi Bit Errors Mask */ +#define XSDFEC_ALL_ECC_ISR_MBE_MASK \ + (XSDFEC_ECC_ISR_MBE_MASK | XSDFEC_PL_INIT_ECC_ISR_MBE_MASK) + +/* Write Only - ECC Interrupt Enable Register */ +#define XSDFEC_ECC_IER_ADDR (0x30) +/* Write Only - ECC Interrupt Disable Register */ +#define XSDFEC_ECC_IDR_ADDR (0x34) +/* Read Only - ECC Interrupt Mask Register */ +#define XSDFEC_ECC_IMR_ADDR (0x38) + +/* BYPASS Register */ +#define XSDFEC_BYPASS_ADDR (0x3C) + +/* Turbo Code Register */ +#define XSDFEC_TURBO_ADDR (0x100) +#define XSDFEC_TURBO_SCALE_MASK (0xFFF) +#define XSDFEC_TURBO_SCALE_BIT_POS (8) +#define XSDFEC_TURBO_SCALE_MAX (15) + +/* REG0 Register */ +#define XSDFEC_LDPC_CODE_REG0_ADDR_BASE (0x2000) +#define XSDFEC_LDPC_CODE_REG0_ADDR_HIGH (0x27F0) +#define XSDFEC_REG0_N_MIN (4) +#define XSDFEC_REG0_N_MAX (32768) +#define XSDFEC_REG0_N_MUL_P (256) +#define XSDFEC_REG0_N_LSB (0) +#define XSDFEC_REG0_K_MIN (2) +#define XSDFEC_REG0_K_MAX (32766) +#define XSDFEC_REG0_K_MUL_P (256) +#define XSDFEC_REG0_K_LSB (16) + +/* REG1 Register */ +#define XSDFEC_LDPC_CODE_REG1_ADDR_BASE (0x2004) +#define XSDFEC_LDPC_CODE_REG1_ADDR_HIGH (0x27f4) +#define XSDFEC_REG1_PSIZE_MIN (2) +#define XSDFEC_REG1_PSIZE_MAX (512) +#define XSDFEC_REG1_NO_PACKING_MASK (0x400) +#define XSDFEC_REG1_NO_PACKING_LSB (10) +#define XSDFEC_REG1_NM_MASK (0xFF800) +#define XSDFEC_REG1_NM_LSB (11) +#define XSDFEC_REG1_BYPASS_MASK (0x100000) + +/* REG2 Register */ +#define XSDFEC_LDPC_CODE_REG2_ADDR_BASE (0x2008) +#define XSDFEC_LDPC_CODE_REG2_ADDR_HIGH (0x27f8) +#define XSDFEC_REG2_NLAYERS_MIN (1) +#define XSDFEC_REG2_NLAYERS_MAX (256) +#define XSDFEC_REG2_NNMQC_MASK (0xFFE00) +#define XSDFEC_REG2_NMQC_LSB (9) +#define XSDFEC_REG2_NORM_TYPE_MASK (0x100000) +#define XSDFEC_REG2_NORM_TYPE_LSB (20) +#define XSDFEC_REG2_SPECIAL_QC_MASK (0x200000) +#define XSDFEC_REG2_SPEICAL_QC_LSB (21) +#define XSDFEC_REG2_NO_FINAL_PARITY_MASK (0x400000) +#define XSDFEC_REG2_NO_FINAL_PARITY_LSB (22) +#define XSDFEC_REG2_MAX_SCHEDULE_MASK (0x1800000) +#define XSDFEC_REG2_MAX_SCHEDULE_LSB (23) + +/* REG3 Register */ +#define XSDFEC_LDPC_CODE_REG3_ADDR_BASE (0x200C) +#define XSDFEC_LDPC_CODE_REG3_ADDR_HIGH (0x27FC) +#define XSDFEC_REG3_LA_OFF_LSB (8) +#define XSDFEC_REG3_QC_OFF_LSB (16) + +#define XSDFEC_LDPC_REG_JUMP (0x10) +#define XSDFEC_REG_WIDTH_JUMP (4) + +/* The maximum number of pinned pages */ +#define MAX_NUM_PAGES ((XSDFEC_QC_TABLE_DEPTH / PAGE_SIZE) + 1) /** * struct xsdfec_clks - For managing SD-FEC clocks @@ -49,31 +188,1043 @@ struct xsdfec_clks { /** * struct xsdfec_dev - Driver data for SDFEC - * @regs: device physical base address - * @dev: pointer to device struct * @miscdev: Misc device handle - * @error_data_lock: Error counter and states spinlock * @clks: Clocks managed by the SDFEC driver + * @waitq: Driver wait queue + * @config: Configuration of the SDFEC device * @dev_name: Device name + * @flags: spinlock flags + * @regs: device physical base address + * @dev: pointer to device struct + * @state: State of the SDFEC device + * @error_data_lock: Error counter and states spinlock * @dev_id: Device ID + * @isr_err_count: Count of ISR errors + * @cecc_count: Count of Correctable ECC errors (SBE) + * @uecc_count: Count of Uncorrectable ECC errors (MBE) + * @irq: IRQ number + * @state_updated: indicates State updated by interrupt handler + * @stats_updated: indicates Stats updated by interrupt handler + * @intr_enabled: indicates IRQ enabled * * This structure contains necessary state for SDFEC driver to operate */ struct xsdfec_dev { + struct miscdevice miscdev; + struct xsdfec_clks clks; + wait_queue_head_t waitq; + struct xsdfec_config config; + char dev_name[DEV_NAME_LEN]; + unsigned long flags; void __iomem *regs; struct device *dev; - struct miscdevice miscdev; + enum xsdfec_state state; /* Spinlock to protect state_updated and stats_updated */ spinlock_t error_data_lock; - struct xsdfec_clks clks; - char dev_name[DEV_NAME_LEN]; int dev_id; + u32 isr_err_count; + u32 cecc_count; + u32 uecc_count; + int irq; + bool state_updated; + bool stats_updated; + bool intr_enabled; }; +static inline void xsdfec_regwrite(struct xsdfec_dev *xsdfec, u32 addr, + u32 value) +{ + dev_dbg(xsdfec->dev, "Writing 0x%x to offset 0x%x", value, addr); + iowrite32(value, xsdfec->regs + addr); +} + +static inline u32 xsdfec_regread(struct xsdfec_dev *xsdfec, u32 addr) +{ + u32 rval; + + rval = ioread32(xsdfec->regs + addr); + dev_dbg(xsdfec->dev, "Read value = 0x%x from offset 0x%x", rval, addr); + return rval; +} + +static void update_bool_config_from_reg(struct xsdfec_dev *xsdfec, + u32 reg_offset, u32 bit_num, + char *config_value) +{ + u32 reg_val; + u32 bit_mask = 1 << bit_num; + + reg_val = xsdfec_regread(xsdfec, reg_offset); + *config_value = (reg_val & bit_mask) > 0; +} + +static void update_config_from_hw(struct xsdfec_dev *xsdfec) +{ + u32 reg_value; + bool sdfec_started; + + /* Update the Order */ + reg_value = xsdfec_regread(xsdfec, XSDFEC_ORDER_ADDR); + xsdfec->config.order = reg_value; + + update_bool_config_from_reg(xsdfec, XSDFEC_BYPASS_ADDR, + 0, /* Bit Number, maybe change to mask */ + &xsdfec->config.bypass); + + update_bool_config_from_reg(xsdfec, XSDFEC_CODE_WR_PROTECT_ADDR, + 0, /* Bit Number */ + &xsdfec->config.code_wr_protect); + + reg_value = xsdfec_regread(xsdfec, XSDFEC_IMR_ADDR); + xsdfec->config.irq.enable_isr = (reg_value & XSDFEC_ISR_MASK) > 0; + + reg_value = xsdfec_regread(xsdfec, XSDFEC_ECC_IMR_ADDR); + xsdfec->config.irq.enable_ecc_isr = + (reg_value & XSDFEC_ECC_ISR_MASK) > 0; + + reg_value = xsdfec_regread(xsdfec, XSDFEC_AXIS_ENABLE_ADDR); + sdfec_started = (reg_value & XSDFEC_AXIS_IN_ENABLE_MASK) > 0; + if (sdfec_started) + xsdfec->state = XSDFEC_STARTED; + else + xsdfec->state = XSDFEC_STOPPED; +} + +static int xsdfec_get_status(struct xsdfec_dev *xsdfec, void __user *arg) +{ + struct xsdfec_status status; + int err; + + memset(&status, 0, sizeof(status)); + spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags); + status.state = xsdfec->state; + xsdfec->state_updated = false; + spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags); + status.activity = (xsdfec_regread(xsdfec, XSDFEC_ACTIVE_ADDR) & + XSDFEC_IS_ACTIVITY_SET); + + err = copy_to_user(arg, &status, sizeof(status)); + if (err) + err = -EFAULT; + + return err; +} + +static int xsdfec_get_config(struct xsdfec_dev *xsdfec, void __user *arg) +{ + int err; + + err = copy_to_user(arg, &xsdfec->config, sizeof(xsdfec->config)); + if (err) + err = -EFAULT; + + return err; +} + +static int xsdfec_isr_enable(struct xsdfec_dev *xsdfec, bool enable) +{ + u32 mask_read; + + if (enable) { + /* Enable */ + xsdfec_regwrite(xsdfec, XSDFEC_IER_ADDR, XSDFEC_ISR_MASK); + mask_read = xsdfec_regread(xsdfec, XSDFEC_IMR_ADDR); + if (mask_read & XSDFEC_ISR_MASK) { + dev_dbg(xsdfec->dev, + "SDFEC enabling irq with IER failed"); + return -EIO; + } + } else { + /* Disable */ + xsdfec_regwrite(xsdfec, XSDFEC_IDR_ADDR, XSDFEC_ISR_MASK); + mask_read = xsdfec_regread(xsdfec, XSDFEC_IMR_ADDR); + if ((mask_read & XSDFEC_ISR_MASK) != XSDFEC_ISR_MASK) { + dev_dbg(xsdfec->dev, + "SDFEC disabling irq with IDR failed"); + return -EIO; + } + } + return 0; +} + +static int xsdfec_ecc_isr_enable(struct xsdfec_dev *xsdfec, bool enable) +{ + u32 mask_read; + + if (enable) { + /* Enable */ + xsdfec_regwrite(xsdfec, XSDFEC_ECC_IER_ADDR, + XSDFEC_ALL_ECC_ISR_MASK); + mask_read = xsdfec_regread(xsdfec, XSDFEC_ECC_IMR_ADDR); + if (mask_read & XSDFEC_ALL_ECC_ISR_MASK) { + dev_dbg(xsdfec->dev, + "SDFEC enabling ECC irq with ECC IER failed"); + return -EIO; + } + } else { + /* Disable */ + xsdfec_regwrite(xsdfec, XSDFEC_ECC_IDR_ADDR, + XSDFEC_ALL_ECC_ISR_MASK); + mask_read = xsdfec_regread(xsdfec, XSDFEC_ECC_IMR_ADDR); + if (!(((mask_read & XSDFEC_ALL_ECC_ISR_MASK) == + XSDFEC_ECC_ISR_MASK) || + ((mask_read & XSDFEC_ALL_ECC_ISR_MASK) == + XSDFEC_PL_INIT_ECC_ISR_MASK))) { + dev_dbg(xsdfec->dev, + "SDFEC disable ECC irq with ECC IDR failed"); + return -EIO; + } + } + return 0; +} + +static int xsdfec_set_irq(struct xsdfec_dev *xsdfec, void __user *arg) +{ + struct xsdfec_irq irq; + int err; + int isr_err; + int ecc_err; + + err = copy_from_user(&irq, arg, sizeof(irq)); + if (err) + return -EFAULT; + + /* Setup tlast related IRQ */ + isr_err = xsdfec_isr_enable(xsdfec, irq.enable_isr); + if (!isr_err) + xsdfec->config.irq.enable_isr = irq.enable_isr; + + /* Setup ECC related IRQ */ + ecc_err = xsdfec_ecc_isr_enable(xsdfec, irq.enable_ecc_isr); + if (!ecc_err) + xsdfec->config.irq.enable_ecc_isr = irq.enable_ecc_isr; + + if (isr_err < 0 || ecc_err < 0) + err = -EIO; + + return err; +} + +static int xsdfec_set_turbo(struct xsdfec_dev *xsdfec, void __user *arg) +{ + struct xsdfec_turbo turbo; + int err; + u32 turbo_write; + + err = copy_from_user(&turbo, arg, sizeof(turbo)); + if (err) + return -EFAULT; + + if (turbo.alg >= XSDFEC_TURBO_ALG_MAX) + return -EINVAL; + + if (turbo.scale > XSDFEC_TURBO_SCALE_MAX) + return -EINVAL; + + /* Check to see what device tree says about the FEC codes */ + if (xsdfec->config.code == XSDFEC_LDPC_CODE) + return -EIO; + + turbo_write = ((turbo.scale & XSDFEC_TURBO_SCALE_MASK) + << XSDFEC_TURBO_SCALE_BIT_POS) | + turbo.alg; + xsdfec_regwrite(xsdfec, XSDFEC_TURBO_ADDR, turbo_write); + return err; +} + +static int xsdfec_get_turbo(struct xsdfec_dev *xsdfec, void __user *arg) +{ + u32 reg_value; + struct xsdfec_turbo turbo_params; + int err; + + if (xsdfec->config.code == XSDFEC_LDPC_CODE) + return -EIO; + + memset(&turbo_params, 0, sizeof(turbo_params)); + reg_value = xsdfec_regread(xsdfec, XSDFEC_TURBO_ADDR); + + turbo_params.scale = (reg_value & XSDFEC_TURBO_SCALE_MASK) >> + XSDFEC_TURBO_SCALE_BIT_POS; + turbo_params.alg = reg_value & 0x1; + + err = copy_to_user(arg, &turbo_params, sizeof(turbo_params)); + if (err) + err = -EFAULT; + + return err; +} + +static int xsdfec_reg0_write(struct xsdfec_dev *xsdfec, u32 n, u32 k, u32 psize, + u32 offset) +{ + u32 wdata; + + if (n < XSDFEC_REG0_N_MIN || n > XSDFEC_REG0_N_MAX || psize == 0 || + (n > XSDFEC_REG0_N_MUL_P * psize) || n <= k || ((n % psize) != 0)) { + dev_dbg(xsdfec->dev, "N value is not in range"); + return -EINVAL; + } + n <<= XSDFEC_REG0_N_LSB; + + if (k < XSDFEC_REG0_K_MIN || k > XSDFEC_REG0_K_MAX || + (k > XSDFEC_REG0_K_MUL_P * psize) || ((k % psize) != 0)) { + dev_dbg(xsdfec->dev, "K value is not in range"); + return -EINVAL; + } + k = k << XSDFEC_REG0_K_LSB; + wdata = k | n; + + if (XSDFEC_LDPC_CODE_REG0_ADDR_BASE + (offset * XSDFEC_LDPC_REG_JUMP) > + XSDFEC_LDPC_CODE_REG0_ADDR_HIGH) { + dev_dbg(xsdfec->dev, "Writing outside of LDPC reg0 space 0x%x", + XSDFEC_LDPC_CODE_REG0_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP)); + return -EINVAL; + } + xsdfec_regwrite(xsdfec, + XSDFEC_LDPC_CODE_REG0_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP), + wdata); + return 0; +} + +static int xsdfec_reg1_write(struct xsdfec_dev *xsdfec, u32 psize, + u32 no_packing, u32 nm, u32 offset) +{ + u32 wdata; + + if (psize < XSDFEC_REG1_PSIZE_MIN || psize > XSDFEC_REG1_PSIZE_MAX) { + dev_dbg(xsdfec->dev, "Psize is not in range"); + return -EINVAL; + } + + if (no_packing != 0 && no_packing != 1) + dev_dbg(xsdfec->dev, "No-packing bit register invalid"); + no_packing = ((no_packing << XSDFEC_REG1_NO_PACKING_LSB) & + XSDFEC_REG1_NO_PACKING_MASK); + + if (nm & ~(XSDFEC_REG1_NM_MASK >> XSDFEC_REG1_NM_LSB)) + dev_dbg(xsdfec->dev, "NM is beyond 10 bits"); + nm = (nm << XSDFEC_REG1_NM_LSB) & XSDFEC_REG1_NM_MASK; + + wdata = nm | no_packing | psize; + if (XSDFEC_LDPC_CODE_REG1_ADDR_BASE + (offset * XSDFEC_LDPC_REG_JUMP) > + XSDFEC_LDPC_CODE_REG1_ADDR_HIGH) { + dev_dbg(xsdfec->dev, "Writing outside of LDPC reg1 space 0x%x", + XSDFEC_LDPC_CODE_REG1_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP)); + return -EINVAL; + } + xsdfec_regwrite(xsdfec, + XSDFEC_LDPC_CODE_REG1_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP), + wdata); + return 0; +} + +static int xsdfec_reg2_write(struct xsdfec_dev *xsdfec, u32 nlayers, u32 nmqc, + u32 norm_type, u32 special_qc, u32 no_final_parity, + u32 max_schedule, u32 offset) +{ + u32 wdata; + + if (nlayers < XSDFEC_REG2_NLAYERS_MIN || + nlayers > XSDFEC_REG2_NLAYERS_MAX) { + dev_dbg(xsdfec->dev, "Nlayers is not in range"); + return -EINVAL; + } + + if (nmqc & ~(XSDFEC_REG2_NNMQC_MASK >> XSDFEC_REG2_NMQC_LSB)) + dev_dbg(xsdfec->dev, "NMQC exceeds 11 bits"); + nmqc = (nmqc << XSDFEC_REG2_NMQC_LSB) & XSDFEC_REG2_NNMQC_MASK; + + if (norm_type > 1) + dev_dbg(xsdfec->dev, "Norm type is invalid"); + norm_type = ((norm_type << XSDFEC_REG2_NORM_TYPE_LSB) & + XSDFEC_REG2_NORM_TYPE_MASK); + if (special_qc > 1) + dev_dbg(xsdfec->dev, "Special QC in invalid"); + special_qc = ((special_qc << XSDFEC_REG2_SPEICAL_QC_LSB) & + XSDFEC_REG2_SPECIAL_QC_MASK); + + if (no_final_parity > 1) + dev_dbg(xsdfec->dev, "No final parity check invalid"); + no_final_parity = + ((no_final_parity << XSDFEC_REG2_NO_FINAL_PARITY_LSB) & + XSDFEC_REG2_NO_FINAL_PARITY_MASK); + if (max_schedule & + ~(XSDFEC_REG2_MAX_SCHEDULE_MASK >> XSDFEC_REG2_MAX_SCHEDULE_LSB)) + dev_dbg(xsdfec->dev, "Max Schedule exceeds 2 bits"); + max_schedule = ((max_schedule << XSDFEC_REG2_MAX_SCHEDULE_LSB) & + XSDFEC_REG2_MAX_SCHEDULE_MASK); + + wdata = (max_schedule | no_final_parity | special_qc | norm_type | + nmqc | nlayers); + + if (XSDFEC_LDPC_CODE_REG2_ADDR_BASE + (offset * XSDFEC_LDPC_REG_JUMP) > + XSDFEC_LDPC_CODE_REG2_ADDR_HIGH) { + dev_dbg(xsdfec->dev, "Writing outside of LDPC reg2 space 0x%x", + XSDFEC_LDPC_CODE_REG2_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP)); + return -EINVAL; + } + xsdfec_regwrite(xsdfec, + XSDFEC_LDPC_CODE_REG2_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP), + wdata); + return 0; +} + +static int xsdfec_reg3_write(struct xsdfec_dev *xsdfec, u8 sc_off, u8 la_off, + u16 qc_off, u32 offset) +{ + u32 wdata; + + wdata = ((qc_off << XSDFEC_REG3_QC_OFF_LSB) | + (la_off << XSDFEC_REG3_LA_OFF_LSB) | sc_off); + if (XSDFEC_LDPC_CODE_REG3_ADDR_BASE + (offset * XSDFEC_LDPC_REG_JUMP) > + XSDFEC_LDPC_CODE_REG3_ADDR_HIGH) { + dev_dbg(xsdfec->dev, "Writing outside of LDPC reg3 space 0x%x", + XSDFEC_LDPC_CODE_REG3_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP)); + return -EINVAL; + } + xsdfec_regwrite(xsdfec, + XSDFEC_LDPC_CODE_REG3_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP), + wdata); + return 0; +} + +static int xsdfec_table_write(struct xsdfec_dev *xsdfec, u32 offset, + u32 *src_ptr, u32 len, const u32 base_addr, + const u32 depth) +{ + u32 reg = 0; + u32 res; + u32 n, i; + u32 *addr = NULL; + struct page *page[MAX_NUM_PAGES]; + + /* + * Writes that go beyond the length of + * Shared Scale(SC) table should fail + */ + if (offset > depth / XSDFEC_REG_WIDTH_JUMP || + len > depth / XSDFEC_REG_WIDTH_JUMP || + offset + len > depth / XSDFEC_REG_WIDTH_JUMP) { + dev_dbg(xsdfec->dev, "Write exceeds SC table length"); + return -EINVAL; + } + + n = (len * XSDFEC_REG_WIDTH_JUMP) / PAGE_SIZE; + if ((len * XSDFEC_REG_WIDTH_JUMP) % PAGE_SIZE) + n += 1; + + res = get_user_pages_fast((unsigned long)src_ptr, n, 0, page); + if (res < n) { + for (i = 0; i < res; i++) + put_page(page[i]); + return -EINVAL; + } + + for (i = 0; i < n; i++) { + addr = kmap(page[i]); + do { + xsdfec_regwrite(xsdfec, + base_addr + ((offset + reg) * + XSDFEC_REG_WIDTH_JUMP), + addr[reg]); + reg++; + } while ((reg < len) && + ((reg * XSDFEC_REG_WIDTH_JUMP) % PAGE_SIZE)); + put_page(page[i]); + } + return reg; +} + +static int xsdfec_add_ldpc(struct xsdfec_dev *xsdfec, void __user *arg) +{ + struct xsdfec_ldpc_params *ldpc; + int ret, n; + + ldpc = kzalloc(sizeof(*ldpc), GFP_KERNEL); + if (!ldpc) + return -ENOMEM; + + if (copy_from_user(ldpc, arg, sizeof(*ldpc))) { + ret = -EFAULT; + goto err_out; + } + + if (xsdfec->config.code == XSDFEC_TURBO_CODE) { + ret = -EIO; + goto err_out; + } + + /* Verify Device has not started */ + if (xsdfec->state == XSDFEC_STARTED) { + ret = -EIO; + goto err_out; + } + + if (xsdfec->config.code_wr_protect) { + ret = -EIO; + goto err_out; + } + + /* Write Reg 0 */ + ret = xsdfec_reg0_write(xsdfec, ldpc->n, ldpc->k, ldpc->psize, + ldpc->code_id); + if (ret) + goto err_out; + + /* Write Reg 1 */ + ret = xsdfec_reg1_write(xsdfec, ldpc->psize, ldpc->no_packing, ldpc->nm, + ldpc->code_id); + if (ret) + goto err_out; + + /* Write Reg 2 */ + ret = xsdfec_reg2_write(xsdfec, ldpc->nlayers, ldpc->nmqc, + ldpc->norm_type, ldpc->special_qc, + ldpc->no_final_parity, ldpc->max_schedule, + ldpc->code_id); + if (ret) + goto err_out; + + /* Write Reg 3 */ + ret = xsdfec_reg3_write(xsdfec, ldpc->sc_off, ldpc->la_off, + ldpc->qc_off, ldpc->code_id); + if (ret) + goto err_out; + + /* Write Shared Codes */ + n = ldpc->nlayers / 4; + if (ldpc->nlayers % 4) + n++; + + ret = xsdfec_table_write(xsdfec, ldpc->sc_off, ldpc->sc_table, n, + XSDFEC_LDPC_SC_TABLE_ADDR_BASE, + XSDFEC_SC_TABLE_DEPTH); + if (ret < 0) + goto err_out; + + ret = xsdfec_table_write(xsdfec, 4 * ldpc->la_off, ldpc->la_table, + ldpc->nlayers, XSDFEC_LDPC_LA_TABLE_ADDR_BASE, + XSDFEC_LA_TABLE_DEPTH); + if (ret < 0) + goto err_out; + + ret = xsdfec_table_write(xsdfec, 4 * ldpc->qc_off, ldpc->qc_table, + ldpc->nqc, XSDFEC_LDPC_QC_TABLE_ADDR_BASE, + XSDFEC_QC_TABLE_DEPTH); + if (ret > 0) + ret = 0; +err_out: + kfree(ldpc); + return ret; +} + +static int xsdfec_set_order(struct xsdfec_dev *xsdfec, void __user *arg) +{ + bool order_invalid; + enum xsdfec_order order; + int err; + + err = get_user(order, (enum xsdfec_order __user *)arg); + if (err) + return -EFAULT; + + order_invalid = (order != XSDFEC_MAINTAIN_ORDER) && + (order != XSDFEC_OUT_OF_ORDER); + if (order_invalid) + return -EINVAL; + + /* Verify Device has not started */ + if (xsdfec->state == XSDFEC_STARTED) + return -EIO; + + xsdfec_regwrite(xsdfec, XSDFEC_ORDER_ADDR, order); + + xsdfec->config.order = order; + + return 0; +} + +static int xsdfec_set_bypass(struct xsdfec_dev *xsdfec, bool __user *arg) +{ + bool bypass; + int err; + + err = get_user(bypass, arg); + if (err) + return -EFAULT; + + /* Verify Device has not started */ + if (xsdfec->state == XSDFEC_STARTED) + return -EIO; + + if (bypass) + xsdfec_regwrite(xsdfec, XSDFEC_BYPASS_ADDR, 1); + else + xsdfec_regwrite(xsdfec, XSDFEC_BYPASS_ADDR, 0); + + xsdfec->config.bypass = bypass; + + return 0; +} + +static int xsdfec_is_active(struct xsdfec_dev *xsdfec, bool __user *arg) +{ + u32 reg_value; + bool is_active; + int err; + + reg_value = xsdfec_regread(xsdfec, XSDFEC_ACTIVE_ADDR); + /* using a double ! operator instead of casting */ + is_active = !!(reg_value & XSDFEC_IS_ACTIVITY_SET); + err = put_user(is_active, arg); + if (err) + return -EFAULT; + + return err; +} + +static u32 +xsdfec_translate_axis_width_cfg_val(enum xsdfec_axis_width axis_width_cfg) +{ + u32 axis_width_field = 0; + + switch (axis_width_cfg) { + case XSDFEC_1x128b: + axis_width_field = 0; + break; + case XSDFEC_2x128b: + axis_width_field = 1; + break; + case XSDFEC_4x128b: + axis_width_field = 2; + break; + } + + return axis_width_field; +} + +static u32 xsdfec_translate_axis_words_cfg_val(enum xsdfec_axis_word_include + axis_word_inc_cfg) +{ + u32 axis_words_field = 0; + + if (axis_word_inc_cfg == XSDFEC_FIXED_VALUE || + axis_word_inc_cfg == XSDFEC_IN_BLOCK) + axis_words_field = 0; + else if (axis_word_inc_cfg == XSDFEC_PER_AXI_TRANSACTION) + axis_words_field = 1; + + return axis_words_field; +} + +static int xsdfec_cfg_axi_streams(struct xsdfec_dev *xsdfec) +{ + u32 reg_value; + u32 dout_words_field; + u32 dout_width_field; + u32 din_words_field; + u32 din_width_field; + struct xsdfec_config *config = &xsdfec->config; + + /* translate config info to register values */ + dout_words_field = + xsdfec_translate_axis_words_cfg_val(config->dout_word_include); + dout_width_field = + xsdfec_translate_axis_width_cfg_val(config->dout_width); + din_words_field = + xsdfec_translate_axis_words_cfg_val(config->din_word_include); + din_width_field = + xsdfec_translate_axis_width_cfg_val(config->din_width); + + reg_value = dout_words_field << XSDFEC_AXIS_DOUT_WORDS_LSB; + reg_value |= dout_width_field << XSDFEC_AXIS_DOUT_WIDTH_LSB; + reg_value |= din_words_field << XSDFEC_AXIS_DIN_WORDS_LSB; + reg_value |= din_width_field << XSDFEC_AXIS_DIN_WIDTH_LSB; + + xsdfec_regwrite(xsdfec, XSDFEC_AXIS_WIDTH_ADDR, reg_value); + + return 0; +} + +static int xsdfec_dev_open(struct inode *iptr, struct file *fptr) +{ + return 0; +} + +static int xsdfec_dev_release(struct inode *iptr, struct file *fptr) +{ + return 0; +} + +static int xsdfec_start(struct xsdfec_dev *xsdfec) +{ + u32 regread; + + regread = xsdfec_regread(xsdfec, XSDFEC_FEC_CODE_ADDR); + regread &= 0x1; + if (regread != xsdfec->config.code) { + dev_dbg(xsdfec->dev, + "%s SDFEC HW code does not match driver code, reg %d, code %d", + __func__, regread, xsdfec->config.code); + return -EINVAL; + } + + /* Set AXIS enable */ + xsdfec_regwrite(xsdfec, XSDFEC_AXIS_ENABLE_ADDR, + XSDFEC_AXIS_ENABLE_MASK); + /* Done */ + xsdfec->state = XSDFEC_STARTED; + return 0; +} + +static int xsdfec_stop(struct xsdfec_dev *xsdfec) +{ + u32 regread; + + if (xsdfec->state != XSDFEC_STARTED) + dev_dbg(xsdfec->dev, "Device not started correctly"); + /* Disable AXIS_ENABLE Input interfaces only */ + regread = xsdfec_regread(xsdfec, XSDFEC_AXIS_ENABLE_ADDR); + regread &= (~XSDFEC_AXIS_IN_ENABLE_MASK); + xsdfec_regwrite(xsdfec, XSDFEC_AXIS_ENABLE_ADDR, regread); + /* Stop */ + xsdfec->state = XSDFEC_STOPPED; + return 0; +} + +static int xsdfec_clear_stats(struct xsdfec_dev *xsdfec) +{ + spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags); + xsdfec->isr_err_count = 0; + xsdfec->uecc_count = 0; + xsdfec->cecc_count = 0; + spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags); + + return 0; +} + +static int xsdfec_get_stats(struct xsdfec_dev *xsdfec, void __user *arg) +{ + int err; + struct xsdfec_stats user_stats; + + spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags); + user_stats.isr_err_count = xsdfec->isr_err_count; + user_stats.cecc_count = xsdfec->cecc_count; + user_stats.uecc_count = xsdfec->uecc_count; + xsdfec->stats_updated = false; + spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags); + + err = copy_to_user(arg, &user_stats, sizeof(user_stats)); + if (err) + err = -EFAULT; + + return err; +} + +static int xsdfec_set_default_config(struct xsdfec_dev *xsdfec) +{ + /* Ensure registers are aligned with core configuration */ + xsdfec_regwrite(xsdfec, XSDFEC_FEC_CODE_ADDR, xsdfec->config.code); + xsdfec_cfg_axi_streams(xsdfec); + update_config_from_hw(xsdfec); + + return 0; +} + +static long xsdfec_dev_ioctl(struct file *fptr, unsigned int cmd, + unsigned long data) +{ + struct xsdfec_dev *xsdfec; + void __user *arg = NULL; + int rval = -EINVAL; + + xsdfec = container_of(fptr->private_data, struct xsdfec_dev, miscdev); + + /* In failed state allow only reset and get status IOCTLs */ + if (xsdfec->state == XSDFEC_NEEDS_RESET && + (cmd != XSDFEC_SET_DEFAULT_CONFIG && cmd != XSDFEC_GET_STATUS && + cmd != XSDFEC_GET_STATS && cmd != XSDFEC_CLEAR_STATS)) { + return -EPERM; + } + + if (_IOC_TYPE(cmd) != XSDFEC_MAGIC) + return -ENOTTY; + + /* check if ioctl argument is present and valid */ + if (_IOC_DIR(cmd) != _IOC_NONE) { + arg = (void __user *)data; + if (!arg) + return rval; + } + + switch (cmd) { + case XSDFEC_START_DEV: + rval = xsdfec_start(xsdfec); + break; + case XSDFEC_STOP_DEV: + rval = xsdfec_stop(xsdfec); + break; + case XSDFEC_CLEAR_STATS: + rval = xsdfec_clear_stats(xsdfec); + break; + case XSDFEC_GET_STATS: + rval = xsdfec_get_stats(xsdfec, arg); + break; + case XSDFEC_GET_STATUS: + rval = xsdfec_get_status(xsdfec, arg); + break; + case XSDFEC_GET_CONFIG: + rval = xsdfec_get_config(xsdfec, arg); + break; + case XSDFEC_SET_DEFAULT_CONFIG: + rval = xsdfec_set_default_config(xsdfec); + break; + case XSDFEC_SET_IRQ: + rval = xsdfec_set_irq(xsdfec, arg); + break; + case XSDFEC_SET_TURBO: + rval = xsdfec_set_turbo(xsdfec, arg); + break; + case XSDFEC_GET_TURBO: + rval = xsdfec_get_turbo(xsdfec, arg); + break; + case XSDFEC_ADD_LDPC_CODE_PARAMS: + rval = xsdfec_add_ldpc(xsdfec, arg); + break; + case XSDFEC_SET_ORDER: + rval = xsdfec_set_order(xsdfec, arg); + break; + case XSDFEC_SET_BYPASS: + rval = xsdfec_set_bypass(xsdfec, arg); + break; + case XSDFEC_IS_ACTIVE: + rval = xsdfec_is_active(xsdfec, (bool __user *)arg); + break; + default: + /* Should not get here */ + break; + } + return rval; +} + +#ifdef CONFIG_COMPAT +static long xsdfec_dev_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long data) +{ + return xsdfec_dev_ioctl(file, cmd, (unsigned long)compat_ptr(data)); +} +#endif + +static __poll_t xsdfec_poll(struct file *file, poll_table *wait) +{ + __poll_t mask = 0; + struct xsdfec_dev *xsdfec; + + xsdfec = container_of(file->private_data, struct xsdfec_dev, miscdev); + + if (!xsdfec) + return EPOLLNVAL | EPOLLHUP; + + poll_wait(file, &xsdfec->waitq, wait); + + /* XSDFEC ISR detected an error */ + spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags); + if (xsdfec->state_updated) + mask |= EPOLLIN | EPOLLPRI; + + if (xsdfec->stats_updated) + mask |= EPOLLIN | EPOLLRDNORM; + spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags); + + return mask; +} + static const struct file_operations xsdfec_fops = { .owner = THIS_MODULE, + .open = xsdfec_dev_open, + .release = xsdfec_dev_release, + .unlocked_ioctl = xsdfec_dev_ioctl, + .poll = xsdfec_poll, +#ifdef CONFIG_COMPAT + .compat_ioctl = xsdfec_dev_compat_ioctl, +#endif }; +static int xsdfec_parse_of(struct xsdfec_dev *xsdfec) +{ + struct device *dev = xsdfec->dev; + struct device_node *node = dev->of_node; + int rval; + const char *fec_code; + u32 din_width; + u32 din_word_include; + u32 dout_width; + u32 dout_word_include; + + rval = of_property_read_string(node, "xlnx,sdfec-code", &fec_code); + if (rval < 0) + return rval; + + if (!strcasecmp(fec_code, "ldpc")) + xsdfec->config.code = XSDFEC_LDPC_CODE; + else if (!strcasecmp(fec_code, "turbo")) + xsdfec->config.code = XSDFEC_TURBO_CODE; + else + return -EINVAL; + + rval = of_property_read_u32(node, "xlnx,sdfec-din-words", + &din_word_include); + if (rval < 0) + return rval; + + if (din_word_include < XSDFEC_AXIS_WORDS_INCLUDE_MAX) + xsdfec->config.din_word_include = din_word_include; + else + return -EINVAL; + + rval = of_property_read_u32(node, "xlnx,sdfec-din-width", &din_width); + if (rval < 0) + return rval; + + switch (din_width) { + /* Fall through and set for valid values */ + case XSDFEC_1x128b: + case XSDFEC_2x128b: + case XSDFEC_4x128b: + xsdfec->config.din_width = din_width; + break; + default: + return -EINVAL; + } + + rval = of_property_read_u32(node, "xlnx,sdfec-dout-words", + &dout_word_include); + if (rval < 0) + return rval; + + if (dout_word_include < XSDFEC_AXIS_WORDS_INCLUDE_MAX) + xsdfec->config.dout_word_include = dout_word_include; + else + return -EINVAL; + + rval = of_property_read_u32(node, "xlnx,sdfec-dout-width", &dout_width); + if (rval < 0) + return rval; + + switch (dout_width) { + /* Fall through and set for valid values */ + case XSDFEC_1x128b: + case XSDFEC_2x128b: + case XSDFEC_4x128b: + xsdfec->config.dout_width = dout_width; + break; + default: + return -EINVAL; + } + + /* Write LDPC to CODE Register */ + xsdfec_regwrite(xsdfec, XSDFEC_FEC_CODE_ADDR, xsdfec->config.code); + + xsdfec_cfg_axi_streams(xsdfec); + + return 0; +} + +static irqreturn_t xsdfec_irq_thread(int irq, void *dev_id) +{ + struct xsdfec_dev *xsdfec = dev_id; + irqreturn_t ret = IRQ_HANDLED; + u32 ecc_err; + u32 isr_err; + u32 uecc_count; + u32 cecc_count; + u32 isr_err_count; + u32 aecc_count; + u32 tmp; + + WARN_ON(xsdfec->irq != irq); + + /* Mask Interrupts */ + xsdfec_isr_enable(xsdfec, false); + xsdfec_ecc_isr_enable(xsdfec, false); + /* Read ISR */ + ecc_err = xsdfec_regread(xsdfec, XSDFEC_ECC_ISR_ADDR); + isr_err = xsdfec_regread(xsdfec, XSDFEC_ISR_ADDR); + /* Clear the interrupts */ + xsdfec_regwrite(xsdfec, XSDFEC_ECC_ISR_ADDR, ecc_err); + xsdfec_regwrite(xsdfec, XSDFEC_ISR_ADDR, isr_err); + + tmp = ecc_err & XSDFEC_ALL_ECC_ISR_MBE_MASK; + /* Count uncorrectable 2-bit errors */ + uecc_count = hweight32(tmp); + /* Count all ECC errors */ + aecc_count = hweight32(ecc_err); + /* Number of correctable 1-bit ECC error */ + cecc_count = aecc_count - 2 * uecc_count; + /* Count ISR errors */ + isr_err_count = hweight32(isr_err); + dev_dbg(xsdfec->dev, "tmp=%x, uecc=%x, aecc=%x, cecc=%x, isr=%x", tmp, + uecc_count, aecc_count, cecc_count, isr_err_count); + dev_dbg(xsdfec->dev, "uecc=%x, cecc=%x, isr=%x", xsdfec->uecc_count, + xsdfec->cecc_count, xsdfec->isr_err_count); + + spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags); + /* Add new errors to a 2-bits counter */ + if (uecc_count) + xsdfec->uecc_count += uecc_count; + /* Add new errors to a 1-bits counter */ + if (cecc_count) + xsdfec->cecc_count += cecc_count; + /* Add new errors to a ISR counter */ + if (isr_err_count) + xsdfec->isr_err_count += isr_err_count; + + /* Update state/stats flag */ + if (uecc_count) { + if (ecc_err & XSDFEC_ECC_ISR_MBE_MASK) + xsdfec->state = XSDFEC_NEEDS_RESET; + else if (ecc_err & XSDFEC_PL_INIT_ECC_ISR_MBE_MASK) + xsdfec->state = XSDFEC_PL_RECONFIGURE; + xsdfec->stats_updated = true; + xsdfec->state_updated = true; + } + + if (cecc_count) + xsdfec->stats_updated = true; + + if (isr_err_count) { + xsdfec->state = XSDFEC_NEEDS_RESET; + xsdfec->stats_updated = true; + xsdfec->state_updated = true; + } + + spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags); + dev_dbg(xsdfec->dev, "state=%x, stats=%x", xsdfec->state_updated, + xsdfec->stats_updated); + + /* Enable another polling */ + if (xsdfec->state_updated || xsdfec->stats_updated) + wake_up_interruptible(&xsdfec->waitq); + else + ret = IRQ_NONE; + + /* Unmask Interrupts */ + xsdfec_isr_enable(xsdfec, true); + xsdfec_ecc_isr_enable(xsdfec, true); + + return ret; +} + static int xsdfec_clk_init(struct platform_device *pdev, struct xsdfec_clks *clks) { @@ -227,19 +1378,13 @@ static void xsdfec_disable_all_clks(struct xsdfec_clks *clks) clk_disable_unprepare(clks->axi_clk); } -static void xsdfec_idr_remove(struct xsdfec_dev *xsdfec) -{ - mutex_lock(&dev_idr_lock); - idr_remove(&dev_idr, xsdfec->dev_id); - mutex_unlock(&dev_idr_lock); -} - static int xsdfec_probe(struct platform_device *pdev) { struct xsdfec_dev *xsdfec; struct device *dev; struct resource *res; int err; + bool irq_enabled = true; xsdfec = devm_kzalloc(&pdev->dev, sizeof(*xsdfec), GFP_KERNEL); if (!xsdfec) @@ -260,12 +1405,34 @@ static int xsdfec_probe(struct platform_device *pdev) goto err_xsdfec_dev; } + xsdfec->irq = platform_get_irq(pdev, 0); + if (xsdfec->irq < 0) { + dev_dbg(dev, "platform_get_irq failed"); + irq_enabled = false; + } + + err = xsdfec_parse_of(xsdfec); + if (err < 0) + goto err_xsdfec_dev; + + update_config_from_hw(xsdfec); + /* Save driver private data */ platform_set_drvdata(pdev, xsdfec); - mutex_lock(&dev_idr_lock); - err = idr_alloc(&dev_idr, xsdfec->dev_name, 0, 0, GFP_KERNEL); - mutex_unlock(&dev_idr_lock); + if (irq_enabled) { + init_waitqueue_head(&xsdfec->waitq); + /* Register IRQ thread */ + err = devm_request_threaded_irq(dev, xsdfec->irq, NULL, + xsdfec_irq_thread, IRQF_ONESHOT, + "xilinx-sdfec16", xsdfec); + if (err < 0) { + dev_err(dev, "unable to request IRQ%d", xsdfec->irq); + goto err_xsdfec_dev; + } + } + + err = ida_alloc(&dev_nrs, GFP_KERNEL); if (err < 0) goto err_xsdfec_dev; xsdfec->dev_id = err; @@ -278,12 +1445,12 @@ static int xsdfec_probe(struct platform_device *pdev) err = misc_register(&xsdfec->miscdev); if (err) { dev_err(dev, "error:%d. Unable to register device", err); - goto err_xsdfec_idr; + goto err_xsdfec_ida; } return 0; -err_xsdfec_idr: - xsdfec_idr_remove(xsdfec); +err_xsdfec_ida: + ida_free(&dev_nrs, xsdfec->dev_id); err_xsdfec_dev: xsdfec_disable_all_clks(&xsdfec->clks); return err; @@ -295,7 +1462,7 @@ static int xsdfec_remove(struct platform_device *pdev) xsdfec = platform_get_drvdata(pdev); misc_deregister(&xsdfec->miscdev); - xsdfec_idr_remove(xsdfec); + ida_free(&dev_nrs, xsdfec->dev_id); xsdfec_disable_all_clks(&xsdfec->clks); return 0; } @@ -321,8 +1488,6 @@ static int __init xsdfec_init(void) { int err; - mutex_init(&dev_idr_lock); - idr_init(&dev_idr); err = platform_driver_register(&xsdfec_driver); if (err < 0) { pr_err("%s Unabled to register SDFEC driver", __func__); @@ -334,7 +1499,6 @@ static int __init xsdfec_init(void) static void __exit xsdfec_exit(void) { platform_driver_unregister(&xsdfec_driver); - idr_destroy(&dev_idr); } module_init(xsdfec_init); |