diff options
Diffstat (limited to 'arch/arm64')
112 files changed, 3451 insertions, 1411 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 177be0d1d090..b25ed7834f6c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -105,7 +105,6 @@ config ARM64 select HAVE_CONTEXT_TRACKING select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_EFFICIENT_UNALIGNED_ACCESS @@ -133,6 +132,8 @@ config ARM64 select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA select MULTI_IRQ_HANDLER + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH select NO_BOOTMEM select OF select OF_EARLY_FLATTREE @@ -142,6 +143,7 @@ config ARM64 select POWER_SUPPLY select REFCOUNT_FULL select SPARSE_IRQ + select SWIOTLB select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK help @@ -150,9 +152,6 @@ config ARM64 config 64BIT def_bool y -config ARCH_PHYS_ADDR_T_64BIT - def_bool y - config MMU def_bool y @@ -237,24 +236,9 @@ config ZONE_DMA32 config HAVE_GENERIC_GUP def_bool y -config ARCH_DMA_ADDR_T_64BIT - def_bool y - -config NEED_DMA_MAP_STATE - def_bool y - -config NEED_SG_DMA_LENGTH - def_bool y - config SMP def_bool y -config SWIOTLB - def_bool y - -config IOMMU_HELPER - def_bool SWIOTLB - config KERNEL_MODE_NEON def_bool y @@ -922,6 +906,22 @@ config HARDEN_BRANCH_PREDICTOR If unsure, say Y. +config HARDEN_EL2_VECTORS + bool "Harden EL2 vector mapping against system register leak" if EXPERT + default y + help + Speculation attacks against some high-performance processors can + be used to leak privileged information such as the vector base + register, resulting in a potential defeat of the EL2 layout + randomization. + + This config option will map the vectors to a fixed location, + independent of the EL2 code mapping, so that revealing VBAR_EL2 + to an attacker does not give away any extra information. This + only gets enabled on affected CPUs. + + If unsure, say Y. + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 15402861bb59..3c353b4715dc 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -56,7 +56,11 @@ KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) +ifeq ($(cc-name),clang) +KBUILD_CFLAGS += -DCONFIG_ARCH_SUPPORTS_INT128 +else KBUILD_CFLAGS += $(call cc-ifversion, -ge, 0500, -DCONFIG_ARCH_SUPPORTS_INT128) +endif ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian @@ -74,7 +78,7 @@ LDFLAGS += -maarch64linux UTS_MACHINE := aarch64 endif -CHECKFLAGS += -D__aarch64__ -m64 +CHECKFLAGS += -D__aarch64__ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index c89d0c307f8d..e6b059378dc0 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -17,6 +17,7 @@ /dts-v1/; #include <dt-bindings/reset/altr,rst-mgr-s10.h> #include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/clock/stratix10-clock.h> / { compatible = "altr,socfpga-stratix10"; @@ -92,9 +93,32 @@ interrupt-parent = <&intc>; ranges = <0 0 0 0xffffffff>; - clkmgr@ffd1000 { - compatible = "altr,clk-mgr"; + clkmgr: clock-controller@ffd10000 { + compatible = "intel,stratix10-clkmgr"; reg = <0xffd10000 0x1000>; + #clock-cells = <1>; + }; + + clocks { + cb_intosc_hs_div2_clk: cb-intosc-hs-div2-clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + }; + + cb_intosc_ls_clk: cb-intosc-ls-clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + }; + + f2s_free_clk: f2s-free-clk { + #clock-cells = <0>; + compatible = "fixed-clock"; + }; + + osc1: osc1 { + #clock-cells = <0>; + compatible = "fixed-clock"; + }; }; gmac0: ethernet@ff800000 { @@ -105,6 +129,8 @@ mac-address = [00 00 00 00 00 00]; resets = <&rst EMAC0_RESET>; reset-names = "stmmaceth"; + clocks = <&clkmgr STRATIX10_EMAC0_CLK>; + clock-names = "stmmaceth"; status = "disabled"; }; @@ -116,6 +142,8 @@ mac-address = [00 00 00 00 00 00]; resets = <&rst EMAC1_RESET>; reset-names = "stmmaceth"; + clocks = <&clkmgr STRATIX10_EMAC1_CLK>; + clock-names = "stmmaceth"; status = "disabled"; }; @@ -127,6 +155,8 @@ mac-address = [00 00 00 00 00 00]; resets = <&rst EMAC2_RESET>; reset-names = "stmmaceth"; + clocks = <&clkmgr STRATIX10_EMAC2_CLK>; + clock-names = "stmmaceth"; status = "disabled"; }; @@ -177,6 +207,7 @@ reg = <0xffc02800 0x100>; interrupts = <0 103 4>; resets = <&rst I2C0_RESET>; + clocks = <&clkmgr STRATIX10_L4_SP_CLK>; status = "disabled"; }; @@ -187,6 +218,7 @@ reg = <0xffc02900 0x100>; interrupts = <0 104 4>; resets = <&rst I2C1_RESET>; + clocks = <&clkmgr STRATIX10_L4_SP_CLK>; status = "disabled"; }; @@ -197,6 +229,7 @@ reg = <0xffc02a00 0x100>; interrupts = <0 105 4>; resets = <&rst I2C2_RESET>; + clocks = <&clkmgr STRATIX10_L4_SP_CLK>; status = "disabled"; }; @@ -207,6 +240,7 @@ reg = <0xffc02b00 0x100>; interrupts = <0 106 4>; resets = <&rst I2C3_RESET>; + clocks = <&clkmgr STRATIX10_L4_SP_CLK>; status = "disabled"; }; @@ -217,6 +251,7 @@ reg = <0xffc02c00 0x100>; interrupts = <0 107 4>; resets = <&rst I2C4_RESET>; + clocks = <&clkmgr STRATIX10_L4_SP_CLK>; status = "disabled"; }; @@ -229,6 +264,9 @@ fifo-depth = <0x400>; resets = <&rst SDMMC_RESET>; reset-names = "reset"; + clocks = <&clkmgr STRATIX10_L4_MP_CLK>, + <&clkmgr STRATIX10_SDMMC_CLK>; + clock-names = "biu", "ciu"; status = "disabled"; }; @@ -237,6 +275,25 @@ reg = <0xffe00000 0x100000>; }; + pdma: pdma@ffda0000 { + compatible = "arm,pl330", "arm,primecell"; + reg = <0xffda0000 0x1000>; + interrupts = <0 81 4>, + <0 82 4>, + <0 83 4>, + <0 84 4>, + <0 85 4>, + <0 86 4>, + <0 87 4>, + <0 88 4>, + <0 89 4>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; + clocks = <&clkmgr STRATIX10_L4_MAIN_CLK>; + clock-names = "apb_pclk"; + }; + rst: rstmgr@ffd11000 { #reset-cells = <1>; compatible = "altr,rst-mgr"; @@ -288,24 +345,32 @@ compatible = "snps,dw-apb-timer"; interrupts = <0 113 4>; reg = <0xffc03000 0x100>; + clocks = <&clkmgr STRATIX10_L4_SP_CLK>; + clock-names = "timer"; }; timer1: timer1@ffc03100 { compatible = "snps,dw-apb-timer"; interrupts = <0 114 4>; reg = <0xffc03100 0x100>; + clocks = <&clkmgr STRATIX10_L4_SP_CLK>; + clock-names = "timer"; }; timer2: timer2@ffd00000 { compatible = "snps,dw-apb-timer"; interrupts = <0 115 4>; reg = <0xffd00000 0x100>; + clocks = <&clkmgr STRATIX10_L4_SP_CLK>; + clock-names = "timer"; }; timer3: timer3@ffd00100 { compatible = "snps,dw-apb-timer"; interrupts = <0 116 4>; reg = <0xffd00100 0x100>; + clocks = <&clkmgr STRATIX10_L4_SP_CLK>; + clock-names = "timer"; }; uart0: serial0@ffc02000 { @@ -315,6 +380,7 @@ reg-shift = <2>; reg-io-width = <4>; resets = <&rst UART0_RESET>; + clocks = <&clkmgr STRATIX10_L4_SP_CLK>; status = "disabled"; }; @@ -325,6 +391,7 @@ reg-shift = <2>; reg-io-width = <4>; resets = <&rst UART1_RESET>; + clocks = <&clkmgr STRATIX10_L4_SP_CLK>; status = "disabled"; }; @@ -387,5 +454,17 @@ resets = <&rst WATCHDOG3_RESET>; status = "disabled"; }; + + eccmgr { + compatible = "altr,socfpga-s10-ecc-manager"; + interrupts = <0 15 4>, <0 95 4>; + interrupt-controller; + #interrupt-cells = <2>; + + sdramedac { + compatible = "altr,sdram-edac-s10"; + interrupts = <16 4>, <48 4>; + }; + }; }; }; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts index eaf13fe29287..f9b1ef12db48 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts @@ -50,6 +50,21 @@ /* We expect the bootloader to fill in the reg */ reg = <0 0 0 0>; }; + + ref_033v: 033-v-ref { + compatible = "regulator-fixed"; + regulator-name = "0.33V"; + regulator-min-microvolt = <330000>; + regulator-max-microvolt = <330000>; + }; + + soc { + clocks { + osc1 { + clock-frequency = <25000000>; + }; + }; + }; }; &gpio1 { @@ -79,7 +94,7 @@ rxd2-skew-ps = <420>; /* 0ps */ rxd3-skew-ps = <420>; /* 0ps */ txen-skew-ps = <0>; /* -420ps */ - txc-skew-ps = <1860>; /* 960ps */ + txc-skew-ps = <900>; /* 0ps */ rxdv-skew-ps = <420>; /* 0ps */ rxc-skew-ps = <1680>; /* 780ps */ }; @@ -105,3 +120,30 @@ &watchdog0 { status = "okay"; }; + +&i2c1 { + status = "okay"; + clock-frequency = <100000>; + + adc@14 { + compatible = "lltc,ltc2497"; + reg = <0x14>; + vref-supply = <&ref_033v>; + }; + + temp@4c { + compatible = "maxim,max1619"; + reg = <0x4c>; + }; + + eeprom@51 { + compatible = "atmel,24c32"; + reg = <0x51>; + pagesize = <32>; + }; + + rtc@68 { + compatible = "dallas,ds1339"; + reg = <0x68>; + }; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi index 4eef36b22538..88e712ea757a 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi @@ -212,3 +212,7 @@ pinctrl-0 = <&uart_ao_a_pins>; pinctrl-names = "default"; }; + +&usb0 { + status = "okay"; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index 22bf37404ff1..3e3eb31748a3 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -271,3 +271,15 @@ pinctrl-0 = <&uart_ao_a_pins>; pinctrl-names = "default"; }; + +&usb0 { + status = "okay"; +}; + +&usb2_phy0 { + /* + * even though the schematics don't show it: + * HDMI_5V is also used as supply for the USB VBUS. + */ + phy-supply = <&hdmi_5v>; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts index 69c721a70e44..6739697be1de 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts @@ -215,3 +215,7 @@ pinctrl-0 = <&uart_ao_a_pins>; pinctrl-names = "default"; }; + +&usb0 { + status = "okay"; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi index 0a0953fbc7d4..0cfd701809de 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi @@ -185,3 +185,7 @@ pinctrl-0 = <&uart_ao_a_pins>; pinctrl-names = "default"; }; + +&usb0 { + status = "okay"; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index e1a39cbed8c9..dba365ed4bd5 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -20,6 +20,67 @@ no-map; }; }; + + soc { + usb0: usb@c9000000 { + status = "disabled"; + compatible = "amlogic,meson-gxl-dwc3"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + clocks = <&clkc CLKID_USB>; + clock-names = "usb_general"; + resets = <&reset RESET_USB_OTG>; + reset-names = "usb_otg"; + + dwc3: dwc3@c9000000 { + compatible = "snps,dwc3"; + reg = <0x0 0xc9000000 0x0 0x100000>; + interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>; + dr_mode = "host"; + maximum-speed = "high-speed"; + snps,dis_u2_susphy_quirk; + phys = <&usb3_phy>, <&usb2_phy0>, <&usb2_phy1>; + }; + }; + }; +}; + +&apb { + usb2_phy0: phy@78000 { + compatible = "amlogic,meson-gxl-usb2-phy"; + #phy-cells = <0>; + reg = <0x0 0x78000 0x0 0x20>; + clocks = <&clkc CLKID_USB>; + clock-names = "phy"; + resets = <&reset RESET_USB_OTG>; + reset-names = "phy"; + status = "okay"; + }; + + usb2_phy1: phy@78020 { + compatible = "amlogic,meson-gxl-usb2-phy"; + #phy-cells = <0>; + reg = <0x0 0x78020 0x0 0x20>; + clocks = <&clkc CLKID_USB>; + clock-names = "phy"; + resets = <&reset RESET_USB_OTG>; + reset-names = "phy"; + status = "okay"; + }; + + usb3_phy: phy@78080 { + compatible = "amlogic,meson-gxl-usb3-phy"; + #phy-cells = <0>; + reg = <0x0 0x78080 0x0 0x20>; + interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clkc CLKID_USB>, <&clkc_AO CLKID_AO_CEC_32K>; + clock-names = "phy", "peripheral"; + resets = <&reset RESET_USB_OTG>, <&reset RESET_USB_OTG>; + reset-names = "phy", "peripheral"; + status = "okay"; + }; }; ðmac { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts index 4fd46c1546a7..0868da476e41 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts @@ -406,3 +406,7 @@ status = "okay"; vref-supply = <&vddio_ao18>; }; + +&usb0 { + status = "okay"; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi index d076a7c425dd..247888d68a3a 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi @@ -80,6 +80,19 @@ }; }; +&apb { + usb2_phy2: phy@78040 { + compatible = "amlogic,meson-gxl-usb2-phy"; + #phy-cells = <0>; + reg = <0x0 0x78040 0x0 0x20>; + clocks = <&clkc CLKID_USB>; + clock-names = "phy"; + resets = <&reset RESET_USB_OTG>; + reset-names = "phy"; + status = "okay"; + }; +}; + &clkc_AO { compatible = "amlogic,meson-gxm-aoclkc", "amlogic,meson-gx-aoclkc"; }; @@ -100,3 +113,7 @@ &hdmi_tx { compatible = "amlogic,meson-gxm-dw-hdmi", "amlogic,meson-gx-dw-hdmi"; }; + +&dwc3 { + phys = <&usb3_phy>, <&usb2_phy0>, <&usb2_phy1>, <&usb2_phy2>; +}; diff --git a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi index 2ac43221ddb6..69804c5f1197 100644 --- a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi +++ b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi @@ -56,8 +56,6 @@ gpio_keys { compatible = "gpio-keys"; - #address-cells = <1>; - #size-cells = <0>; power-button { debounce_interval = <50>; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-sata.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-sata.dtsi index 4b5465da81d8..8c68e0c26f1b 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray-sata.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-sata.dtsi @@ -36,11 +36,11 @@ #size-cells = <1>; ranges = <0x0 0x0 0x67d00000 0x00800000>; - sata0: ahci@210000 { + sata0: ahci@0 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00210000 0x1000>; + reg = <0x00000000 0x1000>; reg-names = "ahci"; - interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 321 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -52,9 +52,9 @@ }; }; - sata_phy0: sata_phy@212100 { + sata_phy0: sata_phy@2100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00212100 0x1000>; + reg = <0x00002100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; @@ -66,11 +66,11 @@ }; }; - sata1: ahci@310000 { + sata1: ahci@10000 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00310000 0x1000>; + reg = <0x00010000 0x1000>; reg-names = "ahci"; - interrupts = <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 323 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -82,9 +82,9 @@ }; }; - sata_phy1: sata_phy@312100 { + sata_phy1: sata_phy@12100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00312100 0x1000>; + reg = <0x00012100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; @@ -96,11 +96,11 @@ }; }; - sata2: ahci@120000 { + sata2: ahci@20000 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00120000 0x1000>; + reg = <0x00020000 0x1000>; reg-names = "ahci"; - interrupts = <GIC_SPI 333 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 325 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -112,9 +112,9 @@ }; }; - sata_phy2: sata_phy@122100 { + sata_phy2: sata_phy@22100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00122100 0x1000>; + reg = <0x00022100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; @@ -126,11 +126,11 @@ }; }; - sata3: ahci@130000 { + sata3: ahci@30000 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00130000 0x1000>; + reg = <0x00030000 0x1000>; reg-names = "ahci"; - interrupts = <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 327 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -142,9 +142,9 @@ }; }; - sata_phy3: sata_phy@132100 { + sata_phy3: sata_phy@32100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00132100 0x1000>; + reg = <0x00032100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; @@ -156,11 +156,11 @@ }; }; - sata4: ahci@330000 { + sata4: ahci@100000 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00330000 0x1000>; + reg = <0x00100000 0x1000>; reg-names = "ahci"; - interrupts = <GIC_SPI 351 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 329 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -172,9 +172,9 @@ }; }; - sata_phy4: sata_phy@332100 { + sata_phy4: sata_phy@102100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00332100 0x1000>; + reg = <0x00102100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; @@ -186,11 +186,11 @@ }; }; - sata5: ahci@400000 { + sata5: ahci@110000 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00400000 0x1000>; + reg = <0x00110000 0x1000>; reg-names = "ahci"; - interrupts = <GIC_SPI 353 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 331 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -202,9 +202,9 @@ }; }; - sata_phy5: sata_phy@402100 { + sata_phy5: sata_phy@112100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00402100 0x1000>; + reg = <0x00112100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; @@ -216,11 +216,11 @@ }; }; - sata6: ahci@410000 { + sata6: ahci@120000 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00410000 0x1000>; + reg = <0x00120000 0x1000>; reg-names = "ahci"; - interrupts = <GIC_SPI 355 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 333 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -232,9 +232,9 @@ }; }; - sata_phy6: sata_phy@412100 { + sata_phy6: sata_phy@122100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00412100 0x1000>; + reg = <0x00122100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; @@ -246,11 +246,11 @@ }; }; - sata7: ahci@420000 { + sata7: ahci@130000 { compatible = "brcm,iproc-ahci", "generic-ahci"; - reg = <0x00420000 0x1000>; + reg = <0x00130000 0x1000>; reg-names = "ahci"; - interrupts = <GIC_SPI 357 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -262,9 +262,9 @@ }; }; - sata_phy7: sata_phy@422100 { + sata_phy7: sata_phy@132100 { compatible = "brcm,iproc-sr-sata-phy"; - reg = <0x00422100 0x1000>; + reg = <0x00132100 0x1000>; reg-names = "phy"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/exynos/exynos5433.dtsi b/arch/arm64/boot/dts/exynos/exynos5433.dtsi index c0231d077fa6..1ad8677f6a0a 100644 --- a/arch/arm64/boot/dts/exynos/exynos5433.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos5433.dtsi @@ -1317,7 +1317,7 @@ reg = <0x14d60000 0x100>; dmas = <&pdma0 31 &pdma0 30>; dma-names = "tx", "rx"; - interrupts = <GIC_SPI 435 IRQ_TYPE_NONE>; + interrupts = <GIC_SPI 435 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cmu_peric CLK_PCLK_I2S1>, <&cmu_peric CLK_PCLK_I2S1>, <&cmu_peric CLK_SCLK_I2S1>; diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index 724a0d3b7683..edb4ee0b8896 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -299,7 +299,6 @@ /* GPIO blocks 16 thru 19 do not appear to be routed to pins */ dwmmc_0: dwmmc0@f723d000 { - max-frequency = <150000000>; cap-mmc-highspeed; mmc-hs200-1_8v; non-removable; diff --git a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi index 48cad7919efa..ed2f1237ea1e 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi @@ -38,9 +38,10 @@ compatible = "marvell,armada-7k-pp22"; reg = <0x0 0x100000>, <0x129000 0xb000>; clocks = <&CP110_LABEL(clk) 1 3>, <&CP110_LABEL(clk) 1 9>, - <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 18>; + <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 6>, + <&CP110_LABEL(clk) 1 18>; clock-names = "pp_clk", "gop_clk", - "mg_clk", "axi_clk"; + "mg_clk", "mg_core_clk", "axi_clk"; marvell,system-controller = <&CP110_LABEL(syscon0)>; status = "disabled"; dma-coherent; @@ -141,6 +142,8 @@ #size-cells = <0>; compatible = "marvell,xmdio"; reg = <0x12a600 0x10>; + clocks = <&CP110_LABEL(clk) 1 5>, + <&CP110_LABEL(clk) 1 6>, <&CP110_LABEL(clk) 1 18>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi index a8baad7b80df..13f57fff1477 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi @@ -46,7 +46,7 @@ compatible = "ethernet-phy-ieee802.3-c22"; reg = <0x0>; interrupt-parent = <&gpio>; - interrupts = <TEGRA_MAIN_GPIO(M, 5) IRQ_TYPE_LEVEL_HIGH>; + interrupts = <TEGRA_MAIN_GPIO(M, 5) IRQ_TYPE_LEVEL_LOW>; }; }; }; diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi index 24552f19b3fa..6a573875d45a 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi @@ -36,4 +36,30 @@ drive-strength = <2>; /* 2 MA */ }; }; + + blsp1_uart1_default: blsp1_uart1_default { + mux { + pins = "gpio41", "gpio42", "gpio43", "gpio44"; + function = "blsp_uart2"; + }; + + config { + pins = "gpio41", "gpio42", "gpio43", "gpio44"; + drive-strength = <16>; + bias-disable; + }; + }; + + blsp1_uart1_sleep: blsp1_uart1_sleep { + mux { + pins = "gpio41", "gpio42", "gpio43", "gpio44"; + function = "gpio"; + }; + + config { + pins = "gpio41", "gpio42", "gpio43", "gpio44"; + drive-strength = <2>; + bias-disable; + }; + }; }; diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi index 59b29ddfb6e9..6167af955659 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi @@ -14,6 +14,28 @@ }; }; + bt_en_gpios: bt_en_gpios { + pinconf { + pins = "gpio19"; + function = PMIC_GPIO_FUNC_NORMAL; + output-low; + power-source = <PM8994_GPIO_S4>; // 1.8V + qcom,drive-strength = <PMIC_GPIO_STRENGTH_LOW>; + bias-pull-down; + }; + }; + + wlan_en_gpios: wlan_en_gpios { + pinconf { + pins = "gpio8"; + function = PMIC_GPIO_FUNC_NORMAL; + output-low; + power-source = <PM8994_GPIO_S4>; // 1.8V + qcom,drive-strength = <PMIC_GPIO_STRENGTH_LOW>; + bias-pull-down; + }; + }; + volume_up_gpio: pm8996_gpio2 { pinconf { pins = "gpio2"; @@ -26,6 +48,16 @@ }; }; + divclk4_pin_a: divclk4 { + pinconf { + pins = "gpio18"; + function = PMIC_GPIO_FUNC_FUNC2; + + bias-disable; + power-source = <PM8994_GPIO_S4>; + }; + }; + usb3_vbus_det_gpio: pm8996_gpio22 { pinconf { pins = "gpio22"; diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi index 1c8f1b86472d..4b8bb026346e 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi @@ -23,6 +23,7 @@ aliases { serial0 = &blsp2_uart1; serial1 = &blsp2_uart2; + serial2 = &blsp1_uart1; i2c0 = &blsp1_i2c2; i2c1 = &blsp2_i2c1; i2c2 = &blsp2_i2c0; @@ -34,7 +35,36 @@ stdout-path = "serial0:115200n8"; }; + clocks { + divclk4: divclk4 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "divclk4"; + + pinctrl-names = "default"; + pinctrl-0 = <&divclk4_pin_a>; + }; + }; + soc { + serial@7570000 { + label = "BT-UART"; + status = "okay"; + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&blsp1_uart1_default>; + pinctrl-1 = <&blsp1_uart1_sleep>; + + bluetooth { + compatible = "qcom,qca6174-bt"; + + /* bt_disable_n gpio */ + enable-gpios = <&pm8994_gpios 19 GPIO_ACTIVE_HIGH>; + + clocks = <&divclk4>; + }; + }; + serial@75b0000 { label = "LS-UART1"; status = "okay"; @@ -139,9 +169,40 @@ pinctrl-0 = <&usb2_vbus_det_gpio>; }; + bt_en: bt-en-1-8v { + pinctrl-names = "default"; + pinctrl-0 = <&bt_en_gpios>; + compatible = "regulator-fixed"; + regulator-name = "bt-en-regulator"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + + /* WLAN card specific delay */ + startup-delay-us = <70000>; + enable-active-high; + }; + + wlan_en: wlan-en-1-8v { + pinctrl-names = "default"; + pinctrl-0 = <&wlan_en_gpios>; + compatible = "regulator-fixed"; + regulator-name = "wlan-en-regulator"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + + gpio = <&pm8994_gpios 8 0>; + + /* WLAN card specific delay */ + startup-delay-us = <70000>; + enable-active-high; + }; + agnoc@0 { qcom,pcie@600000 { + status = "okay"; perst-gpio = <&msmgpio 35 GPIO_ACTIVE_LOW>; + vddpe-supply = <&wlan_en>; + vddpe1-supply = <&bt_en>; }; qcom,pcie@608000 { diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 410ae787ebb4..f8e49d0b4681 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -419,6 +419,16 @@ #clock-cells = <1>; }; + blsp1_uart1: serial@7570000 { + compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm"; + reg = <0x07570000 0x1000>; + interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&gcc GCC_BLSP1_UART2_APPS_CLK>, + <&gcc GCC_BLSP1_AHB_CLK>; + clock-names = "core", "iface"; + status = "disabled"; + }; + blsp1_spi0: spi@7575000 { compatible = "qcom,spi-qup-v2.2.1"; reg = <0x07575000 0x600>; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi index e62bda1cf2d9..c32dd3419c87 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi @@ -414,7 +414,7 @@ mmc-ddr-1_8v; mmc-hs200-1_8v; mmc-pwrseq = <&emmc_pwrseq>; - cdns,phy-input-delay-legacy = <4>; + cdns,phy-input-delay-legacy = <9>; cdns,phy-input-delay-mmc-highspeed = <2>; cdns,phy-input-delay-mmc-ddr = <3>; cdns,phy-dll-delay-sdclk = <21>; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts index 2c1a92fafbfb..440c2e6a638b 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts @@ -67,3 +67,11 @@ reg = <0>; }; }; + +&pinctrl_ether_rgmii { + tx { + pins = "RGMII_TXCLK", "RGMII_TXD0", "RGMII_TXD1", + "RGMII_TXD2", "RGMII_TXD3", "RGMII_TXCTL"; + drive-strength = <9>; + }; +}; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi index 9efe20d07589..3a5ed789c056 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi @@ -519,7 +519,7 @@ mmc-ddr-1_8v; mmc-hs200-1_8v; mmc-pwrseq = <&emmc_pwrseq>; - cdns,phy-input-delay-legacy = <4>; + cdns,phy-input-delay-legacy = <9>; cdns,phy-input-delay-mmc-highspeed = <2>; cdns,phy-input-delay-mmc-ddr = <3>; cdns,phy-dll-delay-sdclk = <21>; diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi index 7c8f710d9bfa..e85d6ddea3c2 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi @@ -334,7 +334,7 @@ mmc-ddr-1_8v; mmc-hs200-1_8v; mmc-pwrseq = <&emmc_pwrseq>; - cdns,phy-input-delay-legacy = <4>; + cdns,phy-input-delay-legacy = <9>; cdns,phy-input-delay-mmc-highspeed = <2>; cdns,phy-input-delay-mmc-ddr = <3>; cdns,phy-dll-delay-sdclk = <21>; diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index cb5a243110c4..e3fdb0fd6f70 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -47,6 +47,12 @@ config CRYPTO_SM3_ARM64_CE select CRYPTO_HASH select CRYPTO_SM3 +config CRYPTO_SM4_ARM64_CE + tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)" + depends on KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_SM4 + config CRYPTO_GHASH_ARM64_CE tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" depends on KERNEL_MODE_NEON diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 8df9f326f449..bcafd016618e 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -23,6 +23,9 @@ sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o +obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce.o +sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o + obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o @@ -78,4 +81,4 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl $(call cmd,perlasm) endif -.PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S +targets += sha256-core.S sha512-core.S diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index e3a375c4cb83..88f5aef7934c 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -19,24 +19,33 @@ * u32 *macp, u8 const rk[], u32 rounds); */ ENTRY(ce_aes_ccm_auth_data) - ldr w8, [x3] /* leftover from prev round? */ + frame_push 7 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 + + ldr w25, [x22] /* leftover from prev round? */ ld1 {v0.16b}, [x0] /* load mac */ - cbz w8, 1f - sub w8, w8, #16 + cbz w25, 1f + sub w25, w25, #16 eor v1.16b, v1.16b, v1.16b -0: ldrb w7, [x1], #1 /* get 1 byte of input */ - subs w2, w2, #1 - add w8, w8, #1 +0: ldrb w7, [x20], #1 /* get 1 byte of input */ + subs w21, w21, #1 + add w25, w25, #1 ins v1.b[0], w7 ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ beq 8f /* out of input? */ - cbnz w8, 0b + cbnz w25, 0b eor v0.16b, v0.16b, v1.16b -1: ld1 {v3.4s}, [x4] /* load first round key */ - prfm pldl1strm, [x1] - cmp w5, #12 /* which key size? */ - add x6, x4, #16 - sub w7, w5, #2 /* modified # of rounds */ +1: ld1 {v3.4s}, [x23] /* load first round key */ + prfm pldl1strm, [x20] + cmp w24, #12 /* which key size? */ + add x6, x23, #16 + sub w7, w24, #2 /* modified # of rounds */ bmi 2f bne 5f mov v5.16b, v3.16b @@ -55,33 +64,43 @@ ENTRY(ce_aes_ccm_auth_data) ld1 {v5.4s}, [x6], #16 /* load next round key */ bpl 3b aese v0.16b, v4.16b - subs w2, w2, #16 /* last data? */ + subs w21, w21, #16 /* last data? */ eor v0.16b, v0.16b, v5.16b /* final round */ bmi 6f - ld1 {v1.16b}, [x1], #16 /* load next input block */ + ld1 {v1.16b}, [x20], #16 /* load next input block */ eor v0.16b, v0.16b, v1.16b /* xor with mac */ - bne 1b -6: st1 {v0.16b}, [x0] /* store mac */ + beq 6f + + if_will_cond_yield_neon + st1 {v0.16b}, [x19] /* store mac */ + do_cond_yield_neon + ld1 {v0.16b}, [x19] /* reload mac */ + endif_yield_neon + + b 1b +6: st1 {v0.16b}, [x19] /* store mac */ beq 10f - adds w2, w2, #16 + adds w21, w21, #16 beq 10f - mov w8, w2 -7: ldrb w7, [x1], #1 + mov w25, w21 +7: ldrb w7, [x20], #1 umov w6, v0.b[0] eor w6, w6, w7 - strb w6, [x0], #1 - subs w2, w2, #1 + strb w6, [x19], #1 + subs w21, w21, #1 beq 10f ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ b 7b -8: mov w7, w8 - add w8, w8, #16 +8: mov w7, w25 + add w25, w25, #16 9: ext v1.16b, v1.16b, v1.16b, #1 adds w7, w7, #1 bne 9b eor v0.16b, v0.16b, v1.16b - st1 {v0.16b}, [x0] -10: str w8, [x3] + st1 {v0.16b}, [x19] +10: str w25, [x22] + + frame_pop ret ENDPROC(ce_aes_ccm_auth_data) @@ -126,19 +145,29 @@ ENTRY(ce_aes_ccm_final) ENDPROC(ce_aes_ccm_final) .macro aes_ccm_do_crypt,enc - ldr x8, [x6, #8] /* load lower ctr */ - ld1 {v0.16b}, [x5] /* load mac */ -CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ + frame_push 8 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 + mov x25, x6 + + ldr x26, [x25, #8] /* load lower ctr */ + ld1 {v0.16b}, [x24] /* load mac */ +CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */ 0: /* outer loop */ - ld1 {v1.8b}, [x6] /* load upper ctr */ - prfm pldl1strm, [x1] - add x8, x8, #1 - rev x9, x8 - cmp w4, #12 /* which key size? */ - sub w7, w4, #2 /* get modified # of rounds */ + ld1 {v1.8b}, [x25] /* load upper ctr */ + prfm pldl1strm, [x20] + add x26, x26, #1 + rev x9, x26 + cmp w23, #12 /* which key size? */ + sub w7, w23, #2 /* get modified # of rounds */ ins v1.d[1], x9 /* no carry in lower ctr */ - ld1 {v3.4s}, [x3] /* load first round key */ - add x10, x3, #16 + ld1 {v3.4s}, [x22] /* load first round key */ + add x10, x22, #16 bmi 1f bne 4f mov v5.16b, v3.16b @@ -165,9 +194,9 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ bpl 2b aese v0.16b, v4.16b aese v1.16b, v4.16b - subs w2, w2, #16 - bmi 6f /* partial block? */ - ld1 {v2.16b}, [x1], #16 /* load next input block */ + subs w21, w21, #16 + bmi 7f /* partial block? */ + ld1 {v2.16b}, [x20], #16 /* load next input block */ .if \enc == 1 eor v2.16b, v2.16b, v5.16b /* final round enc+mac */ eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */ @@ -176,18 +205,29 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ eor v1.16b, v2.16b, v5.16b /* final round enc */ .endif eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ - st1 {v1.16b}, [x0], #16 /* write output block */ - bne 0b -CPU_LE( rev x8, x8 ) - st1 {v0.16b}, [x5] /* store mac */ - str x8, [x6, #8] /* store lsb end of ctr (BE) */ -5: ret - -6: eor v0.16b, v0.16b, v5.16b /* final round mac */ + st1 {v1.16b}, [x19], #16 /* write output block */ + beq 5f + + if_will_cond_yield_neon + st1 {v0.16b}, [x24] /* store mac */ + do_cond_yield_neon + ld1 {v0.16b}, [x24] /* reload mac */ + endif_yield_neon + + b 0b +5: +CPU_LE( rev x26, x26 ) + st1 {v0.16b}, [x24] /* store mac */ + str x26, [x25, #8] /* store lsb end of ctr (BE) */ + +6: frame_pop + ret + +7: eor v0.16b, v0.16b, v5.16b /* final round mac */ eor v1.16b, v1.16b, v5.16b /* final round enc */ - st1 {v0.16b}, [x5] /* store mac */ - add w2, w2, #16 /* process partial tail block */ -7: ldrb w9, [x1], #1 /* get 1 byte of input */ + st1 {v0.16b}, [x24] /* store mac */ + add w21, w21, #16 /* process partial tail block */ +8: ldrb w9, [x20], #1 /* get 1 byte of input */ umov w6, v1.b[0] /* get top crypted ctr byte */ umov w7, v0.b[0] /* get top mac byte */ .if \enc == 1 @@ -197,13 +237,13 @@ CPU_LE( rev x8, x8 ) eor w9, w9, w6 eor w7, w7, w9 .endif - strb w9, [x0], #1 /* store out byte */ - strb w7, [x5], #1 /* store mac byte */ - subs w2, w2, #1 - beq 5b + strb w9, [x19], #1 /* store out byte */ + strb w7, [x24], #1 /* store mac byte */ + subs w21, w21, #1 + beq 6b ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */ ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */ - b 7b + b 8b .endm /* diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index 50330f5c3adc..623e74ed1c67 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -30,18 +30,21 @@ .endm /* prepare for encryption with key in rk[] */ - .macro enc_prepare, rounds, rk, ignore - load_round_keys \rounds, \rk + .macro enc_prepare, rounds, rk, temp + mov \temp, \rk + load_round_keys \rounds, \temp .endm /* prepare for encryption (again) but with new key in rk[] */ - .macro enc_switch_key, rounds, rk, ignore - load_round_keys \rounds, \rk + .macro enc_switch_key, rounds, rk, temp + mov \temp, \rk + load_round_keys \rounds, \temp .endm /* prepare for decryption with key in rk[] */ - .macro dec_prepare, rounds, rk, ignore - load_round_keys \rounds, \rk + .macro dec_prepare, rounds, rk, temp + mov \temp, \rk + load_round_keys \rounds, \temp .endm .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index a68412e1e3a4..483a7130cf0e 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -14,12 +14,12 @@ .align 4 aes_encrypt_block4x: - encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 + encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7 ret ENDPROC(aes_encrypt_block4x) aes_decrypt_block4x: - decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 + decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7 ret ENDPROC(aes_decrypt_block4x) @@ -31,57 +31,71 @@ ENDPROC(aes_decrypt_block4x) */ AES_ENTRY(aes_ecb_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 5 - enc_prepare w3, x2, x5 + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + +.Lecbencrestart: + enc_prepare w22, x21, x5 .LecbencloopNx: - subs w4, w4, #4 + subs w23, w23, #4 bmi .Lecbenc1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */ bl aes_encrypt_block4x - st1 {v0.16b-v3.16b}, [x0], #64 + st1 {v0.16b-v3.16b}, [x19], #64 + cond_yield_neon .Lecbencrestart b .LecbencloopNx .Lecbenc1x: - adds w4, w4, #4 + adds w23, w23, #4 beq .Lecbencout .Lecbencloop: - ld1 {v0.16b}, [x1], #16 /* get next pt block */ - encrypt_block v0, w3, x2, x5, w6 - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 + ld1 {v0.16b}, [x20], #16 /* get next pt block */ + encrypt_block v0, w22, x21, x5, w6 + st1 {v0.16b}, [x19], #16 + subs w23, w23, #1 bne .Lecbencloop .Lecbencout: - ldp x29, x30, [sp], #16 + frame_pop ret AES_ENDPROC(aes_ecb_encrypt) AES_ENTRY(aes_ecb_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 5 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 - dec_prepare w3, x2, x5 +.Lecbdecrestart: + dec_prepare w22, x21, x5 .LecbdecloopNx: - subs w4, w4, #4 + subs w23, w23, #4 bmi .Lecbdec1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */ bl aes_decrypt_block4x - st1 {v0.16b-v3.16b}, [x0], #64 + st1 {v0.16b-v3.16b}, [x19], #64 + cond_yield_neon .Lecbdecrestart b .LecbdecloopNx .Lecbdec1x: - adds w4, w4, #4 + adds w23, w23, #4 beq .Lecbdecout .Lecbdecloop: - ld1 {v0.16b}, [x1], #16 /* get next ct block */ - decrypt_block v0, w3, x2, x5, w6 - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 + ld1 {v0.16b}, [x20], #16 /* get next ct block */ + decrypt_block v0, w22, x21, x5, w6 + st1 {v0.16b}, [x19], #16 + subs w23, w23, #1 bne .Lecbdecloop .Lecbdecout: - ldp x29, x30, [sp], #16 + frame_pop ret AES_ENDPROC(aes_ecb_decrypt) @@ -94,78 +108,100 @@ AES_ENDPROC(aes_ecb_decrypt) */ AES_ENTRY(aes_cbc_encrypt) - ld1 {v4.16b}, [x5] /* get iv */ - enc_prepare w3, x2, x6 + frame_push 6 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 + +.Lcbcencrestart: + ld1 {v4.16b}, [x24] /* get iv */ + enc_prepare w22, x21, x6 .Lcbcencloop4x: - subs w4, w4, #4 + subs w23, w23, #4 bmi .Lcbcenc1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */ eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */ - encrypt_block v0, w3, x2, x6, w7 + encrypt_block v0, w22, x21, x6, w7 eor v1.16b, v1.16b, v0.16b - encrypt_block v1, w3, x2, x6, w7 + encrypt_block v1, w22, x21, x6, w7 eor v2.16b, v2.16b, v1.16b - encrypt_block v2, w3, x2, x6, w7 + encrypt_block v2, w22, x21, x6, w7 eor v3.16b, v3.16b, v2.16b - encrypt_block v3, w3, x2, x6, w7 - st1 {v0.16b-v3.16b}, [x0], #64 + encrypt_block v3, w22, x21, x6, w7 + st1 {v0.16b-v3.16b}, [x19], #64 mov v4.16b, v3.16b + st1 {v4.16b}, [x24] /* return iv */ + cond_yield_neon .Lcbcencrestart b .Lcbcencloop4x .Lcbcenc1x: - adds w4, w4, #4 + adds w23, w23, #4 beq .Lcbcencout .Lcbcencloop: - ld1 {v0.16b}, [x1], #16 /* get next pt block */ + ld1 {v0.16b}, [x20], #16 /* get next pt block */ eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */ - encrypt_block v4, w3, x2, x6, w7 - st1 {v4.16b}, [x0], #16 - subs w4, w4, #1 + encrypt_block v4, w22, x21, x6, w7 + st1 {v4.16b}, [x19], #16 + subs w23, w23, #1 bne .Lcbcencloop .Lcbcencout: - st1 {v4.16b}, [x5] /* return iv */ + st1 {v4.16b}, [x24] /* return iv */ + frame_pop ret AES_ENDPROC(aes_cbc_encrypt) AES_ENTRY(aes_cbc_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 6 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 - ld1 {v7.16b}, [x5] /* get iv */ - dec_prepare w3, x2, x6 +.Lcbcdecrestart: + ld1 {v7.16b}, [x24] /* get iv */ + dec_prepare w22, x21, x6 .LcbcdecloopNx: - subs w4, w4, #4 + subs w23, w23, #4 bmi .Lcbcdec1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */ mov v4.16b, v0.16b mov v5.16b, v1.16b mov v6.16b, v2.16b bl aes_decrypt_block4x - sub x1, x1, #16 + sub x20, x20, #16 eor v0.16b, v0.16b, v7.16b eor v1.16b, v1.16b, v4.16b - ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */ + ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */ eor v2.16b, v2.16b, v5.16b eor v3.16b, v3.16b, v6.16b - st1 {v0.16b-v3.16b}, [x0], #64 + st1 {v0.16b-v3.16b}, [x19], #64 + st1 {v7.16b}, [x24] /* return iv */ + cond_yield_neon .Lcbcdecrestart b .LcbcdecloopNx .Lcbcdec1x: - adds w4, w4, #4 + adds w23, w23, #4 beq .Lcbcdecout .Lcbcdecloop: - ld1 {v1.16b}, [x1], #16 /* get next ct block */ + ld1 {v1.16b}, [x20], #16 /* get next ct block */ mov v0.16b, v1.16b /* ...and copy to v0 */ - decrypt_block v0, w3, x2, x6, w7 + decrypt_block v0, w22, x21, x6, w7 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */ mov v7.16b, v1.16b /* ct is next iv */ - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 + st1 {v0.16b}, [x19], #16 + subs w23, w23, #1 bne .Lcbcdecloop .Lcbcdecout: - st1 {v7.16b}, [x5] /* return iv */ - ldp x29, x30, [sp], #16 + st1 {v7.16b}, [x24] /* return iv */ + frame_pop ret AES_ENDPROC(aes_cbc_decrypt) @@ -176,19 +212,26 @@ AES_ENDPROC(aes_cbc_decrypt) */ AES_ENTRY(aes_ctr_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 6 - enc_prepare w3, x2, x6 - ld1 {v4.16b}, [x5] + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 + +.Lctrrestart: + enc_prepare w22, x21, x6 + ld1 {v4.16b}, [x24] umov x6, v4.d[1] /* keep swabbed ctr in reg */ rev x6, x6 - cmn w6, w4 /* 32 bit overflow? */ - bcs .Lctrloop .LctrloopNx: - subs w4, w4, #4 + subs w23, w23, #4 bmi .Lctr1x + cmn w6, #4 /* 32 bit overflow? */ + bcs .Lctr1x ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ dup v7.4s, w6 mov v0.16b, v4.16b @@ -200,25 +243,27 @@ AES_ENTRY(aes_ctr_encrypt) mov v1.s[3], v8.s[0] mov v2.s[3], v8.s[1] mov v3.s[3], v8.s[2] - ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */ + ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */ bl aes_encrypt_block4x eor v0.16b, v5.16b, v0.16b - ld1 {v5.16b}, [x1], #16 /* get 1 input block */ + ld1 {v5.16b}, [x20], #16 /* get 1 input block */ eor v1.16b, v6.16b, v1.16b eor v2.16b, v7.16b, v2.16b eor v3.16b, v5.16b, v3.16b - st1 {v0.16b-v3.16b}, [x0], #64 + st1 {v0.16b-v3.16b}, [x19], #64 add x6, x6, #4 rev x7, x6 ins v4.d[1], x7 - cbz w4, .Lctrout + cbz w23, .Lctrout + st1 {v4.16b}, [x24] /* return next CTR value */ + cond_yield_neon .Lctrrestart b .LctrloopNx .Lctr1x: - adds w4, w4, #4 + adds w23, w23, #4 beq .Lctrout .Lctrloop: mov v0.16b, v4.16b - encrypt_block v0, w3, x2, x8, w7 + encrypt_block v0, w22, x21, x8, w7 adds x6, x6, #1 /* increment BE ctr */ rev x7, x6 @@ -226,22 +271,22 @@ AES_ENTRY(aes_ctr_encrypt) bcs .Lctrcarry /* overflow? */ .Lctrcarrydone: - subs w4, w4, #1 + subs w23, w23, #1 bmi .Lctrtailblock /* blocks <0 means tail block */ - ld1 {v3.16b}, [x1], #16 + ld1 {v3.16b}, [x20], #16 eor v3.16b, v0.16b, v3.16b - st1 {v3.16b}, [x0], #16 + st1 {v3.16b}, [x19], #16 bne .Lctrloop .Lctrout: - st1 {v4.16b}, [x5] /* return next CTR value */ - ldp x29, x30, [sp], #16 + st1 {v4.16b}, [x24] /* return next CTR value */ +.Lctrret: + frame_pop ret .Lctrtailblock: - st1 {v0.16b}, [x0] - ldp x29, x30, [sp], #16 - ret + st1 {v0.16b}, [x19] + b .Lctrret .Lctrcarry: umov x7, v4.d[0] /* load upper word of ctr */ @@ -274,10 +319,16 @@ CPU_LE( .quad 1, 0x87 ) CPU_BE( .quad 0x87, 1 ) AES_ENTRY(aes_xts_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 6 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x6 - ld1 {v4.16b}, [x6] + ld1 {v4.16b}, [x24] cbz w7, .Lxtsencnotfirst enc_prepare w3, x5, x8 @@ -286,15 +337,17 @@ AES_ENTRY(aes_xts_encrypt) ldr q7, .Lxts_mul_x b .LxtsencNx +.Lxtsencrestart: + ld1 {v4.16b}, [x24] .Lxtsencnotfirst: - enc_prepare w3, x2, x8 + enc_prepare w22, x21, x8 .LxtsencloopNx: ldr q7, .Lxts_mul_x next_tweak v4, v4, v7, v8 .LxtsencNx: - subs w4, w4, #4 + subs w23, w23, #4 bmi .Lxtsenc1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */ next_tweak v5, v4, v7, v8 eor v0.16b, v0.16b, v4.16b next_tweak v6, v5, v7, v8 @@ -307,35 +360,43 @@ AES_ENTRY(aes_xts_encrypt) eor v0.16b, v0.16b, v4.16b eor v1.16b, v1.16b, v5.16b eor v2.16b, v2.16b, v6.16b - st1 {v0.16b-v3.16b}, [x0], #64 + st1 {v0.16b-v3.16b}, [x19], #64 mov v4.16b, v7.16b - cbz w4, .Lxtsencout + cbz w23, .Lxtsencout + st1 {v4.16b}, [x24] + cond_yield_neon .Lxtsencrestart b .LxtsencloopNx .Lxtsenc1x: - adds w4, w4, #4 + adds w23, w23, #4 beq .Lxtsencout .Lxtsencloop: - ld1 {v1.16b}, [x1], #16 + ld1 {v1.16b}, [x20], #16 eor v0.16b, v1.16b, v4.16b - encrypt_block v0, w3, x2, x8, w7 + encrypt_block v0, w22, x21, x8, w7 eor v0.16b, v0.16b, v4.16b - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 + st1 {v0.16b}, [x19], #16 + subs w23, w23, #1 beq .Lxtsencout next_tweak v4, v4, v7, v8 b .Lxtsencloop .Lxtsencout: - st1 {v4.16b}, [x6] - ldp x29, x30, [sp], #16 + st1 {v4.16b}, [x24] + frame_pop ret AES_ENDPROC(aes_xts_encrypt) AES_ENTRY(aes_xts_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 6 - ld1 {v4.16b}, [x6] + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x6 + + ld1 {v4.16b}, [x24] cbz w7, .Lxtsdecnotfirst enc_prepare w3, x5, x8 @@ -344,15 +405,17 @@ AES_ENTRY(aes_xts_decrypt) ldr q7, .Lxts_mul_x b .LxtsdecNx +.Lxtsdecrestart: + ld1 {v4.16b}, [x24] .Lxtsdecnotfirst: - dec_prepare w3, x2, x8 + dec_prepare w22, x21, x8 .LxtsdecloopNx: ldr q7, .Lxts_mul_x next_tweak v4, v4, v7, v8 .LxtsdecNx: - subs w4, w4, #4 + subs w23, w23, #4 bmi .Lxtsdec1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */ next_tweak v5, v4, v7, v8 eor v0.16b, v0.16b, v4.16b next_tweak v6, v5, v7, v8 @@ -365,26 +428,28 @@ AES_ENTRY(aes_xts_decrypt) eor v0.16b, v0.16b, v4.16b eor v1.16b, v1.16b, v5.16b eor v2.16b, v2.16b, v6.16b - st1 {v0.16b-v3.16b}, [x0], #64 + st1 {v0.16b-v3.16b}, [x19], #64 mov v4.16b, v7.16b - cbz w4, .Lxtsdecout + cbz w23, .Lxtsdecout + st1 {v4.16b}, [x24] + cond_yield_neon .Lxtsdecrestart b .LxtsdecloopNx .Lxtsdec1x: - adds w4, w4, #4 + adds w23, w23, #4 beq .Lxtsdecout .Lxtsdecloop: - ld1 {v1.16b}, [x1], #16 + ld1 {v1.16b}, [x20], #16 eor v0.16b, v1.16b, v4.16b - decrypt_block v0, w3, x2, x8, w7 + decrypt_block v0, w22, x21, x8, w7 eor v0.16b, v0.16b, v4.16b - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 + st1 {v0.16b}, [x19], #16 + subs w23, w23, #1 beq .Lxtsdecout next_tweak v4, v4, v7, v8 b .Lxtsdecloop .Lxtsdecout: - st1 {v4.16b}, [x6] - ldp x29, x30, [sp], #16 + st1 {v4.16b}, [x24] + frame_pop ret AES_ENDPROC(aes_xts_decrypt) @@ -393,43 +458,61 @@ AES_ENDPROC(aes_xts_decrypt) * int blocks, u8 dg[], int enc_before, int enc_after) */ AES_ENTRY(aes_mac_update) - ld1 {v0.16b}, [x4] /* get dg */ + frame_push 6 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x6 + + ld1 {v0.16b}, [x23] /* get dg */ enc_prepare w2, x1, x7 cbz w5, .Lmacloop4x encrypt_block v0, w2, x1, x7, w8 .Lmacloop4x: - subs w3, w3, #4 + subs w22, w22, #4 bmi .Lmac1x - ld1 {v1.16b-v4.16b}, [x0], #64 /* get next pt block */ + ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */ eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ - encrypt_block v0, w2, x1, x7, w8 + encrypt_block v0, w21, x20, x7, w8 eor v0.16b, v0.16b, v2.16b - encrypt_block v0, w2, x1, x7, w8 + encrypt_block v0, w21, x20, x7, w8 eor v0.16b, v0.16b, v3.16b - encrypt_block v0, w2, x1, x7, w8 + encrypt_block v0, w21, x20, x7, w8 eor v0.16b, v0.16b, v4.16b - cmp w3, wzr - csinv x5, x6, xzr, eq + cmp w22, wzr + csinv x5, x24, xzr, eq cbz w5, .Lmacout - encrypt_block v0, w2, x1, x7, w8 + encrypt_block v0, w21, x20, x7, w8 + st1 {v0.16b}, [x23] /* return dg */ + cond_yield_neon .Lmacrestart b .Lmacloop4x .Lmac1x: - add w3, w3, #4 + add w22, w22, #4 .Lmacloop: - cbz w3, .Lmacout - ld1 {v1.16b}, [x0], #16 /* get next pt block */ + cbz w22, .Lmacout + ld1 {v1.16b}, [x19], #16 /* get next pt block */ eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ - subs w3, w3, #1 - csinv x5, x6, xzr, eq + subs w22, w22, #1 + csinv x5, x24, xzr, eq cbz w5, .Lmacout - encrypt_block v0, w2, x1, x7, w8 +.Lmacenc: + encrypt_block v0, w21, x20, x7, w8 b .Lmacloop .Lmacout: - st1 {v0.16b}, [x4] /* return dg */ + st1 {v0.16b}, [x23] /* return dg */ + frame_pop ret + +.Lmacrestart: + ld1 {v0.16b}, [x23] /* get dg */ + enc_prepare w21, x20, x0 + b .Lmacloop4x AES_ENDPROC(aes_mac_update) diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index ca0472500433..e613a87f8b53 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -565,54 +565,61 @@ ENDPROC(aesbs_decrypt8) * int blocks) */ .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 5 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 99: mov x5, #1 - lsl x5, x5, x4 - subs w4, w4, #8 - csel x4, x4, xzr, pl + lsl x5, x5, x23 + subs w23, w23, #8 + csel x23, x23, xzr, pl csel x5, x5, xzr, mi - ld1 {v0.16b}, [x1], #16 + ld1 {v0.16b}, [x20], #16 tbnz x5, #1, 0f - ld1 {v1.16b}, [x1], #16 + ld1 {v1.16b}, [x20], #16 tbnz x5, #2, 0f - ld1 {v2.16b}, [x1], #16 + ld1 {v2.16b}, [x20], #16 tbnz x5, #3, 0f - ld1 {v3.16b}, [x1], #16 + ld1 {v3.16b}, [x20], #16 tbnz x5, #4, 0f - ld1 {v4.16b}, [x1], #16 + ld1 {v4.16b}, [x20], #16 tbnz x5, #5, 0f - ld1 {v5.16b}, [x1], #16 + ld1 {v5.16b}, [x20], #16 tbnz x5, #6, 0f - ld1 {v6.16b}, [x1], #16 + ld1 {v6.16b}, [x20], #16 tbnz x5, #7, 0f - ld1 {v7.16b}, [x1], #16 + ld1 {v7.16b}, [x20], #16 -0: mov bskey, x2 - mov rounds, x3 +0: mov bskey, x21 + mov rounds, x22 bl \do8 - st1 {\o0\().16b}, [x0], #16 + st1 {\o0\().16b}, [x19], #16 tbnz x5, #1, 1f - st1 {\o1\().16b}, [x0], #16 + st1 {\o1\().16b}, [x19], #16 tbnz x5, #2, 1f - st1 {\o2\().16b}, [x0], #16 + st1 {\o2\().16b}, [x19], #16 tbnz x5, #3, 1f - st1 {\o3\().16b}, [x0], #16 + st1 {\o3\().16b}, [x19], #16 tbnz x5, #4, 1f - st1 {\o4\().16b}, [x0], #16 + st1 {\o4\().16b}, [x19], #16 tbnz x5, #5, 1f - st1 {\o5\().16b}, [x0], #16 + st1 {\o5\().16b}, [x19], #16 tbnz x5, #6, 1f - st1 {\o6\().16b}, [x0], #16 + st1 {\o6\().16b}, [x19], #16 tbnz x5, #7, 1f - st1 {\o7\().16b}, [x0], #16 + st1 {\o7\().16b}, [x19], #16 - cbnz x4, 99b + cbz x23, 1f + cond_yield_neon + b 99b -1: ldp x29, x30, [sp], #16 +1: frame_pop ret .endm @@ -632,43 +639,49 @@ ENDPROC(aesbs_ecb_decrypt) */ .align 4 ENTRY(aesbs_cbc_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 6 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 99: mov x6, #1 - lsl x6, x6, x4 - subs w4, w4, #8 - csel x4, x4, xzr, pl + lsl x6, x6, x23 + subs w23, w23, #8 + csel x23, x23, xzr, pl csel x6, x6, xzr, mi - ld1 {v0.16b}, [x1], #16 + ld1 {v0.16b}, [x20], #16 mov v25.16b, v0.16b tbnz x6, #1, 0f - ld1 {v1.16b}, [x1], #16 + ld1 {v1.16b}, [x20], #16 mov v26.16b, v1.16b tbnz x6, #2, 0f - ld1 {v2.16b}, [x1], #16 + ld1 {v2.16b}, [x20], #16 mov v27.16b, v2.16b tbnz x6, #3, 0f - ld1 {v3.16b}, [x1], #16 + ld1 {v3.16b}, [x20], #16 mov v28.16b, v3.16b tbnz x6, #4, 0f - ld1 {v4.16b}, [x1], #16 + ld1 {v4.16b}, [x20], #16 mov v29.16b, v4.16b tbnz x6, #5, 0f - ld1 {v5.16b}, [x1], #16 + ld1 {v5.16b}, [x20], #16 mov v30.16b, v5.16b tbnz x6, #6, 0f - ld1 {v6.16b}, [x1], #16 + ld1 {v6.16b}, [x20], #16 mov v31.16b, v6.16b tbnz x6, #7, 0f - ld1 {v7.16b}, [x1] + ld1 {v7.16b}, [x20] -0: mov bskey, x2 - mov rounds, x3 +0: mov bskey, x21 + mov rounds, x22 bl aesbs_decrypt8 - ld1 {v24.16b}, [x5] // load IV + ld1 {v24.16b}, [x24] // load IV eor v1.16b, v1.16b, v25.16b eor v6.16b, v6.16b, v26.16b @@ -679,34 +692,36 @@ ENTRY(aesbs_cbc_decrypt) eor v3.16b, v3.16b, v30.16b eor v5.16b, v5.16b, v31.16b - st1 {v0.16b}, [x0], #16 + st1 {v0.16b}, [x19], #16 mov v24.16b, v25.16b tbnz x6, #1, 1f - st1 {v1.16b}, [x0], #16 + st1 {v1.16b}, [x19], #16 mov v24.16b, v26.16b tbnz x6, #2, 1f - st1 {v6.16b}, [x0], #16 + st1 {v6.16b}, [x19], #16 mov v24.16b, v27.16b tbnz x6, #3, 1f - st1 {v4.16b}, [x0], #16 + st1 {v4.16b}, [x19], #16 mov v24.16b, v28.16b tbnz x6, #4, 1f - st1 {v2.16b}, [x0], #16 + st1 {v2.16b}, [x19], #16 mov v24.16b, v29.16b tbnz x6, #5, 1f - st1 {v7.16b}, [x0], #16 + st1 {v7.16b}, [x19], #16 mov v24.16b, v30.16b tbnz x6, #6, 1f - st1 {v3.16b}, [x0], #16 + st1 {v3.16b}, [x19], #16 mov v24.16b, v31.16b tbnz x6, #7, 1f - ld1 {v24.16b}, [x1], #16 - st1 {v5.16b}, [x0], #16 -1: st1 {v24.16b}, [x5] // store IV + ld1 {v24.16b}, [x20], #16 + st1 {v5.16b}, [x19], #16 +1: st1 {v24.16b}, [x24] // store IV - cbnz x4, 99b + cbz x23, 2f + cond_yield_neon + b 99b - ldp x29, x30, [sp], #16 +2: frame_pop ret ENDPROC(aesbs_cbc_decrypt) @@ -731,87 +746,93 @@ CPU_BE( .quad 0x87, 1 ) */ __xts_crypt8: mov x6, #1 - lsl x6, x6, x4 - subs w4, w4, #8 - csel x4, x4, xzr, pl + lsl x6, x6, x23 + subs w23, w23, #8 + csel x23, x23, xzr, pl csel x6, x6, xzr, mi - ld1 {v0.16b}, [x1], #16 + ld1 {v0.16b}, [x20], #16 next_tweak v26, v25, v30, v31 eor v0.16b, v0.16b, v25.16b tbnz x6, #1, 0f - ld1 {v1.16b}, [x1], #16 + ld1 {v1.16b}, [x20], #16 next_tweak v27, v26, v30, v31 eor v1.16b, v1.16b, v26.16b tbnz x6, #2, 0f - ld1 {v2.16b}, [x1], #16 + ld1 {v2.16b}, [x20], #16 next_tweak v28, v27, v30, v31 eor v2.16b, v2.16b, v27.16b tbnz x6, #3, 0f - ld1 {v3.16b}, [x1], #16 + ld1 {v3.16b}, [x20], #16 next_tweak v29, v28, v30, v31 eor v3.16b, v3.16b, v28.16b tbnz x6, #4, 0f - ld1 {v4.16b}, [x1], #16 - str q29, [sp, #16] + ld1 {v4.16b}, [x20], #16 + str q29, [sp, #.Lframe_local_offset] eor v4.16b, v4.16b, v29.16b next_tweak v29, v29, v30, v31 tbnz x6, #5, 0f - ld1 {v5.16b}, [x1], #16 - str q29, [sp, #32] + ld1 {v5.16b}, [x20], #16 + str q29, [sp, #.Lframe_local_offset + 16] eor v5.16b, v5.16b, v29.16b next_tweak v29, v29, v30, v31 tbnz x6, #6, 0f - ld1 {v6.16b}, [x1], #16 - str q29, [sp, #48] + ld1 {v6.16b}, [x20], #16 + str q29, [sp, #.Lframe_local_offset + 32] eor v6.16b, v6.16b, v29.16b next_tweak v29, v29, v30, v31 tbnz x6, #7, 0f - ld1 {v7.16b}, [x1], #16 - str q29, [sp, #64] + ld1 {v7.16b}, [x20], #16 + str q29, [sp, #.Lframe_local_offset + 48] eor v7.16b, v7.16b, v29.16b next_tweak v29, v29, v30, v31 -0: mov bskey, x2 - mov rounds, x3 +0: mov bskey, x21 + mov rounds, x22 br x7 ENDPROC(__xts_crypt8) .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - stp x29, x30, [sp, #-80]! - mov x29, sp + frame_push 6, 64 - ldr q30, .Lxts_mul_x - ld1 {v25.16b}, [x5] + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 + +0: ldr q30, .Lxts_mul_x + ld1 {v25.16b}, [x24] 99: adr x7, \do8 bl __xts_crypt8 - ldp q16, q17, [sp, #16] - ldp q18, q19, [sp, #48] + ldp q16, q17, [sp, #.Lframe_local_offset] + ldp q18, q19, [sp, #.Lframe_local_offset + 32] eor \o0\().16b, \o0\().16b, v25.16b eor \o1\().16b, \o1\().16b, v26.16b eor \o2\().16b, \o2\().16b, v27.16b eor \o3\().16b, \o3\().16b, v28.16b - st1 {\o0\().16b}, [x0], #16 + st1 {\o0\().16b}, [x19], #16 mov v25.16b, v26.16b tbnz x6, #1, 1f - st1 {\o1\().16b}, [x0], #16 + st1 {\o1\().16b}, [x19], #16 mov v25.16b, v27.16b tbnz x6, #2, 1f - st1 {\o2\().16b}, [x0], #16 + st1 {\o2\().16b}, [x19], #16 mov v25.16b, v28.16b tbnz x6, #3, 1f - st1 {\o3\().16b}, [x0], #16 + st1 {\o3\().16b}, [x19], #16 mov v25.16b, v29.16b tbnz x6, #4, 1f @@ -820,18 +841,22 @@ ENDPROC(__xts_crypt8) eor \o6\().16b, \o6\().16b, v18.16b eor \o7\().16b, \o7\().16b, v19.16b - st1 {\o4\().16b}, [x0], #16 + st1 {\o4\().16b}, [x19], #16 tbnz x6, #5, 1f - st1 {\o5\().16b}, [x0], #16 + st1 {\o5\().16b}, [x19], #16 tbnz x6, #6, 1f - st1 {\o6\().16b}, [x0], #16 + st1 {\o6\().16b}, [x19], #16 tbnz x6, #7, 1f - st1 {\o7\().16b}, [x0], #16 + st1 {\o7\().16b}, [x19], #16 + + cbz x23, 1f + st1 {v25.16b}, [x24] - cbnz x4, 99b + cond_yield_neon 0b + b 99b -1: st1 {v25.16b}, [x5] - ldp x29, x30, [sp], #80 +1: st1 {v25.16b}, [x24] + frame_pop ret .endm @@ -856,24 +881,31 @@ ENDPROC(aesbs_xts_decrypt) * int rounds, int blocks, u8 iv[], u8 final[]) */ ENTRY(aesbs_ctr_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp - - cmp x6, #0 - cset x10, ne - add x4, x4, x10 // do one extra block if final - - ldp x7, x8, [x5] - ld1 {v0.16b}, [x5] + frame_push 8 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 + mov x25, x6 + + cmp x25, #0 + cset x26, ne + add x23, x23, x26 // do one extra block if final + +98: ldp x7, x8, [x24] + ld1 {v0.16b}, [x24] CPU_LE( rev x7, x7 ) CPU_LE( rev x8, x8 ) adds x8, x8, #1 adc x7, x7, xzr 99: mov x9, #1 - lsl x9, x9, x4 - subs w4, w4, #8 - csel x4, x4, xzr, pl + lsl x9, x9, x23 + subs w23, w23, #8 + csel x23, x23, xzr, pl csel x9, x9, xzr, le tbnz x9, #1, 0f @@ -891,82 +923,85 @@ CPU_LE( rev x8, x8 ) tbnz x9, #7, 0f next_ctr v7 -0: mov bskey, x2 - mov rounds, x3 +0: mov bskey, x21 + mov rounds, x22 bl aesbs_encrypt8 - lsr x9, x9, x10 // disregard the extra block + lsr x9, x9, x26 // disregard the extra block tbnz x9, #0, 0f - ld1 {v8.16b}, [x1], #16 + ld1 {v8.16b}, [x20], #16 eor v0.16b, v0.16b, v8.16b - st1 {v0.16b}, [x0], #16 + st1 {v0.16b}, [x19], #16 tbnz x9, #1, 1f - ld1 {v9.16b}, [x1], #16 + ld1 {v9.16b}, [x20], #16 eor v1.16b, v1.16b, v9.16b - st1 {v1.16b}, [x0], #16 + st1 {v1.16b}, [x19], #16 tbnz x9, #2, 2f - ld1 {v10.16b}, [x1], #16 + ld1 {v10.16b}, [x20], #16 eor v4.16b, v4.16b, v10.16b - st1 {v4.16b}, [x0], #16 + st1 {v4.16b}, [x19], #16 tbnz x9, #3, 3f - ld1 {v11.16b}, [x1], #16 + ld1 {v11.16b}, [x20], #16 eor v6.16b, v6.16b, v11.16b - st1 {v6.16b}, [x0], #16 + st1 {v6.16b}, [x19], #16 tbnz x9, #4, 4f - ld1 {v12.16b}, [x1], #16 + ld1 {v12.16b}, [x20], #16 eor v3.16b, v3.16b, v12.16b - st1 {v3.16b}, [x0], #16 + st1 {v3.16b}, [x19], #16 tbnz x9, #5, 5f - ld1 {v13.16b}, [x1], #16 + ld1 {v13.16b}, [x20], #16 eor v7.16b, v7.16b, v13.16b - st1 {v7.16b}, [x0], #16 + st1 {v7.16b}, [x19], #16 tbnz x9, #6, 6f - ld1 {v14.16b}, [x1], #16 + ld1 {v14.16b}, [x20], #16 eor v2.16b, v2.16b, v14.16b - st1 {v2.16b}, [x0], #16 + st1 {v2.16b}, [x19], #16 tbnz x9, #7, 7f - ld1 {v15.16b}, [x1], #16 + ld1 {v15.16b}, [x20], #16 eor v5.16b, v5.16b, v15.16b - st1 {v5.16b}, [x0], #16 + st1 {v5.16b}, [x19], #16 8: next_ctr v0 - cbnz x4, 99b + st1 {v0.16b}, [x24] + cbz x23, 0f + + cond_yield_neon 98b + b 99b -0: st1 {v0.16b}, [x5] - ldp x29, x30, [sp], #16 +0: frame_pop ret /* * If we are handling the tail of the input (x6 != NULL), return the * final keystream block back to the caller. */ -1: cbz x6, 8b - st1 {v1.16b}, [x6] +1: cbz x25, 8b + st1 {v1.16b}, [x25] b 8b -2: cbz x6, 8b - st1 {v4.16b}, [x6] +2: cbz x25, 8b + st1 {v4.16b}, [x25] b 8b -3: cbz x6, 8b - st1 {v6.16b}, [x6] +3: cbz x25, 8b + st1 {v6.16b}, [x25] b 8b -4: cbz x6, 8b - st1 {v3.16b}, [x6] +4: cbz x25, 8b + st1 {v3.16b}, [x25] b 8b -5: cbz x6, 8b - st1 {v7.16b}, [x6] +5: cbz x25, 8b + st1 {v7.16b}, [x25] b 8b -6: cbz x6, 8b - st1 {v2.16b}, [x6] +6: cbz x25, 8b + st1 {v2.16b}, [x25] b 8b -7: cbz x6, 8b - st1 {v5.16b}, [x6] +7: cbz x25, 8b + st1 {v5.16b}, [x25] b 8b ENDPROC(aesbs_ctr_encrypt) diff --git a/arch/arm64/crypto/crc32-ce-core.S b/arch/arm64/crypto/crc32-ce-core.S index 16ed3c7ebd37..8061bf0f9c66 100644 --- a/arch/arm64/crypto/crc32-ce-core.S +++ b/arch/arm64/crypto/crc32-ce-core.S @@ -100,9 +100,10 @@ dCONSTANT .req d0 qCONSTANT .req q0 - BUF .req x0 - LEN .req x1 - CRC .req x2 + BUF .req x19 + LEN .req x20 + CRC .req x21 + CONST .req x22 vzr .req v9 @@ -123,7 +124,14 @@ ENTRY(crc32_pmull_le) ENTRY(crc32c_pmull_le) adr_l x3, .Lcrc32c_constants -0: bic LEN, LEN, #15 +0: frame_push 4, 64 + + mov BUF, x0 + mov LEN, x1 + mov CRC, x2 + mov CONST, x3 + + bic LEN, LEN, #15 ld1 {v1.16b-v4.16b}, [BUF], #0x40 movi vzr.16b, #0 fmov dCONSTANT, CRC @@ -132,7 +140,7 @@ ENTRY(crc32c_pmull_le) cmp LEN, #0x40 b.lt less_64 - ldr qCONSTANT, [x3] + ldr qCONSTANT, [CONST] loop_64: /* 64 bytes Full cache line folding */ sub LEN, LEN, #0x40 @@ -162,10 +170,21 @@ loop_64: /* 64 bytes Full cache line folding */ eor v4.16b, v4.16b, v8.16b cmp LEN, #0x40 - b.ge loop_64 + b.lt less_64 + + if_will_cond_yield_neon + stp q1, q2, [sp, #.Lframe_local_offset] + stp q3, q4, [sp, #.Lframe_local_offset + 32] + do_cond_yield_neon + ldp q1, q2, [sp, #.Lframe_local_offset] + ldp q3, q4, [sp, #.Lframe_local_offset + 32] + ldr qCONSTANT, [CONST] + movi vzr.16b, #0 + endif_yield_neon + b loop_64 less_64: /* Folding cache line into 128bit */ - ldr qCONSTANT, [x3, #16] + ldr qCONSTANT, [CONST, #16] pmull2 v5.1q, v1.2d, vCONSTANT.2d pmull v1.1q, v1.1d, vCONSTANT.1d @@ -204,8 +223,8 @@ fold_64: eor v1.16b, v1.16b, v2.16b /* final 32-bit fold */ - ldr dCONSTANT, [x3, #32] - ldr d3, [x3, #40] + ldr dCONSTANT, [CONST, #32] + ldr d3, [CONST, #40] ext v2.16b, v1.16b, vzr.16b, #4 and v1.16b, v1.16b, v3.16b @@ -213,7 +232,7 @@ fold_64: eor v1.16b, v1.16b, v2.16b /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ - ldr qCONSTANT, [x3, #48] + ldr qCONSTANT, [CONST, #48] and v2.16b, v1.16b, v3.16b ext v2.16b, vzr.16b, v2.16b, #8 @@ -223,6 +242,7 @@ fold_64: eor v1.16b, v1.16b, v2.16b mov w0, v1.s[1] + frame_pop ret ENDPROC(crc32_pmull_le) ENDPROC(crc32c_pmull_le) diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S index f179c01bd55c..663ea71cdb38 100644 --- a/arch/arm64/crypto/crct10dif-ce-core.S +++ b/arch/arm64/crypto/crct10dif-ce-core.S @@ -74,13 +74,19 @@ .text .cpu generic+crypto - arg1_low32 .req w0 - arg2 .req x1 - arg3 .req x2 + arg1_low32 .req w19 + arg2 .req x20 + arg3 .req x21 vzr .req v13 ENTRY(crc_t10dif_pmull) + frame_push 3, 128 + + mov arg1_low32, w0 + mov arg2, x1 + mov arg3, x2 + movi vzr.16b, #0 // init zero register // adjust the 16-bit initial_crc value, scale it to 32 bits @@ -175,8 +181,25 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 ) subs arg3, arg3, #128 // check if there is another 64B in the buffer to be able to fold - b.ge _fold_64_B_loop + b.lt _fold_64_B_end + + if_will_cond_yield_neon + stp q0, q1, [sp, #.Lframe_local_offset] + stp q2, q3, [sp, #.Lframe_local_offset + 32] + stp q4, q5, [sp, #.Lframe_local_offset + 64] + stp q6, q7, [sp, #.Lframe_local_offset + 96] + do_cond_yield_neon + ldp q0, q1, [sp, #.Lframe_local_offset] + ldp q2, q3, [sp, #.Lframe_local_offset + 32] + ldp q4, q5, [sp, #.Lframe_local_offset + 64] + ldp q6, q7, [sp, #.Lframe_local_offset + 96] + ldr_l q10, rk3, x8 + movi vzr.16b, #0 // init zero register + endif_yield_neon + + b _fold_64_B_loop +_fold_64_B_end: // at this point, the buffer pointer is pointing at the last y Bytes // of the buffer the 64B of folded data is in 4 of the vector // registers: v0, v1, v2, v3 @@ -304,6 +327,7 @@ _barrett: _cleanup: // scale the result back to 16 bits lsr x0, x0, #16 + frame_pop ret _less_than_128: diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index 11ebf1ae248a..dcffb9e77589 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -213,22 +213,31 @@ .endm .macro __pmull_ghash, pn - ld1 {SHASH.2d}, [x3] - ld1 {XL.2d}, [x1] + frame_push 5 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + +0: ld1 {SHASH.2d}, [x22] + ld1 {XL.2d}, [x20] ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 eor SHASH2.16b, SHASH2.16b, SHASH.16b __pmull_pre_\pn /* do the head block first, if supplied */ - cbz x4, 0f - ld1 {T1.2d}, [x4] - b 1f + cbz x23, 1f + ld1 {T1.2d}, [x23] + mov x23, xzr + b 2f -0: ld1 {T1.2d}, [x2], #16 - sub w0, w0, #1 +1: ld1 {T1.2d}, [x21], #16 + sub w19, w19, #1 -1: /* multiply XL by SHASH in GF(2^128) */ +2: /* multiply XL by SHASH in GF(2^128) */ CPU_LE( rev64 T1.16b, T1.16b ) ext T2.16b, XL.16b, XL.16b, #8 @@ -250,9 +259,18 @@ CPU_LE( rev64 T1.16b, T1.16b ) eor T2.16b, T2.16b, XH.16b eor XL.16b, XL.16b, T2.16b - cbnz w0, 0b + cbz w19, 3f + + if_will_cond_yield_neon + st1 {XL.2d}, [x20] + do_cond_yield_neon + b 0b + endif_yield_neon + + b 1b - st1 {XL.2d}, [x1] +3: st1 {XL.2d}, [x20] + frame_pop ret .endm @@ -304,38 +322,55 @@ ENDPROC(pmull_ghash_update_p8) .endm .macro pmull_gcm_do_crypt, enc - ld1 {SHASH.2d}, [x4] - ld1 {XL.2d}, [x1] - ldr x8, [x5, #8] // load lower counter + frame_push 10 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 + mov x25, x6 + mov x26, x7 + .if \enc == 1 + ldr x27, [sp, #96] // first stacked arg + .endif + + ldr x28, [x24, #8] // load lower counter +CPU_LE( rev x28, x28 ) + +0: mov x0, x25 + load_round_keys w26, x0 + ld1 {SHASH.2d}, [x23] + ld1 {XL.2d}, [x20] movi MASK.16b, #0xe1 ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 -CPU_LE( rev x8, x8 ) shl MASK.2d, MASK.2d, #57 eor SHASH2.16b, SHASH2.16b, SHASH.16b .if \enc == 1 - ld1 {KS.16b}, [x7] + ld1 {KS.16b}, [x27] .endif -0: ld1 {CTR.8b}, [x5] // load upper counter - ld1 {INP.16b}, [x3], #16 - rev x9, x8 - add x8, x8, #1 - sub w0, w0, #1 +1: ld1 {CTR.8b}, [x24] // load upper counter + ld1 {INP.16b}, [x22], #16 + rev x9, x28 + add x28, x28, #1 + sub w19, w19, #1 ins CTR.d[1], x9 // set lower counter .if \enc == 1 eor INP.16b, INP.16b, KS.16b // encrypt input - st1 {INP.16b}, [x2], #16 + st1 {INP.16b}, [x21], #16 .endif rev64 T1.16b, INP.16b - cmp w6, #12 - b.ge 2f // AES-192/256? + cmp w26, #12 + b.ge 4f // AES-192/256? -1: enc_round CTR, v21 +2: enc_round CTR, v21 ext T2.16b, XL.16b, XL.16b, #8 ext IN1.16b, T1.16b, T1.16b, #8 @@ -390,27 +425,39 @@ CPU_LE( rev x8, x8 ) .if \enc == 0 eor INP.16b, INP.16b, KS.16b - st1 {INP.16b}, [x2], #16 + st1 {INP.16b}, [x21], #16 .endif - cbnz w0, 0b + cbz w19, 3f -CPU_LE( rev x8, x8 ) - st1 {XL.2d}, [x1] - str x8, [x5, #8] // store lower counter + if_will_cond_yield_neon + st1 {XL.2d}, [x20] + .if \enc == 1 + st1 {KS.16b}, [x27] + .endif + do_cond_yield_neon + b 0b + endif_yield_neon + b 1b + +3: st1 {XL.2d}, [x20] .if \enc == 1 - st1 {KS.16b}, [x7] + st1 {KS.16b}, [x27] .endif +CPU_LE( rev x28, x28 ) + str x28, [x24, #8] // store lower counter + + frame_pop ret -2: b.eq 3f // AES-192? +4: b.eq 5f // AES-192? enc_round CTR, v17 enc_round CTR, v18 -3: enc_round CTR, v19 +5: enc_round CTR, v19 enc_round CTR, v20 - b 1b + b 2b .endm /* diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index cfc9c92814fd..7cf0b1aa6ea8 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -63,11 +63,12 @@ static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src, asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[], struct ghash_key const *k, - u8 ctr[], int rounds, u8 ks[]); + u8 ctr[], u32 const rk[], int rounds, + u8 ks[]); asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[], struct ghash_key const *k, - u8 ctr[], int rounds); + u8 ctr[], u32 const rk[], int rounds); asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[], u32 const rk[], int rounds); @@ -368,26 +369,29 @@ static int gcm_encrypt(struct aead_request *req) pmull_gcm_encrypt_block(ks, iv, NULL, num_rounds(&ctx->aes_key)); put_unaligned_be32(3, iv + GCM_IV_SIZE); + kernel_neon_end(); - err = skcipher_walk_aead_encrypt(&walk, req, true); + err = skcipher_walk_aead_encrypt(&walk, req, false); while (walk.nbytes >= AES_BLOCK_SIZE) { int blocks = walk.nbytes / AES_BLOCK_SIZE; + kernel_neon_begin(); pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr, walk.src.virt.addr, &ctx->ghash_key, - iv, num_rounds(&ctx->aes_key), ks); + iv, ctx->aes_key.key_enc, + num_rounds(&ctx->aes_key), ks); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); } else { __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, num_rounds(&ctx->aes_key)); put_unaligned_be32(2, iv + GCM_IV_SIZE); - err = skcipher_walk_aead_encrypt(&walk, req, true); + err = skcipher_walk_aead_encrypt(&walk, req, false); while (walk.nbytes >= AES_BLOCK_SIZE) { int blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -467,15 +471,19 @@ static int gcm_decrypt(struct aead_request *req) pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, num_rounds(&ctx->aes_key)); put_unaligned_be32(2, iv + GCM_IV_SIZE); + kernel_neon_end(); - err = skcipher_walk_aead_decrypt(&walk, req, true); + err = skcipher_walk_aead_decrypt(&walk, req, false); while (walk.nbytes >= AES_BLOCK_SIZE) { int blocks = walk.nbytes / AES_BLOCK_SIZE; + kernel_neon_begin(); pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr, walk.src.virt.addr, &ctx->ghash_key, - iv, num_rounds(&ctx->aes_key)); + iv, ctx->aes_key.key_enc, + num_rounds(&ctx->aes_key)); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); @@ -483,14 +491,12 @@ static int gcm_decrypt(struct aead_request *req) if (walk.nbytes) pmull_gcm_encrypt_block(iv, iv, NULL, num_rounds(&ctx->aes_key)); - - kernel_neon_end(); } else { __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, num_rounds(&ctx->aes_key)); put_unaligned_be32(2, iv + GCM_IV_SIZE); - err = skcipher_walk_aead_decrypt(&walk, req, true); + err = skcipher_walk_aead_decrypt(&walk, req, false); while (walk.nbytes >= AES_BLOCK_SIZE) { int blocks = walk.nbytes / AES_BLOCK_SIZE; diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index 46049850727d..78eb35fb5056 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -69,30 +69,36 @@ * int blocks) */ ENTRY(sha1_ce_transform) + frame_push 3 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + /* load round constants */ - loadrc k0.4s, 0x5a827999, w6 +0: loadrc k0.4s, 0x5a827999, w6 loadrc k1.4s, 0x6ed9eba1, w6 loadrc k2.4s, 0x8f1bbcdc, w6 loadrc k3.4s, 0xca62c1d6, w6 /* load state */ - ld1 {dgav.4s}, [x0] - ldr dgb, [x0, #16] + ld1 {dgav.4s}, [x19] + ldr dgb, [x19, #16] /* load sha1_ce_state::finalize */ ldr_l w4, sha1_ce_offsetof_finalize, x4 - ldr w4, [x0, x4] + ldr w4, [x19, x4] /* load input */ -0: ld1 {v8.4s-v11.4s}, [x1], #64 - sub w2, w2, #1 +1: ld1 {v8.4s-v11.4s}, [x20], #64 + sub w21, w21, #1 CPU_LE( rev32 v8.16b, v8.16b ) CPU_LE( rev32 v9.16b, v9.16b ) CPU_LE( rev32 v10.16b, v10.16b ) CPU_LE( rev32 v11.16b, v11.16b ) -1: add t0.4s, v8.4s, k0.4s +2: add t0.4s, v8.4s, k0.4s mov dg0v.16b, dgav.16b add_update c, ev, k0, 8, 9, 10, 11, dgb @@ -123,16 +129,25 @@ CPU_LE( rev32 v11.16b, v11.16b ) add dgbv.2s, dgbv.2s, dg1v.2s add dgav.4s, dgav.4s, dg0v.4s - cbnz w2, 0b + cbz w21, 3f + + if_will_cond_yield_neon + st1 {dgav.4s}, [x19] + str dgb, [x19, #16] + do_cond_yield_neon + b 0b + endif_yield_neon + + b 1b /* * Final block: add padding and total bit count. * Skip if the input size was not a round multiple of the block size, * the padding is handled by the C code in that case. */ - cbz x4, 3f +3: cbz x4, 4f ldr_l w4, sha1_ce_offsetof_count, x4 - ldr x4, [x0, x4] + ldr x4, [x19, x4] movi v9.2d, #0 mov x8, #0x80000000 movi v10.2d, #0 @@ -141,10 +156,11 @@ CPU_LE( rev32 v11.16b, v11.16b ) mov x4, #0 mov v11.d[0], xzr mov v11.d[1], x7 - b 1b + b 2b /* store new state */ -3: st1 {dgav.4s}, [x0] - str dgb, [x0, #16] +4: st1 {dgav.4s}, [x19] + str dgb, [x19, #16] + frame_pop ret ENDPROC(sha1_ce_transform) diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 4c3c89b812ce..cd8b36412469 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -79,30 +79,36 @@ */ .text ENTRY(sha2_ce_transform) + frame_push 3 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + /* load round constants */ - adr_l x8, .Lsha2_rcon +0: adr_l x8, .Lsha2_rcon ld1 { v0.4s- v3.4s}, [x8], #64 ld1 { v4.4s- v7.4s}, [x8], #64 ld1 { v8.4s-v11.4s}, [x8], #64 ld1 {v12.4s-v15.4s}, [x8] /* load state */ - ld1 {dgav.4s, dgbv.4s}, [x0] + ld1 {dgav.4s, dgbv.4s}, [x19] /* load sha256_ce_state::finalize */ ldr_l w4, sha256_ce_offsetof_finalize, x4 - ldr w4, [x0, x4] + ldr w4, [x19, x4] /* load input */ -0: ld1 {v16.4s-v19.4s}, [x1], #64 - sub w2, w2, #1 +1: ld1 {v16.4s-v19.4s}, [x20], #64 + sub w21, w21, #1 CPU_LE( rev32 v16.16b, v16.16b ) CPU_LE( rev32 v17.16b, v17.16b ) CPU_LE( rev32 v18.16b, v18.16b ) CPU_LE( rev32 v19.16b, v19.16b ) -1: add t0.4s, v16.4s, v0.4s +2: add t0.4s, v16.4s, v0.4s mov dg0v.16b, dgav.16b mov dg1v.16b, dgbv.16b @@ -131,16 +137,24 @@ CPU_LE( rev32 v19.16b, v19.16b ) add dgbv.4s, dgbv.4s, dg1v.4s /* handled all input blocks? */ - cbnz w2, 0b + cbz w21, 3f + + if_will_cond_yield_neon + st1 {dgav.4s, dgbv.4s}, [x19] + do_cond_yield_neon + b 0b + endif_yield_neon + + b 1b /* * Final block: add padding and total bit count. * Skip if the input size was not a round multiple of the block size, * the padding is handled by the C code in that case. */ - cbz x4, 3f +3: cbz x4, 4f ldr_l w4, sha256_ce_offsetof_count, x4 - ldr x4, [x0, x4] + ldr x4, [x19, x4] movi v17.2d, #0 mov x8, #0x80000000 movi v18.2d, #0 @@ -149,9 +163,10 @@ CPU_LE( rev32 v19.16b, v19.16b ) mov x4, #0 mov v19.d[0], xzr mov v19.d[1], x7 - b 1b + b 2b /* store new state */ -3: st1 {dgav.4s, dgbv.4s}, [x0] +4: st1 {dgav.4s, dgbv.4s}, [x19] + frame_pop ret ENDPROC(sha2_ce_transform) diff --git a/arch/arm64/crypto/sha256-core.S_shipped b/arch/arm64/crypto/sha256-core.S_shipped index 3ce82cc860bc..7c7ce2e3bad6 100644 --- a/arch/arm64/crypto/sha256-core.S_shipped +++ b/arch/arm64/crypto/sha256-core.S_shipped @@ -1,3 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +// This code is taken from the OpenSSL project but the author (Andy Polyakov) +// has relicensed it under the GPLv2. Therefore this program is free software; +// you can redistribute it and/or modify it under the terms of the GNU General +// Public License version 2 as published by the Free Software Foundation. +// +// The original headers, including the original license headers, are +// included below for completeness. + // Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. // // Licensed under the OpenSSL license (the "License"). You may not use @@ -10,8 +20,6 @@ // project. The module is, however, dual licensed under OpenSSL and // CRYPTOGAMS licenses depending on where you obtain it. For further // details see http://www.openssl.org/~appro/cryptogams/. -// -// Permission to use under GPLv2 terms is granted. // ==================================================================== // // SHA256/512 for ARMv8. diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S index 332ad7530690..a7d587fa54f6 100644 --- a/arch/arm64/crypto/sha3-ce-core.S +++ b/arch/arm64/crypto/sha3-ce-core.S @@ -41,9 +41,16 @@ */ .text ENTRY(sha3_ce_transform) - /* load state */ - add x8, x0, #32 - ld1 { v0.1d- v3.1d}, [x0] + frame_push 4 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + +0: /* load state */ + add x8, x19, #32 + ld1 { v0.1d- v3.1d}, [x19] ld1 { v4.1d- v7.1d}, [x8], #32 ld1 { v8.1d-v11.1d}, [x8], #32 ld1 {v12.1d-v15.1d}, [x8], #32 @@ -51,13 +58,13 @@ ENTRY(sha3_ce_transform) ld1 {v20.1d-v23.1d}, [x8], #32 ld1 {v24.1d}, [x8] -0: sub w2, w2, #1 +1: sub w21, w21, #1 mov w8, #24 adr_l x9, .Lsha3_rcon /* load input */ - ld1 {v25.8b-v28.8b}, [x1], #32 - ld1 {v29.8b-v31.8b}, [x1], #24 + ld1 {v25.8b-v28.8b}, [x20], #32 + ld1 {v29.8b-v31.8b}, [x20], #24 eor v0.8b, v0.8b, v25.8b eor v1.8b, v1.8b, v26.8b eor v2.8b, v2.8b, v27.8b @@ -66,10 +73,10 @@ ENTRY(sha3_ce_transform) eor v5.8b, v5.8b, v30.8b eor v6.8b, v6.8b, v31.8b - tbnz x3, #6, 2f // SHA3-512 + tbnz x22, #6, 3f // SHA3-512 - ld1 {v25.8b-v28.8b}, [x1], #32 - ld1 {v29.8b-v30.8b}, [x1], #16 + ld1 {v25.8b-v28.8b}, [x20], #32 + ld1 {v29.8b-v30.8b}, [x20], #16 eor v7.8b, v7.8b, v25.8b eor v8.8b, v8.8b, v26.8b eor v9.8b, v9.8b, v27.8b @@ -77,34 +84,34 @@ ENTRY(sha3_ce_transform) eor v11.8b, v11.8b, v29.8b eor v12.8b, v12.8b, v30.8b - tbnz x3, #4, 1f // SHA3-384 or SHA3-224 + tbnz x22, #4, 2f // SHA3-384 or SHA3-224 // SHA3-256 - ld1 {v25.8b-v28.8b}, [x1], #32 + ld1 {v25.8b-v28.8b}, [x20], #32 eor v13.8b, v13.8b, v25.8b eor v14.8b, v14.8b, v26.8b eor v15.8b, v15.8b, v27.8b eor v16.8b, v16.8b, v28.8b - b 3f + b 4f -1: tbz x3, #2, 3f // bit 2 cleared? SHA-384 +2: tbz x22, #2, 4f // bit 2 cleared? SHA-384 // SHA3-224 - ld1 {v25.8b-v28.8b}, [x1], #32 - ld1 {v29.8b}, [x1], #8 + ld1 {v25.8b-v28.8b}, [x20], #32 + ld1 {v29.8b}, [x20], #8 eor v13.8b, v13.8b, v25.8b eor v14.8b, v14.8b, v26.8b eor v15.8b, v15.8b, v27.8b eor v16.8b, v16.8b, v28.8b eor v17.8b, v17.8b, v29.8b - b 3f + b 4f // SHA3-512 -2: ld1 {v25.8b-v26.8b}, [x1], #16 +3: ld1 {v25.8b-v26.8b}, [x20], #16 eor v7.8b, v7.8b, v25.8b eor v8.8b, v8.8b, v26.8b -3: sub w8, w8, #1 +4: sub w8, w8, #1 eor3 v29.16b, v4.16b, v9.16b, v14.16b eor3 v26.16b, v1.16b, v6.16b, v11.16b @@ -183,17 +190,33 @@ ENTRY(sha3_ce_transform) eor v0.16b, v0.16b, v31.16b - cbnz w8, 3b - cbnz w2, 0b + cbnz w8, 4b + cbz w21, 5f + + if_will_cond_yield_neon + add x8, x19, #32 + st1 { v0.1d- v3.1d}, [x19] + st1 { v4.1d- v7.1d}, [x8], #32 + st1 { v8.1d-v11.1d}, [x8], #32 + st1 {v12.1d-v15.1d}, [x8], #32 + st1 {v16.1d-v19.1d}, [x8], #32 + st1 {v20.1d-v23.1d}, [x8], #32 + st1 {v24.1d}, [x8] + do_cond_yield_neon + b 0b + endif_yield_neon + + b 1b /* save state */ - st1 { v0.1d- v3.1d}, [x0], #32 - st1 { v4.1d- v7.1d}, [x0], #32 - st1 { v8.1d-v11.1d}, [x0], #32 - st1 {v12.1d-v15.1d}, [x0], #32 - st1 {v16.1d-v19.1d}, [x0], #32 - st1 {v20.1d-v23.1d}, [x0], #32 - st1 {v24.1d}, [x0] +5: st1 { v0.1d- v3.1d}, [x19], #32 + st1 { v4.1d- v7.1d}, [x19], #32 + st1 { v8.1d-v11.1d}, [x19], #32 + st1 {v12.1d-v15.1d}, [x19], #32 + st1 {v16.1d-v19.1d}, [x19], #32 + st1 {v20.1d-v23.1d}, [x19], #32 + st1 {v24.1d}, [x19] + frame_pop ret ENDPROC(sha3_ce_transform) diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/crypto/sha512-armv8.pl index c55efb308544..2d8655d5b1af 100644 --- a/arch/arm64/crypto/sha512-armv8.pl +++ b/arch/arm64/crypto/sha512-armv8.pl @@ -1,4 +1,14 @@ #! /usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 + +# This code is taken from the OpenSSL project but the author (Andy Polyakov) +# has relicensed it under the GPLv2. Therefore this program is free software; +# you can redistribute it and/or modify it under the terms of the GNU General +# Public License version 2 as published by the Free Software Foundation. +# +# The original headers, including the original license headers, are +# included below for completeness. + # Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use @@ -11,8 +21,6 @@ # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. -# -# Permission to use under GPLv2 terms is granted. # ==================================================================== # # SHA256/512 for ARMv8. diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S index 7f3bca5c59a2..ce65e3abe4f2 100644 --- a/arch/arm64/crypto/sha512-ce-core.S +++ b/arch/arm64/crypto/sha512-ce-core.S @@ -107,17 +107,23 @@ */ .text ENTRY(sha512_ce_transform) + frame_push 3 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + /* load state */ - ld1 {v8.2d-v11.2d}, [x0] +0: ld1 {v8.2d-v11.2d}, [x19] /* load first 4 round constants */ adr_l x3, .Lsha512_rcon ld1 {v20.2d-v23.2d}, [x3], #64 /* load input */ -0: ld1 {v12.2d-v15.2d}, [x1], #64 - ld1 {v16.2d-v19.2d}, [x1], #64 - sub w2, w2, #1 +1: ld1 {v12.2d-v15.2d}, [x20], #64 + ld1 {v16.2d-v19.2d}, [x20], #64 + sub w21, w21, #1 CPU_LE( rev64 v12.16b, v12.16b ) CPU_LE( rev64 v13.16b, v13.16b ) @@ -196,9 +202,18 @@ CPU_LE( rev64 v19.16b, v19.16b ) add v11.2d, v11.2d, v3.2d /* handled all input blocks? */ - cbnz w2, 0b + cbz w21, 3f + + if_will_cond_yield_neon + st1 {v8.2d-v11.2d}, [x19] + do_cond_yield_neon + b 0b + endif_yield_neon + + b 1b /* store new state */ -3: st1 {v8.2d-v11.2d}, [x0] +3: st1 {v8.2d-v11.2d}, [x19] + frame_pop ret ENDPROC(sha512_ce_transform) diff --git a/arch/arm64/crypto/sha512-core.S_shipped b/arch/arm64/crypto/sha512-core.S_shipped index bd0f59f06c9d..e063a6106720 100644 --- a/arch/arm64/crypto/sha512-core.S_shipped +++ b/arch/arm64/crypto/sha512-core.S_shipped @@ -1,3 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +// This code is taken from the OpenSSL project but the author (Andy Polyakov) +// has relicensed it under the GPLv2. Therefore this program is free software; +// you can redistribute it and/or modify it under the terms of the GNU General +// Public License version 2 as published by the Free Software Foundation. +// +// The original headers, including the original license headers, are +// included below for completeness. + // Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. // // Licensed under the OpenSSL license (the "License"). You may not use @@ -10,8 +20,6 @@ // project. The module is, however, dual licensed under OpenSSL and // CRYPTOGAMS licenses depending on where you obtain it. For further // details see http://www.openssl.org/~appro/cryptogams/. -// -// Permission to use under GPLv2 terms is granted. // ==================================================================== // // SHA256/512 for ARMv8. diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S new file mode 100644 index 000000000000..af3bfbc3f4d4 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-core.S @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8 + .set .Lv\b\().4s, \b + .endr + + .macro sm4e, rd, rn + .inst 0xcec08400 | .L\rd | (.L\rn << 5) + .endm + + /* + * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in); + */ + .text +ENTRY(sm4_ce_do_crypt) + ld1 {v8.4s}, [x2] + ld1 {v0.4s-v3.4s}, [x0], #64 +CPU_LE( rev32 v8.16b, v8.16b ) + ld1 {v4.4s-v7.4s}, [x0] + sm4e v8.4s, v0.4s + sm4e v8.4s, v1.4s + sm4e v8.4s, v2.4s + sm4e v8.4s, v3.4s + sm4e v8.4s, v4.4s + sm4e v8.4s, v5.4s + sm4e v8.4s, v6.4s + sm4e v8.4s, v7.4s + rev64 v8.4s, v8.4s + ext v8.16b, v8.16b, v8.16b, #8 +CPU_LE( rev32 v8.16b, v8.16b ) + st1 {v8.4s}, [x1] + ret +ENDPROC(sm4_ce_do_crypt) diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c new file mode 100644 index 000000000000..b7fb5274b250 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <asm/neon.h> +#include <asm/simd.h> +#include <crypto/sm4.h> +#include <linux/module.h> +#include <linux/cpufeature.h> +#include <linux/crypto.h> +#include <linux/types.h> + +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("sm4-ce"); +MODULE_DESCRIPTION("SM4 symmetric cipher using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in); + +static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!may_use_simd()) { + crypto_sm4_encrypt(tfm, out, in); + } else { + kernel_neon_begin(); + sm4_ce_do_crypt(ctx->rkey_enc, out, in); + kernel_neon_end(); + } +} + +static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!may_use_simd()) { + crypto_sm4_decrypt(tfm, out, in); + } else { + kernel_neon_begin(); + sm4_ce_do_crypt(ctx->rkey_dec, out, in); + kernel_neon_end(); + } +} + +static struct crypto_alg sm4_ce_alg = { + .cra_name = "sm4", + .cra_driver_name = "sm4-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_sm4_ctx), + .cra_module = THIS_MODULE, + .cra_u.cipher = { + .cia_min_keysize = SM4_KEY_SIZE, + .cia_max_keysize = SM4_KEY_SIZE, + .cia_setkey = crypto_sm4_set_key, + .cia_encrypt = sm4_ce_encrypt, + .cia_decrypt = sm4_ce_decrypt + } +}; + +static int __init sm4_ce_mod_init(void) +{ + return crypto_register_alg(&sm4_ce_alg); +} + +static void __exit sm4_ce_mod_fini(void) +{ + crypto_unregister_alg(&sm4_ce_alg); +} + +module_cpu_feature_match(SM3, sm4_ce_mod_init); +module_exit(sm4_ce_mod_fini); diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 669028172fd6..a91933b1e2e6 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -5,6 +5,8 @@ #include <asm/cpucaps.h> #include <asm/insn.h> +#define ARM64_CB_PATCH ARM64_NCAPS + #ifndef __ASSEMBLY__ #include <linux/init.h> @@ -22,12 +24,19 @@ struct alt_instr { u8 alt_len; /* size of new instruction(s), <= orig_len */ }; +typedef void (*alternative_cb_t)(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); + void __init apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); -#define ALTINSTR_ENTRY(feature) \ +#define ALTINSTR_ENTRY(feature,cb) \ " .word 661b - .\n" /* label */ \ + " .if " __stringify(cb) " == 0\n" \ " .word 663f - .\n" /* new instruction */ \ + " .else\n" \ + " .word " __stringify(cb) "- .\n" /* callback */ \ + " .endif\n" \ " .hword " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -45,15 +54,18 @@ void apply_alternatives(void *start, size_t length); * but most assemblers die if insn1 or insn2 have a .inst. This should * be fixed in a binutils release posterior to 2.25.51.0.2 (anything * containing commit 4e4d08cf7399b606 or c1baaddf8861). + * + * Alternatives with callbacks do not generate replacement instructions. */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature) \ + ALTINSTR_ENTRY(feature,cb) \ ".popsection\n" \ + " .if " __stringify(cb) " == 0\n" \ ".pushsection .altinstr_replacement, \"a\"\n" \ "663:\n\t" \ newinstr "\n" \ @@ -61,11 +73,17 @@ void apply_alternatives(void *start, size_t length); ".popsection\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ ".org . - (662b-661b) + (664b-663b)\n" \ + ".else\n\t" \ + "663:\n\t" \ + "664:\n\t" \ + ".endif\n" \ ".endif\n" #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0) +#define ALTERNATIVE_CB(oldinstr, cb) \ + __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb) #else #include <asm/assembler.h> @@ -132,6 +150,14 @@ void apply_alternatives(void *start, size_t length); 661: .endm +.macro alternative_cb cb + .set .Lasm_alt_mode, 0 + .pushsection .altinstructions, "a" + altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0 + .popsection +661: +.endm + /* * Provide the other half of the alternative code sequence. */ @@ -158,6 +184,13 @@ void apply_alternatives(void *start, size_t length); .endm /* + * Callback-based alternative epilogue + */ +.macro alternative_cb_end +662: +.endm + +/* * Provides a trivial alternative or default sequence consisting solely * of NOPs. The number of NOPs is chosen automatically to match the * previous case. diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 053d83e8db6f..0bcc98dbba56 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -565,4 +565,140 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU #endif .endm + /* + * frame_push - Push @regcount callee saved registers to the stack, + * starting at x19, as well as x29/x30, and set x29 to + * the new value of sp. Add @extra bytes of stack space + * for locals. + */ + .macro frame_push, regcount:req, extra + __frame st, \regcount, \extra + .endm + + /* + * frame_pop - Pop the callee saved registers from the stack that were + * pushed in the most recent call to frame_push, as well + * as x29/x30 and any extra stack space that may have been + * allocated. + */ + .macro frame_pop + __frame ld + .endm + + .macro __frame_regs, reg1, reg2, op, num + .if .Lframe_regcount == \num + \op\()r \reg1, [sp, #(\num + 1) * 8] + .elseif .Lframe_regcount > \num + \op\()p \reg1, \reg2, [sp, #(\num + 1) * 8] + .endif + .endm + + .macro __frame, op, regcount, extra=0 + .ifc \op, st + .if (\regcount) < 0 || (\regcount) > 10 + .error "regcount should be in the range [0 ... 10]" + .endif + .if ((\extra) % 16) != 0 + .error "extra should be a multiple of 16 bytes" + .endif + .ifdef .Lframe_regcount + .if .Lframe_regcount != -1 + .error "frame_push/frame_pop may not be nested" + .endif + .endif + .set .Lframe_regcount, \regcount + .set .Lframe_extra, \extra + .set .Lframe_local_offset, ((\regcount + 3) / 2) * 16 + stp x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]! + mov x29, sp + .endif + + __frame_regs x19, x20, \op, 1 + __frame_regs x21, x22, \op, 3 + __frame_regs x23, x24, \op, 5 + __frame_regs x25, x26, \op, 7 + __frame_regs x27, x28, \op, 9 + + .ifc \op, ld + .if .Lframe_regcount == -1 + .error "frame_push/frame_pop may not be nested" + .endif + ldp x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra + .set .Lframe_regcount, -1 + .endif + .endm + +/* + * Check whether to yield to another runnable task from kernel mode NEON code + * (which runs with preemption disabled). + * + * if_will_cond_yield_neon + * // pre-yield patchup code + * do_cond_yield_neon + * // post-yield patchup code + * endif_yield_neon <label> + * + * where <label> is optional, and marks the point where execution will resume + * after a yield has been performed. If omitted, execution resumes right after + * the endif_yield_neon invocation. Note that the entire sequence, including + * the provided patchup code, will be omitted from the image if CONFIG_PREEMPT + * is not defined. + * + * As a convenience, in the case where no patchup code is required, the above + * sequence may be abbreviated to + * + * cond_yield_neon <label> + * + * Note that the patchup code does not support assembler directives that change + * the output section, any use of such directives is undefined. + * + * The yield itself consists of the following: + * - Check whether the preempt count is exactly 1, in which case disabling + * preemption once will make the task preemptible. If this is not the case, + * yielding is pointless. + * - Check whether TIF_NEED_RESCHED is set, and if so, disable and re-enable + * kernel mode NEON (which will trigger a reschedule), and branch to the + * yield fixup code. + * + * This macro sequence may clobber all CPU state that is not guaranteed by the + * AAPCS to be preserved across an ordinary function call. + */ + + .macro cond_yield_neon, lbl + if_will_cond_yield_neon + do_cond_yield_neon + endif_yield_neon \lbl + .endm + + .macro if_will_cond_yield_neon +#ifdef CONFIG_PREEMPT + get_thread_info x0 + ldr w1, [x0, #TSK_TI_PREEMPT] + ldr x0, [x0, #TSK_TI_FLAGS] + cmp w1, #PREEMPT_DISABLE_OFFSET + csel x0, x0, xzr, eq + tbnz x0, #TIF_NEED_RESCHED, .Lyield_\@ // needs rescheduling? + /* fall through to endif_yield_neon */ + .subsection 1 +.Lyield_\@ : +#else + .section ".discard.cond_yield_neon", "ax" +#endif + .endm + + .macro do_cond_yield_neon + bl kernel_neon_end + bl kernel_neon_begin + .endm + + .macro endif_yield_neon, lbl + .ifnb \lbl + b \lbl + .else + b .Lyield_out_\@ + .endif + .previous +.Lyield_out_\@ : + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 9ef0797380cb..f9b0b09153e0 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -117,7 +117,7 @@ static inline void atomic_and(int i, atomic_t *v) /* LSE atomics */ " mvn %w[i], %w[i]\n" " stclr %w[i], %[v]") - : [i] "+r" (w0), [v] "+Q" (v->counter) + : [i] "+&r" (w0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -135,7 +135,7 @@ static inline int atomic_fetch_and##name(int i, atomic_t *v) \ /* LSE atomics */ \ " mvn %w[i], %w[i]\n" \ " ldclr" #mb " %w[i], %w[i], %[v]") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -161,7 +161,7 @@ static inline void atomic_sub(int i, atomic_t *v) /* LSE atomics */ " neg %w[i], %w[i]\n" " stadd %w[i], %[v]") - : [i] "+r" (w0), [v] "+Q" (v->counter) + : [i] "+&r" (w0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -180,7 +180,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], w30, %[v]\n" \ " add %w[i], %w[i], w30") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS , ##cl); \ \ @@ -207,7 +207,7 @@ static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ /* LSE atomics */ \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[i], %[v]") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -314,7 +314,7 @@ static inline void atomic64_and(long i, atomic64_t *v) /* LSE atomics */ " mvn %[i], %[i]\n" " stclr %[i], %[v]") - : [i] "+r" (x0), [v] "+Q" (v->counter) + : [i] "+&r" (x0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -332,7 +332,7 @@ static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ /* LSE atomics */ \ " mvn %[i], %[i]\n" \ " ldclr" #mb " %[i], %[i], %[v]") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -358,7 +358,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) /* LSE atomics */ " neg %[i], %[i]\n" " stadd %[i], %[v]") - : [i] "+r" (x0), [v] "+Q" (v->counter) + : [i] "+&r" (x0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -377,7 +377,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], x30, %[v]\n" \ " add %[i], %[i], x30") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -404,7 +404,7 @@ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ /* LSE atomics */ \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %[i], %[v]") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -435,7 +435,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) " sub x30, x30, %[ret]\n" " cbnz x30, 1b\n" "2:") - : [ret] "+r" (x0), [v] "+Q" (v->counter) + : [ret] "+&r" (x0), [v] "+Q" (v->counter) : : __LL_SC_CLOBBERS, "cc", "memory"); @@ -516,7 +516,7 @@ static inline long __cmpxchg_double##name(unsigned long old1, \ " eor %[old1], %[old1], %[oldval1]\n" \ " eor %[old2], %[old2], %[oldval2]\n" \ " orr %[old1], %[old1], %[old2]") \ - : [old1] "+r" (x0), [old2] "+r" (x1), \ + : [old1] "+&r" (x0), [old2] "+&r" (x1), \ [v] "+Q" (*(unsigned long *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 7dfcec4700fe..0094c6653b06 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -140,10 +140,8 @@ static inline void __flush_icache_all(void) dsb(ish); } -#define flush_dcache_mmap_lock(mapping) \ - spin_lock_irq(&(mapping)->tree_lock) -#define flush_dcache_mmap_unlock(mapping) \ - spin_unlock_irq(&(mapping)->tree_lock) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) /* * We don't appear to need to do anything here. In fact, if we did, we'd diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index c00c62e1a4a3..1a037b94eba1 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -34,7 +34,6 @@ typedef u32 compat_size_t; typedef s32 compat_ssize_t; -typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; typedef u16 __compat_uid_t; @@ -66,16 +65,6 @@ typedef u32 compat_ulong_t; typedef u64 compat_u64; typedef u32 compat_uptr_t; -struct compat_timespec { - compat_time_t tv_sec; - s32 tv_nsec; -}; - -struct compat_timeval { - compat_time_t tv_sec; - s32 tv_usec; -}; - struct compat_stat { #ifdef __AARCH64EB__ short st_dev; @@ -192,10 +181,10 @@ struct compat_ipc64_perm { struct compat_semid64_ds { struct compat_ipc64_perm sem_perm; - compat_time_t sem_otime; - compat_ulong_t __unused1; - compat_time_t sem_ctime; - compat_ulong_t __unused2; + compat_ulong_t sem_otime; + compat_ulong_t sem_otime_high; + compat_ulong_t sem_ctime; + compat_ulong_t sem_ctime_high; compat_ulong_t sem_nsems; compat_ulong_t __unused3; compat_ulong_t __unused4; @@ -203,12 +192,12 @@ struct compat_semid64_ds { struct compat_msqid64_ds { struct compat_ipc64_perm msg_perm; - compat_time_t msg_stime; - compat_ulong_t __unused1; - compat_time_t msg_rtime; - compat_ulong_t __unused2; - compat_time_t msg_ctime; - compat_ulong_t __unused3; + compat_ulong_t msg_stime; + compat_ulong_t msg_stime_high; + compat_ulong_t msg_rtime; + compat_ulong_t msg_rtime_high; + compat_ulong_t msg_ctime; + compat_ulong_t msg_ctime_high; compat_ulong_t msg_cbytes; compat_ulong_t msg_qnum; compat_ulong_t msg_qbytes; @@ -221,12 +210,12 @@ struct compat_msqid64_ds { struct compat_shmid64_ds { struct compat_ipc64_perm shm_perm; compat_size_t shm_segsz; - compat_time_t shm_atime; - compat_ulong_t __unused1; - compat_time_t shm_dtime; - compat_ulong_t __unused2; - compat_time_t shm_ctime; - compat_ulong_t __unused3; + compat_ulong_t shm_atime; + compat_ulong_t shm_atime_high; + compat_ulong_t shm_dtime; + compat_ulong_t shm_dtime_high; + compat_ulong_t shm_ctime; + compat_ulong_t shm_ctime_high; compat_pid_t shm_cpid; compat_pid_t shm_lpid; compat_ulong_t shm_nattch; diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 21bb624e0a7a..bc51b72fafd4 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -32,7 +32,7 @@ #define ARM64_HAS_VIRT_HOST_EXTN 11 #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 -#define ARM64_HYP_OFFSET_LOW 14 +#define ARM64_HARDEN_EL2_VECTORS 14 #define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 #define ARM64_HAS_NO_FPSIMD 16 #define ARM64_WORKAROUND_REPEAT_TLBI 17 @@ -43,13 +43,12 @@ #define ARM64_SVE 22 #define ARM64_UNMAP_KERNEL_AT_EL0 23 #define ARM64_HARDEN_BRANCH_PREDICTOR 24 -#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25 -#define ARM64_HAS_RAS_EXTN 26 -#define ARM64_WORKAROUND_843419 27 -#define ARM64_HAS_CACHE_IDC 28 -#define ARM64_HAS_CACHE_DIC 29 -#define ARM64_HW_DBM 30 +#define ARM64_HAS_RAS_EXTN 25 +#define ARM64_WORKAROUND_843419 26 +#define ARM64_HAS_CACHE_IDC 27 +#define ARM64_HAS_CACHE_DIC 28 +#define ARM64_HW_DBM 29 -#define ARM64_NCAPS 31 +#define ARM64_NCAPS 30 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 30014a9f8f2b..ea690b3562af 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_IMP_CAVIUM 0x43 #define ARM_CPU_IMP_BRCM 0x42 #define ARM_CPU_IMP_QCOM 0x51 +#define ARM_CPU_IMP_NVIDIA 0x4E #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -99,6 +100,9 @@ #define QCOM_CPU_PART_FALKOR 0xC00 #define QCOM_CPU_PART_KRYO 0x200 +#define NVIDIA_CPU_PART_DENVER 0x003 +#define NVIDIA_CPU_PART_CARMEL 0x004 + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -114,6 +118,8 @@ #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) +#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) +#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 4214c38d016b..f62c56b1793f 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -70,6 +70,7 @@ enum aarch64_insn_imm_type { AARCH64_INSN_IMM_6, AARCH64_INSN_IMM_S, AARCH64_INSN_IMM_R, + AARCH64_INSN_IMM_N, AARCH64_INSN_IMM_MAX }; @@ -314,6 +315,11 @@ __AARCH64_INSN_FUNCS(eor, 0x7F200000, 0x4A000000) __AARCH64_INSN_FUNCS(eon, 0x7F200000, 0x4A200000) __AARCH64_INSN_FUNCS(ands, 0x7F200000, 0x6A000000) __AARCH64_INSN_FUNCS(bics, 0x7F200000, 0x6A200000) +__AARCH64_INSN_FUNCS(and_imm, 0x7F800000, 0x12000000) +__AARCH64_INSN_FUNCS(orr_imm, 0x7F800000, 0x32000000) +__AARCH64_INSN_FUNCS(eor_imm, 0x7F800000, 0x52000000) +__AARCH64_INSN_FUNCS(ands_imm, 0x7F800000, 0x72000000) +__AARCH64_INSN_FUNCS(extr, 0x7FA00000, 0x13800000) __AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000) __AARCH64_INSN_FUNCS(bl, 0xFC000000, 0x94000000) __AARCH64_INSN_FUNCS(cbz, 0x7F000000, 0x34000000) @@ -423,6 +429,16 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, int shift, enum aarch64_insn_variant variant, enum aarch64_insn_logic_type type); +u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type, + enum aarch64_insn_variant variant, + enum aarch64_insn_register Rn, + enum aarch64_insn_register Rd, + u64 imm); +u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, + enum aarch64_insn_register Rm, + enum aarch64_insn_register Rn, + enum aarch64_insn_register Rd, + u8 lsb); u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base, enum aarch64_insn_prfm_type type, enum aarch64_insn_prfm_target target, diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index b0c84171e6a3..6dd285e979c9 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -25,6 +25,7 @@ /* Hyp Configuration Register (HCR) bits */ #define HCR_TEA (UL(1) << 37) #define HCR_TERR (UL(1) << 36) +#define HCR_TLOR (UL(1) << 35) #define HCR_E2H (UL(1) << 34) #define HCR_ID (UL(1) << 33) #define HCR_CD (UL(1) << 32) @@ -64,6 +65,7 @@ /* * The bits we set in HCR: + * TLOR: Trap LORegion register accesses * RW: 64bit by default, can be overridden for 32bit VMs * TAC: Trap ACTLR * TSC: Trap SMC @@ -81,9 +83,9 @@ */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ - HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) + HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ + HCR_FMO | HCR_IMO) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) -#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 24961b732e65..f6648a3e4152 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -33,6 +33,7 @@ #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) +/* Translate a kernel address of @sym into its equivalent linear mapping */ #define kvm_ksym_ref(sym) \ ({ \ void *val = &sym; \ @@ -57,7 +58,9 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); -extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); +extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu); + +extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_ich_vtr_el2(void); extern u64 __vgic_v3_read_vmcr(void); @@ -68,7 +71,19 @@ extern u32 __kvm_get_mdcr_el2(void); extern u32 __init_stage2_translation(void); -extern void __qcom_hyp_sanitize_btac_predictors(void); +#else /* __ASSEMBLY__ */ + +.macro get_host_ctxt reg, tmp + adr_l \reg, kvm_host_cpu_state + mrs \tmp, tpidr_el2 + add \reg, \reg, \tmp +.endm + +.macro get_vcpu_ptr vcpu, ctxt + get_host_ctxt \ctxt, \vcpu + ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] + kern_hyp_va \vcpu +.endm #endif diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 413dc82b1e89..1dab3a984608 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -26,13 +26,15 @@ #include <asm/esr.h> #include <asm/kvm_arm.h> +#include <asm/kvm_hyp.h> #include <asm/kvm_mmio.h> #include <asm/ptrace.h> #include <asm/cputype.h> #include <asm/virt.h> unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); -unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); +unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu); +void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v); bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); @@ -45,6 +47,11 @@ void kvm_inject_undef32(struct kvm_vcpu *vcpu); void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr); +static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.hcr_el2 & HCR_RW); +} + static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; @@ -59,16 +66,19 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) vcpu->arch.hcr_el2 &= ~HCR_RW; -} -static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.hcr_el2; + /* + * TID3: trap feature register accesses that we virtualise. + * For now this is conditional, since no AArch32 feature regs + * are currently virtualised. + */ + if (!vcpu_el1_is_32bit(vcpu)) + vcpu->arch.hcr_el2 |= HCR_TID3; } -static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) +static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr_el2 = hcr; + return (unsigned long *)&vcpu->arch.hcr_el2; } static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) @@ -81,11 +91,27 @@ static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; } -static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu) +static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1; } +static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.sysregs_loaded_on_cpu) + return read_sysreg_el1(elr); + else + return *__vcpu_elr_el1(vcpu); +} + +static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v) +{ + if (vcpu->arch.sysregs_loaded_on_cpu) + write_sysreg_el1(v, elr); + else + *__vcpu_elr_el1(vcpu) = v; +} + static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; @@ -135,13 +161,28 @@ static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val; } -/* Get vcpu SPSR for current mode */ -static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) +static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) - return vcpu_spsr32(vcpu); + return vcpu_read_spsr32(vcpu); - return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; + if (vcpu->arch.sysregs_loaded_on_cpu) + return read_sysreg_el1(spsr); + else + return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; +} + +static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) +{ + if (vcpu_mode_is_32bit(vcpu)) { + vcpu_write_spsr32(vcpu, v); + return; + } + + if (vcpu->arch.sysregs_loaded_on_cpu) + write_sysreg_el1(v, spsr); + else + vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; } static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) @@ -282,15 +323,18 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) { - return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; + return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; } static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) { - if (vcpu_mode_is_32bit(vcpu)) + if (vcpu_mode_is_32bit(vcpu)) { *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT; - else - vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25); + } else { + u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + sctlr |= (1 << 25); + vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1); + } } static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) @@ -298,7 +342,7 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) if (vcpu_mode_is_32bit(vcpu)) return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT); - return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); + return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); } static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 596f8e414a4c..469de8acd06f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -75,6 +75,9 @@ struct kvm_arch { /* Interrupt controller */ struct vgic_dist vgic; + + /* Mandated version of PSCI */ + u32 psci_version; }; #define KVM_NR_MEM_OBJS 40 @@ -272,9 +275,6 @@ struct kvm_vcpu_arch { /* IO related fields */ struct kvm_decode mmio_decode; - /* Interrupt related fields */ - u64 irq_lines; /* IRQ and FIQ levels */ - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; @@ -287,10 +287,25 @@ struct kvm_vcpu_arch { /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ u64 vsesr_el2; + + /* True when deferrable sysregs are loaded on the physical CPU, + * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ + bool sysregs_loaded_on_cpu; }; #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) -#define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) + +/* + * Only use __vcpu_sys_reg if you know you want the memory backed version of a + * register, and not the one most recently accessed by a running VCPU. For + * example, for userspace access or for system registers that are never context + * switched, but only emulated. + */ +#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) + +u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg); +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); + /* * CP14 and CP15 live in the same array, as they are backed by the * same system registers. @@ -298,14 +313,6 @@ struct kvm_vcpu_arch { #define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)]) #define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) -#ifdef CONFIG_CPU_BIG_ENDIAN -#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r)) -#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r) + 1) -#else -#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1) -#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r)) -#endif - struct kvm_vm_stat { ulong remote_tlb_flush; }; @@ -358,10 +365,15 @@ int kvm_perf_teardown(void); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); +void __kvm_set_tpidr_el2(u64 tpidr_el2); +DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); + static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { + u64 tpidr_el2; + /* * Call initialization code, and switch to the full blown HYP code. * If the cpucaps haven't been finalized yet, something has gone very @@ -370,6 +382,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, */ BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr); + + /* + * Calculate the raw per-cpu offset without a translation from the + * kernel's mapping to the linear mapping, and store it in tpidr_el2 + * so that we can use adr_l to access per-cpu variables in EL2. + */ + tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state) + - (u64)kvm_ksym_ref(kvm_host_cpu_state); + + kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2); } static inline void kvm_arch_hardware_unsetup(void) {} @@ -416,6 +438,13 @@ static inline void kvm_arm_vhe_guest_enter(void) static inline void kvm_arm_vhe_guest_exit(void) { local_daif_restore(DAIF_PROCCTX_NOIRQ); + + /* + * When we exit from the guest we change a number of CPU configuration + * parameters, such as traps. Make sure these changes take effect + * before running the host or additional guests. + */ + isb(); } static inline bool kvm_arm_harden_branch_predictor(void) @@ -423,4 +452,7 @@ static inline bool kvm_arm_harden_branch_predictor(void) return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR); } +void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); +void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index f26f9cd70c72..384c34397619 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -120,37 +120,38 @@ typeof(orig) * __hyp_text fname(void) \ return val; \ } -void __vgic_v2_save_state(struct kvm_vcpu *vcpu); -void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); +void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu); +void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu); +void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu); +void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); void __timer_enable_traps(struct kvm_vcpu *vcpu); void __timer_disable_traps(struct kvm_vcpu *vcpu); -void __sysreg_save_host_state(struct kvm_cpu_context *ctxt); -void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt); -void __sysreg_save_guest_state(struct kvm_cpu_context *ctxt); -void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt); +void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt); +void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt); +void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt); +void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt); +void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt); +void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt); void __sysreg32_save_state(struct kvm_vcpu *vcpu); void __sysreg32_restore_state(struct kvm_vcpu *vcpu); -void __debug_save_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt); -void __debug_restore_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt); -void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); -void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); +void __debug_switch_to_guest(struct kvm_vcpu *vcpu); +void __debug_switch_to_host(struct kvm_vcpu *vcpu); void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); bool __fpsimd_enabled(void); +void activate_traps_vhe_load(struct kvm_vcpu *vcpu); +void deactivate_traps_vhe_put(void); + u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); void __noreturn __hyp_do_panic(unsigned long, ...); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7faed6e48b46..6128992c2ded 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -69,9 +69,6 @@ * mappings, and none of this applies in that case. */ -#define HYP_PAGE_OFFSET_HIGH_MASK ((UL(1) << VA_BITS) - 1) -#define HYP_PAGE_OFFSET_LOW_MASK ((UL(1) << (VA_BITS - 1)) - 1) - #ifdef __ASSEMBLY__ #include <asm/alternative.h> @@ -81,28 +78,19 @@ * Convert a kernel VA into a HYP VA. * reg: VA to be converted. * - * This generates the following sequences: - * - High mask: - * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK - * nop - * - Low mask: - * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK - * and x0, x0, #HYP_PAGE_OFFSET_LOW_MASK - * - VHE: - * nop - * nop - * - * The "low mask" version works because the mask is a strict subset of - * the "high mask", hence performing the first mask for nothing. - * Should be completely invisible on any viable CPU. + * The actual code generation takes place in kvm_update_va_mask, and + * the instructions below are only there to reserve the space and + * perform the register allocation (kvm_update_va_mask uses the + * specific registers encoded in the instructions). */ .macro kern_hyp_va reg -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - and \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK -alternative_else_nop_endif -alternative_if ARM64_HYP_OFFSET_LOW - and \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK -alternative_else_nop_endif +alternative_cb kvm_update_va_mask + and \reg, \reg, #1 /* mask with va_mask */ + ror \reg, \reg, #1 /* rotate to the first tag bit */ + add \reg, \reg, #0 /* insert the low 12 bits of the tag */ + add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */ + ror \reg, \reg, #63 /* rotate back */ +alternative_cb_end .endm #else @@ -113,24 +101,44 @@ alternative_else_nop_endif #include <asm/mmu_context.h> #include <asm/pgtable.h> +void kvm_update_va_mask(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); + static inline unsigned long __kern_hyp_va(unsigned long v) { - asm volatile(ALTERNATIVE("and %0, %0, %1", - "nop", - ARM64_HAS_VIRT_HOST_EXTN) - : "+r" (v) - : "i" (HYP_PAGE_OFFSET_HIGH_MASK)); - asm volatile(ALTERNATIVE("nop", - "and %0, %0, %1", - ARM64_HYP_OFFSET_LOW) - : "+r" (v) - : "i" (HYP_PAGE_OFFSET_LOW_MASK)); + asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" + "ror %0, %0, #1\n" + "add %0, %0, #0\n" + "add %0, %0, #0, lsl 12\n" + "ror %0, %0, #63\n", + kvm_update_va_mask) + : "+r" (v)); return v; } #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) /* + * Obtain the PC-relative address of a kernel symbol + * s: symbol + * + * The goal of this macro is to return a symbol's address based on a + * PC-relative computation, as opposed to a loading the VA from a + * constant pool or something similar. This works well for HYP, as an + * absolute VA is guaranteed to be wrong. Only use this if trying to + * obtain the address of a symbol (i.e. not something you obtained by + * following a pointer). + */ +#define hyp_symbol_addr(s) \ + ({ \ + typeof(s) *addr; \ + asm("adrp %0, %1\n" \ + "add %0, %0, :lo12:%1\n" \ + : "=r" (addr) : "S" (&s)); \ + addr; \ + }) + +/* * We currently only support a 40bit IPA. */ #define KVM_PHYS_SHIFT (40) @@ -140,7 +148,11 @@ static inline unsigned long __kern_hyp_va(unsigned long v) #include <asm/stage2_pgtable.h> int create_hyp_mappings(void *from, void *to, pgprot_t prot); -int create_hyp_io_mappings(void *from, void *to, phys_addr_t); +int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, + void __iomem **kaddr, + void __iomem **haddr); +int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, + void **haddr); void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); @@ -249,7 +261,7 @@ struct kvm; static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) { - return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; + return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; } static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) @@ -348,36 +360,111 @@ static inline unsigned int kvm_get_vmid_bits(void) return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; } -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +/* + * We are not in the kvm->srcu critical section most of the time, so we take + * the SRCU read lock here. Since we copy the data from the user page, we + * can immediately drop the lock again. + */ +static inline int kvm_read_guest_lock(struct kvm *kvm, + gpa_t gpa, void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_read_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + +#ifdef CONFIG_KVM_INDIRECT_VECTORS +/* + * EL2 vectors can be mapped and rerouted in a number of ways, + * depending on the kernel configuration and CPU present: + * + * - If the CPU has the ARM64_HARDEN_BRANCH_PREDICTOR cap, the + * hardening sequence is placed in one of the vector slots, which is + * executed before jumping to the real vectors. + * + * - If the CPU has both the ARM64_HARDEN_EL2_VECTORS cap and the + * ARM64_HARDEN_BRANCH_PREDICTOR cap, the slot containing the + * hardening sequence is mapped next to the idmap page, and executed + * before jumping to the real vectors. + * + * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an + * empty slot is selected, mapped next to the idmap page, and + * executed before jumping to the real vectors. + * + * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with + * VHE, as we don't have hypervisor-specific mappings. If the system + * is VHE and yet selects this capability, it will be ignored. + */ #include <asm/mmu.h> +extern void *__kvm_bp_vect_base; +extern int __kvm_harden_el2_vector_slot; + static inline void *kvm_get_hyp_vector(void) { struct bp_hardening_data *data = arm64_get_bp_hardening_data(); - void *vect = kvm_ksym_ref(__kvm_hyp_vector); + void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); + int slot = -1; - if (data->fn) { - vect = __bp_harden_hyp_vecs_start + - data->hyp_vectors_slot * SZ_2K; + if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) { + vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs_start)); + slot = data->hyp_vectors_slot; + } - if (!has_vhe()) - vect = lm_alias(vect); + if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) { + vect = __kvm_bp_vect_base; + if (slot == -1) + slot = __kvm_harden_el2_vector_slot; } + if (slot != -1) + vect += slot * SZ_2K; + return vect; } +/* This is only called on a !VHE system */ static inline int kvm_map_vectors(void) { - return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start), - kvm_ksym_ref(__bp_harden_hyp_vecs_end), - PAGE_HYP_EXEC); -} + /* + * HBP = ARM64_HARDEN_BRANCH_PREDICTOR + * HEL2 = ARM64_HARDEN_EL2_VECTORS + * + * !HBP + !HEL2 -> use direct vectors + * HBP + !HEL2 -> use hardened vectors in place + * !HBP + HEL2 -> allocate one vector slot and use exec mapping + * HBP + HEL2 -> use hardened vertors and use exec mapping + */ + if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) { + __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs_start); + __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); + } + if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { + phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs_start); + unsigned long size = (__bp_harden_hyp_vecs_end - + __bp_harden_hyp_vecs_start); + + /* + * Always allocate a spare vector slot, as we don't + * know yet which CPUs have a BP hardening slot that + * we can reuse. + */ + __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot); + BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS); + return create_hyp_exec_mappings(vect_pa, size, + &__kvm_bp_vect_base); + } + + return 0; +} #else static inline void *kvm_get_hyp_vector(void) { - return kvm_ksym_ref(__kvm_hyp_vector); + return kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); } static inline int kvm_map_vectors(void) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 50fa96a49792..49d99214f43c 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -29,12 +29,6 @@ #include <asm/sizes.h> /* - * Allow for constants defined here to be used from assembly code - * by prepending the UL suffix only with actual C code compilation. - */ -#define UL(x) _AC(x, UL) - -/* * Size of the PCI I/O space. This must remain a power of two so that * IO_SPACE_LIMIT acts as a mask for the low bits of I/O addresses. */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index a050d4f3615d..dd320df0d026 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -21,6 +21,8 @@ #define USER_ASID_FLAG (UL(1) << USER_ASID_BIT) #define TTBR_ASID_MASK (UL(0xffff) << 48) +#define BP_HARDEN_EL2_SLOTS 4 + #ifndef __ASSEMBLY__ typedef struct { @@ -49,9 +51,13 @@ struct bp_hardening_data { bp_hardening_cb_t fn; }; -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +#if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \ + defined(CONFIG_HARDEN_EL2_VECTORS)) extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[]; +extern atomic_t arm64_el2_vector_last_slot; +#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */ +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index b6dbbe3123a9..97d0ef12e2ff 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -39,7 +39,7 @@ struct mod_arch_specific { u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, Elf64_Sym *sym); -u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val); +u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val); #ifdef CONFIG_RANDOMIZE_BASE extern u64 module_alloc_base; diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h index 8747f7c5e0e7..9e690686e8aa 100644 --- a/arch/arm64/include/asm/pci.h +++ b/arch/arm64/include/asm/pci.h @@ -18,11 +18,6 @@ #define pcibios_assign_all_busses() \ (pci_has_flag(PCI_REASSIGN_ALL_BUS)) -/* - * PCI address space differs from physical memory address space - */ -#define PCI_DMA_BUS_IS_PHYS (0) - #define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 extern int isa_dma_bridge_buggy; diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7e2c27e63cd8..7c4c8f318ba9 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -230,7 +230,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte) } } -extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); +extern void __sync_icache_dcache(pte_t pteval); /* * PTE bits configuration in the presence of hardware Dirty Bit Management @@ -253,7 +253,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t old_pte; if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) - __sync_icache_dcache(pte, addr); + __sync_icache_dcache(pte); /* * If the existing pte is valid, check for potential race with diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index ebdae15d665d..26c5bd7d88d8 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -122,11 +122,6 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - /* - * Ensure prior spin_lock operations to other locks have completed - * on this CPU before we test whether "lock" is locked. - */ - smp_mb(); /* ^^^ */ return !arch_spin_value_unlocked(READ_ONCE(*lock)); } diff --git a/arch/arm64/include/asm/stat.h b/arch/arm64/include/asm/stat.h index 15e35598ac40..eab738019707 100644 --- a/arch/arm64/include/asm/stat.h +++ b/arch/arm64/include/asm/stat.h @@ -20,6 +20,7 @@ #ifdef CONFIG_COMPAT +#include <linux/compat_time.h> #include <asm/compat.h> /* diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e7b9f154e476..6171178075dc 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -288,6 +288,12 @@ #define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) #define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) +#define SYS_LORSA_EL1 sys_reg(3, 0, 10, 4, 0) +#define SYS_LOREA_EL1 sys_reg(3, 0, 10, 4, 1) +#define SYS_LORN_EL1 sys_reg(3, 0, 10, 4, 2) +#define SYS_LORC_EL1 sys_reg(3, 0, 10, 4, 3) +#define SYS_LORID_EL1 sys_reg(3, 0, 10, 4, 7) + #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) #define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 9abbf3044654..04b3256f8e6d 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -206,6 +206,12 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) +/* KVM-as-firmware specific pseudo-registers */ +#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_FW | ((r) & 0xffff)) +#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 6a4bd80c75bd..bf825f38d206 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -55,10 +55,6 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o -ifeq ($(CONFIG_KVM),y) -arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o -endif - obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) head-y := head.o diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 414288a558c8..5c4bce4ac381 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -107,32 +107,53 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp return insn; } +static void patch_alternative(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + __le32 *replptr; + int i; + + replptr = ALT_REPL_PTR(alt); + for (i = 0; i < nr_inst; i++) { + u32 insn; + + insn = get_alt_insn(alt, origptr + i, replptr + i); + updptr[i] = cpu_to_le32(insn); + } +} + static void __apply_alternatives(void *alt_region, bool use_linear_alias) { struct alt_instr *alt; struct alt_region *region = alt_region; - __le32 *origptr, *replptr, *updptr; + __le32 *origptr, *updptr; + alternative_cb_t alt_cb; for (alt = region->begin; alt < region->end; alt++) { - u32 insn; - int i, nr_inst; + int nr_inst; - if (!cpus_have_cap(alt->cpufeature)) + /* Use ARM64_CB_PATCH as an unconditional patch */ + if (alt->cpufeature < ARM64_CB_PATCH && + !cpus_have_cap(alt->cpufeature)) continue; - BUG_ON(alt->alt_len != alt->orig_len); + if (alt->cpufeature == ARM64_CB_PATCH) + BUG_ON(alt->alt_len != 0); + else + BUG_ON(alt->alt_len != alt->orig_len); pr_info_once("patching kernel code\n"); origptr = ALT_ORIG_PTR(alt); - replptr = ALT_REPL_PTR(alt); updptr = use_linear_alias ? lm_alias(origptr) : origptr; - nr_inst = alt->alt_len / sizeof(insn); + nr_inst = alt->orig_len / AARCH64_INSN_SIZE; - for (i = 0; i < nr_inst; i++) { - insn = get_alt_insn(alt, origptr + i, replptr + i); - updptr[i] = cpu_to_le32(insn); - } + if (alt->cpufeature < ARM64_CB_PATCH) + alt_cb = patch_alternative; + else + alt_cb = ALT_REPL_PTR(alt); + + alt_cb(alt, origptr, updptr, nr_inst); flush_icache_range((uintptr_t)origptr, (uintptr_t)(origptr + nr_inst)); diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 66be504edb6c..d894a20b70b2 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -75,3 +75,11 @@ NOKPROBE_SYMBOL(_mcount); /* arm-smccc */ EXPORT_SYMBOL(__arm_smccc_smc); EXPORT_SYMBOL(__arm_smccc_hvc); + + /* tishift.S */ +extern long long __ashlti3(long long a, int b); +EXPORT_SYMBOL(__ashlti3); +extern long long __ashrti3(long long a, int b); +EXPORT_SYMBOL(__ashrti3); +extern long long __lshrti3(long long a, int b); +EXPORT_SYMBOL(__lshrti3); diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 1303e04110cd..5bdda651bd05 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -23,6 +23,7 @@ #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/kvm_host.h> +#include <linux/preempt.h> #include <linux/suspend.h> #include <asm/cpufeature.h> #include <asm/fixmap.h> @@ -93,6 +94,8 @@ int main(void) DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); BLANK(); + DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET); + BLANK(); DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); @@ -138,6 +141,7 @@ int main(void) DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); + DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); #endif #ifdef CONFIG_CPU_PM DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S deleted file mode 100644 index e5de33513b5d..000000000000 --- a/arch/arm64/kernel/bpi.S +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Contains CPU specific branch predictor invalidation sequences - * - * Copyright (C) 2018 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/linkage.h> -#include <linux/arm-smccc.h> - -.macro ventry target - .rept 31 - nop - .endr - b \target -.endm - -.macro vectors target - ventry \target + 0x000 - ventry \target + 0x080 - ventry \target + 0x100 - ventry \target + 0x180 - - ventry \target + 0x200 - ventry \target + 0x280 - ventry \target + 0x300 - ventry \target + 0x380 - - ventry \target + 0x400 - ventry \target + 0x480 - ventry \target + 0x500 - ventry \target + 0x580 - - ventry \target + 0x600 - ventry \target + 0x680 - ventry \target + 0x700 - ventry \target + 0x780 -.endm - - .align 11 -ENTRY(__bp_harden_hyp_vecs_start) - .rept 4 - vectors __kvm_hyp_vector - .endr -ENTRY(__bp_harden_hyp_vecs_end) - -ENTRY(__qcom_hyp_sanitize_link_stack_start) - stp x29, x30, [sp, #-16]! - .rept 16 - bl . + 4 - .endr - ldp x29, x30, [sp], #16 -ENTRY(__qcom_hyp_sanitize_link_stack_end) - -.macro smccc_workaround_1 inst - sub sp, sp, #(8 * 4) - stp x2, x3, [sp, #(8 * 0)] - stp x0, x1, [sp, #(8 * 2)] - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - \inst #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) -.endm - -ENTRY(__smccc_workaround_1_smc_start) - smccc_workaround_1 smc -ENTRY(__smccc_workaround_1_smc_end) - -ENTRY(__smccc_workaround_1_hvc_start) - smccc_workaround_1 hvc -ENTRY(__smccc_workaround_1_hvc_end) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 2df792771053..e4a1182deff7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -78,19 +78,17 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) config_sctlr_el1(SCTLR_EL1_UCT, 0); } +atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); + #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR #include <asm/mmu_context.h> #include <asm/cacheflush.h> DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); -#ifdef CONFIG_KVM -extern char __qcom_hyp_sanitize_link_stack_start[]; -extern char __qcom_hyp_sanitize_link_stack_end[]; +#ifdef CONFIG_KVM_INDIRECT_VECTORS extern char __smccc_workaround_1_smc_start[]; extern char __smccc_workaround_1_smc_end[]; -extern char __smccc_workaround_1_hvc_start[]; -extern char __smccc_workaround_1_hvc_end[]; static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, const char *hyp_vecs_end) @@ -108,7 +106,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, const char *hyp_vecs_end) { - static int last_slot = -1; static DEFINE_SPINLOCK(bp_lock); int cpu, slot = -1; @@ -121,10 +118,8 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, } if (slot == -1) { - last_slot++; - BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start) - / SZ_2K) <= last_slot); - slot = last_slot; + slot = atomic_inc_return(&arm64_el2_vector_last_slot); + BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); } @@ -133,12 +128,8 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, spin_unlock(&bp_lock); } #else -#define __qcom_hyp_sanitize_link_stack_start NULL -#define __qcom_hyp_sanitize_link_stack_end NULL #define __smccc_workaround_1_smc_start NULL #define __smccc_workaround_1_smc_end NULL -#define __smccc_workaround_1_hvc_start NULL -#define __smccc_workaround_1_hvc_end NULL static void __install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, @@ -146,7 +137,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, { __this_cpu_write(bp_hardening_data.fn, fn); } -#endif /* CONFIG_KVM */ +#endif /* CONFIG_KVM_INDIRECT_VECTORS */ static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, bp_hardening_cb_t fn, @@ -179,12 +170,25 @@ static void call_hvc_arch_workaround_1(void) arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } +static void qcom_link_stack_sanitization(void) +{ + u64 tmp; + + asm volatile("mov %0, x30 \n" + ".rept 16 \n" + "bl . + 4 \n" + ".endr \n" + "mov x30, %0 \n" + : "=&r" (tmp)); +} + static void enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) { bp_hardening_cb_t cb; void *smccc_start, *smccc_end; struct arm_smccc_res res; + u32 midr = read_cpuid_id(); if (!entry->matches(entry, SCOPE_LOCAL_CPU)) return; @@ -199,8 +203,9 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) if ((int)res.a0 < 0) return; cb = call_hvc_arch_workaround_1; - smccc_start = __smccc_workaround_1_hvc_start; - smccc_end = __smccc_workaround_1_hvc_end; + /* This is a guest, no need to patch KVM vectors */ + smccc_start = NULL; + smccc_end = NULL; break; case PSCI_CONDUIT_SMC: @@ -217,30 +222,14 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) return; } + if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || + ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) + cb = qcom_link_stack_sanitization; + install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); return; } - -static void qcom_link_stack_sanitization(void) -{ - u64 tmp; - - asm volatile("mov %0, x30 \n" - ".rept 16 \n" - "bl . + 4 \n" - ".endr \n" - "mov x30, %0 \n" - : "=&r" (tmp)); -} - -static void -qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry) -{ - install_bp_hardening_cb(entry, qcom_link_stack_sanitization, - __qcom_hyp_sanitize_link_stack_start, - __qcom_hyp_sanitize_link_stack_end); -} #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ @@ -325,24 +314,19 @@ static const struct midr_range arm64_bp_harden_smccc_cpus[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - {}, -}; - -static const struct midr_range qcom_bp_harden_cpus[] = { MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + MIDR_ALL_VERSIONS(MIDR_NVIDIA_DENVER), {}, }; -static const struct arm64_cpu_capabilities arm64_bp_harden_list[] = { - { - CAP_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), - .cpu_enable = enable_smccc_arch_workaround_1, - }, - { - CAP_MIDR_RANGE_LIST(qcom_bp_harden_cpus), - .cpu_enable = qcom_enable_link_stack_sanitization, - }, +#endif + +#ifdef CONFIG_HARDEN_EL2_VECTORS + +static const struct midr_range arm64_harden_el2_vectors[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), {}, }; @@ -492,13 +476,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, - .matches = multi_entry_cap_matches, - .cpu_enable = multi_entry_cap_cpu_enable, - .match_list = arm64_bp_harden_list, + .cpu_enable = enable_smccc_arch_workaround_1, + ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), }, +#endif +#ifdef CONFIG_HARDEN_EL2_VECTORS { - .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus), + .desc = "EL2 vector hardening", + .capability = ARM64_HARDEN_EL2_VECTORS, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors), }, #endif { diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 96b15d7b10a8..9d1b06d67c53 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -838,19 +838,6 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _ MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK)); } -static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, - int __unused) -{ - phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); - - /* - * Activate the lower HYP offset only if: - * - the idmap doesn't clash with it, - * - the kernel is not running at EL2. - */ - return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode(); -} - static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused) { u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); @@ -881,6 +868,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, static const struct midr_range kpti_safe_list[] = { MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + { /* sentinel */ } }; char const *str = "command line option"; @@ -1121,12 +1109,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR0_EL0_SHIFT, .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, }, - { - .desc = "Reduced HYP mapping offset", - .capability = ARM64_HYP_OFFSET_LOW, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .matches = hyp_offset_low, - }, #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 { .desc = "Kernel page table isolation (KPTI)", diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 87a35364e750..4bcdd0318729 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -882,7 +882,7 @@ asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) si_code = FPE_FLTRES; } - memset(&info, 0, sizeof(info)); + clear_siginfo(&info); info.si_signo = SIGFPE; info.si_code = si_code; info.si_addr = (void __user *)instruction_pointer(regs); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2b6b8b24e5ab..b0853069702f 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -577,6 +577,13 @@ set_hcr: 7: msr mdcr_el2, x3 // Configure debug traps + /* LORegions */ + mrs x1, id_aa64mmfr1_el1 + ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 + cbz x0, 1f + msr_s SYS_LORC_EL1, xzr +1: + /* Stage-2 translation */ msr vttbr_el2, xzr diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 74bb56f656ef..413dbe530da8 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -30,7 +30,6 @@ #include <linux/smp.h> #include <linux/uaccess.h> -#include <asm/compat.h> #include <asm/current.h> #include <asm/debug-monitors.h> #include <asm/hw_breakpoint.h> diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 2718a77da165..816d03c4c913 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -35,6 +35,7 @@ #define AARCH64_INSN_SF_BIT BIT(31) #define AARCH64_INSN_N_BIT BIT(22) +#define AARCH64_INSN_LSL_12 BIT(22) static int aarch64_insn_encoding_class[] = { AARCH64_INSN_CLS_UNKNOWN, @@ -343,6 +344,10 @@ static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type, mask = BIT(6) - 1; shift = 16; break; + case AARCH64_INSN_IMM_N: + mask = 1; + shift = 22; + break; default: return -EINVAL; } @@ -899,9 +904,18 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, return AARCH64_BREAK_FAULT; } + /* We can't encode more than a 24bit value (12bit + 12bit shift) */ + if (imm & ~(BIT(24) - 1)) + goto out; + + /* If we have something in the top 12 bits... */ if (imm & ~(SZ_4K - 1)) { - pr_err("%s: invalid immediate encoding %d\n", __func__, imm); - return AARCH64_BREAK_FAULT; + /* ... and in the low 12 bits -> error */ + if (imm & (SZ_4K - 1)) + goto out; + + imm >>= 12; + insn |= AARCH64_INSN_LSL_12; } insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst); @@ -909,6 +923,10 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src); return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm); + +out: + pr_err("%s: invalid immediate encoding %d\n", __func__, imm); + return AARCH64_BREAK_FAULT; } u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst, @@ -1481,3 +1499,171 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = { __check_hi, __check_ls, __check_ge, __check_lt, __check_gt, __check_le, __check_al, __check_al }; + +static bool range_of_ones(u64 val) +{ + /* Doesn't handle full ones or full zeroes */ + u64 sval = val >> __ffs64(val); + + /* One of Sean Eron Anderson's bithack tricks */ + return ((sval + 1) & (sval)) == 0; +} + +static u32 aarch64_encode_immediate(u64 imm, + enum aarch64_insn_variant variant, + u32 insn) +{ + unsigned int immr, imms, n, ones, ror, esz, tmp; + u64 mask = ~0UL; + + /* Can't encode full zeroes or full ones */ + if (!imm || !~imm) + return AARCH64_BREAK_FAULT; + + switch (variant) { + case AARCH64_INSN_VARIANT_32BIT: + if (upper_32_bits(imm)) + return AARCH64_BREAK_FAULT; + esz = 32; + break; + case AARCH64_INSN_VARIANT_64BIT: + insn |= AARCH64_INSN_SF_BIT; + esz = 64; + break; + default: + pr_err("%s: unknown variant encoding %d\n", __func__, variant); + return AARCH64_BREAK_FAULT; + } + + /* + * Inverse of Replicate(). Try to spot a repeating pattern + * with a pow2 stride. + */ + for (tmp = esz / 2; tmp >= 2; tmp /= 2) { + u64 emask = BIT(tmp) - 1; + + if ((imm & emask) != ((imm >> tmp) & emask)) + break; + + esz = tmp; + mask = emask; + } + + /* N is only set if we're encoding a 64bit value */ + n = esz == 64; + + /* Trim imm to the element size */ + imm &= mask; + + /* That's how many ones we need to encode */ + ones = hweight64(imm); + + /* + * imms is set to (ones - 1), prefixed with a string of ones + * and a zero if they fit. Cap it to 6 bits. + */ + imms = ones - 1; + imms |= 0xf << ffs(esz); + imms &= BIT(6) - 1; + + /* Compute the rotation */ + if (range_of_ones(imm)) { + /* + * Pattern: 0..01..10..0 + * + * Compute how many rotate we need to align it right + */ + ror = __ffs64(imm); + } else { + /* + * Pattern: 0..01..10..01..1 + * + * Fill the unused top bits with ones, and check if + * the result is a valid immediate (all ones with a + * contiguous ranges of zeroes). + */ + imm |= ~mask; + if (!range_of_ones(~imm)) + return AARCH64_BREAK_FAULT; + + /* + * Compute the rotation to get a continuous set of + * ones, with the first bit set at position 0 + */ + ror = fls(~imm); + } + + /* + * immr is the number of bits we need to rotate back to the + * original set of ones. Note that this is relative to the + * element size... + */ + immr = (esz - ror) % esz; + + insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, n); + insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr); + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms); +} + +u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type, + enum aarch64_insn_variant variant, + enum aarch64_insn_register Rn, + enum aarch64_insn_register Rd, + u64 imm) +{ + u32 insn; + + switch (type) { + case AARCH64_INSN_LOGIC_AND: + insn = aarch64_insn_get_and_imm_value(); + break; + case AARCH64_INSN_LOGIC_ORR: + insn = aarch64_insn_get_orr_imm_value(); + break; + case AARCH64_INSN_LOGIC_EOR: + insn = aarch64_insn_get_eor_imm_value(); + break; + case AARCH64_INSN_LOGIC_AND_SETFLAGS: + insn = aarch64_insn_get_ands_imm_value(); + break; + default: + pr_err("%s: unknown logical encoding %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd); + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn); + return aarch64_encode_immediate(imm, variant, insn); +} + +u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, + enum aarch64_insn_register Rm, + enum aarch64_insn_register Rn, + enum aarch64_insn_register Rd, + u8 lsb) +{ + u32 insn; + + insn = aarch64_insn_get_extr_value(); + + switch (variant) { + case AARCH64_INSN_VARIANT_32BIT: + if (lsb > 31) + return AARCH64_BREAK_FAULT; + break; + case AARCH64_INSN_VARIANT_64BIT: + if (lsb > 63) + return AARCH64_BREAK_FAULT; + insn |= AARCH64_INSN_SF_BIT; + insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, 1); + break; + default: + pr_err("%s: unknown variant encoding %d\n", __func__, variant); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, lsb); + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd); + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn); + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm); +} diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index fa3637284a3d..f0690c2ca3e0 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -43,7 +43,7 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, } #ifdef CONFIG_ARM64_ERRATUM_843419 -u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val) +u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val) { struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core : &mod->arch.init; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 719fde8dcc19..155fd91e78f4 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -215,7 +215,7 @@ static int reloc_insn_adrp(struct module *mod, __le32 *place, u64 val) insn &= ~BIT(31); } else { /* out of range for ADR -> emit a veneer */ - val = module_emit_adrp_veneer(mod, place, val & ~0xfff); + val = module_emit_veneer_for_adrp(mod, place, val & ~0xfff); if (!val) return -ENOEXEC; insn = aarch64_insn_gen_branch_imm((u64)place, val, diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 1d091d048d04..0bbac612146e 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/compat.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/perf_event.h> #include <linux/bug.h> #include <linux/sched/task_stack.h> -#include <asm/compat.h> #include <asm/perf_regs.h> #include <asm/ptrace.h> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 71d99af24ef2..7ff81fed46e1 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -25,6 +25,7 @@ #include <linux/sched/signal.h> #include <linux/sched/task_stack.h> #include <linux/mm.h> +#include <linux/nospec.h> #include <linux/smp.h> #include <linux/ptrace.h> #include <linux/user.h> @@ -249,15 +250,20 @@ static struct perf_event *ptrace_hbp_get_event(unsigned int note_type, switch (note_type) { case NT_ARM_HW_BREAK: - if (idx < ARM_MAX_BRP) - bp = tsk->thread.debug.hbp_break[idx]; + if (idx >= ARM_MAX_BRP) + goto out; + idx = array_index_nospec(idx, ARM_MAX_BRP); + bp = tsk->thread.debug.hbp_break[idx]; break; case NT_ARM_HW_WATCH: - if (idx < ARM_MAX_WRP) - bp = tsk->thread.debug.hbp_watch[idx]; + if (idx >= ARM_MAX_WRP) + goto out; + idx = array_index_nospec(idx, ARM_MAX_WRP); + bp = tsk->thread.debug.hbp_watch[idx]; break; } +out: return bp; } @@ -1458,9 +1464,7 @@ static int compat_ptrace_gethbpregs(struct task_struct *tsk, compat_long_t num, { int ret; u32 kdata; - mm_segment_t old_fs = get_fs(); - set_fs(KERNEL_DS); /* Watchpoint */ if (num < 0) { ret = compat_ptrace_hbp_get(NT_ARM_HW_WATCH, tsk, num, &kdata); @@ -1471,7 +1475,6 @@ static int compat_ptrace_gethbpregs(struct task_struct *tsk, compat_long_t num, } else { ret = compat_ptrace_hbp_get(NT_ARM_HW_BREAK, tsk, num, &kdata); } - set_fs(old_fs); if (!ret) ret = put_user(kdata, data); @@ -1484,7 +1487,6 @@ static int compat_ptrace_sethbpregs(struct task_struct *tsk, compat_long_t num, { int ret; u32 kdata = 0; - mm_segment_t old_fs = get_fs(); if (num == 0) return 0; @@ -1493,12 +1495,10 @@ static int compat_ptrace_sethbpregs(struct task_struct *tsk, compat_long_t num, if (ret) return ret; - set_fs(KERNEL_DS); if (num < 0) ret = compat_ptrace_hbp_set(NT_ARM_HW_WATCH, tsk, num, &kdata); else ret = compat_ptrace_hbp_set(NT_ARM_HW_BREAK, tsk, num, &kdata); - set_fs(old_fs); return ret; } diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 93ab57dcfc14..a6109825eeb9 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -112,6 +112,7 @@ long compat_arm_syscall(struct pt_regs *regs) break; } + clear_siginfo(&info); info.si_signo = SIGILL; info.si_errno = 0; info.si_code = ILL_ILLTRP; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index ba964da31a25..d399d459397b 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -277,7 +277,8 @@ void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size) * If we were single stepping, we want to get the step exception after * we return from the trap. */ - user_fastforward_single_step(current); + if (user_mode(regs)) + user_fastforward_single_step(current); } static LIST_HEAD(undef_hook); @@ -366,7 +367,7 @@ void force_signal_inject(int signal, int code, unsigned long address) } /* Force signals we don't understand to SIGKILL */ - if (WARN_ON(signal != SIGKILL || + if (WARN_ON(signal != SIGKILL && siginfo_layout(signal, code) != SIL_FAULT)) { signal = SIGKILL; } @@ -634,6 +635,7 @@ asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); + clear_siginfo(&info); info.si_signo = SIGILL; info.si_errno = 0; info.si_code = ILL_ILLOPC; diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 2257dfcc44cc..a2e3a5af1113 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -57,6 +57,9 @@ config KVM_ARM_PMU Adds support for a virtual Performance Monitoring Unit (PMU) in virtual machines. +config KVM_INDIRECT_VECTORS + def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS) + source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 87c4f7ae24de..93afff91cb7c 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -16,7 +16,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/e kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o -kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o +kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index fa63b28c65e0..a1f4ebdfe6d3 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -46,7 +46,9 @@ static DEFINE_PER_CPU(u32, mdcr_el2); */ static void save_guest_debug_regs(struct kvm_vcpu *vcpu) { - vcpu->arch.guest_debug_preserved.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1); + u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1); + + vcpu->arch.guest_debug_preserved.mdscr_el1 = val; trace_kvm_arm_set_dreg32("Saved MDSCR_EL1", vcpu->arch.guest_debug_preserved.mdscr_el1); @@ -54,10 +56,12 @@ static void save_guest_debug_regs(struct kvm_vcpu *vcpu) static void restore_guest_debug_regs(struct kvm_vcpu *vcpu) { - vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_preserved.mdscr_el1; + u64 val = vcpu->arch.guest_debug_preserved.mdscr_el1; + + vcpu_write_sys_reg(vcpu, val, MDSCR_EL1); trace_kvm_arm_set_dreg32("Restored MDSCR_EL1", - vcpu_sys_reg(vcpu, MDSCR_EL1)); + vcpu_read_sys_reg(vcpu, MDSCR_EL1)); } /** @@ -108,6 +112,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) { bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY); + unsigned long mdscr; trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); @@ -152,9 +157,13 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) */ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; - vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_SS; + mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); + mdscr |= DBG_MDSCR_SS; + vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); } else { - vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS; + mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); + mdscr &= ~DBG_MDSCR_SS; + vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); } trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu)); @@ -170,7 +179,9 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) */ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { /* Enable breakpoints/watchpoints */ - vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_MDE; + mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); + mdscr |= DBG_MDSCR_MDE; + vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state; vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; @@ -193,8 +204,12 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) if (trap_debug) vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; + /* If KDE or MDE are set, perform a full save/restore cycle. */ + if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE)) + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); - trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1)); + trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1)); } void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 959e50d2588c..56a0260ceb11 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -25,6 +25,7 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/fs.h> +#include <kvm/arm_psci.h> #include <asm/cputype.h> #include <linux/uaccess.h> #include <asm/kvm.h> @@ -205,7 +206,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) { return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu) - + NUM_TIMER_REGS; + + kvm_arm_get_fw_num_regs(vcpu) + NUM_TIMER_REGS; } /** @@ -225,6 +226,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) uindices++; } + ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices); + if (ret) + return ret; + uindices += kvm_arm_get_fw_num_regs(vcpu); + ret = copy_timer_indices(vcpu, uindices); if (ret) return ret; @@ -243,6 +249,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return get_core_reg(vcpu, reg); + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW) + return kvm_arm_get_fw_reg(vcpu, reg); + if (is_timer_reg(reg->id)) return get_timer_reg(vcpu, reg); @@ -259,6 +268,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return set_core_reg(vcpu, reg); + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW) + return kvm_arm_set_fw_reg(vcpu, reg); + if (is_timer_reg(reg->id)) return set_timer_reg(vcpu, reg); diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 5aa9ccf6db99..6fd91b31a131 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -117,7 +117,6 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE) /* Set the stack and new vectors */ kern_hyp_va x1 mov sp, x1 - kern_hyp_va x2 msr vbar_el2, x2 /* copy tpidr_el1 into tpidr_el2 for use by HYP */ diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index f04400d494b7..4313f7475333 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -7,10 +7,10 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING KVM=../../../../virt/kvm -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o obj-$(CONFIG_KVM_ARM_HOST) += entry.o diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index dabb5cc7b087..3e717f66f011 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -66,11 +66,6 @@ default: write_debug(ptr[0], reg, 0); \ } -static void __hyp_text __debug_save_spe_vhe(u64 *pmscr_el1) -{ - /* The vcpu can run. but it can't hide. */ -} - static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) { u64 reg; @@ -103,11 +98,7 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) dsb(nsh); } -static hyp_alternate_select(__debug_save_spe, - __debug_save_spe_nvhe, __debug_save_spe_vhe, - ARM64_HAS_VIRT_HOST_EXTN); - -static void __hyp_text __debug_restore_spe(u64 pmscr_el1) +static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1) { if (!pmscr_el1) return; @@ -119,16 +110,13 @@ static void __hyp_text __debug_restore_spe(u64 pmscr_el1) write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); } -void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) +static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) { u64 aa64dfr0; int brps, wrps; - if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) - return; - aa64dfr0 = read_sysreg(id_aa64dfr0_el1); brps = (aa64dfr0 >> 12) & 0xf; wrps = (aa64dfr0 >> 20) & 0xf; @@ -141,16 +129,13 @@ void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); } -void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) +static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) { u64 aa64dfr0; int brps, wrps; - if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) - return; - aa64dfr0 = read_sysreg(id_aa64dfr0_el1); brps = (aa64dfr0 >> 12) & 0xf; @@ -164,27 +149,54 @@ void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); } -void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu) +void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) { - /* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform - * a full save/restore cycle. */ - if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) || - (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE)) - vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; - - __debug_save_state(vcpu, &vcpu->arch.host_debug_state.regs, - kern_hyp_va(vcpu->arch.host_cpu_context)); - __debug_save_spe()(&vcpu->arch.host_debug_state.pmscr_el1); + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + struct kvm_guest_debug_arch *host_dbg; + struct kvm_guest_debug_arch *guest_dbg; + + /* + * Non-VHE: Disable and flush SPE data generation + * VHE: The vcpu can run, but it can't hide. + */ + if (!has_vhe()) + __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); + + if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) + return; + + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + guest_ctxt = &vcpu->arch.ctxt; + host_dbg = &vcpu->arch.host_debug_state.regs; + guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); + + __debug_save_state(vcpu, host_dbg, host_ctxt); + __debug_restore_state(vcpu, guest_dbg, guest_ctxt); } -void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu) +void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) { - __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); - __debug_restore_state(vcpu, &vcpu->arch.host_debug_state.regs, - kern_hyp_va(vcpu->arch.host_cpu_context)); + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + struct kvm_guest_debug_arch *host_dbg; + struct kvm_guest_debug_arch *guest_dbg; + + if (!has_vhe()) + __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); + + if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) + return; + + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + guest_ctxt = &vcpu->arch.ctxt; + host_dbg = &vcpu->arch.host_debug_state.regs; + guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); + + __debug_save_state(vcpu, guest_dbg, guest_ctxt); + __debug_restore_state(vcpu, host_dbg, host_ctxt); - if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) - vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY; + vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY; } u32 __hyp_text __kvm_get_mdcr_el2(void) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index fdd1068ee3a5..e41a161d313a 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -62,9 +62,6 @@ ENTRY(__guest_enter) // Store the host regs save_callee_saved_regs x1 - // Store host_ctxt and vcpu for use at exit time - stp x1, x0, [sp, #-16]! - add x18, x0, #VCPU_CONTEXT // Restore guest regs x0-x17 @@ -118,8 +115,7 @@ ENTRY(__guest_exit) // Store the guest regs x19-x29, lr save_callee_saved_regs x1 - // Restore the host_ctxt from the stack - ldr x2, [sp], #16 + get_host_ctxt x2, x3 // Now restore the host regs restore_callee_saved_regs x2 @@ -213,15 +209,3 @@ alternative_endif eret ENDPROC(__fpsimd_guest_restore) - -ENTRY(__qcom_hyp_sanitize_btac_predictors) - /** - * Call SMC64 with Silicon provider serviceID 23<<8 (0xc2001700) - * 0xC2000000-0xC200FFFF: assigned to SiP Service Calls - * b15-b0: contains SiP functionID - */ - movz x0, #0x1700 - movk x0, #0xc200, lsl #16 - smc #0 - ret -ENDPROC(__qcom_hyp_sanitize_btac_predictors) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index f36464bd57c5..bffece27b5c1 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 - ARM Ltd + * Copyright (C) 2015-2018 - ARM Ltd * Author: Marc Zyngier <marc.zyngier@arm.com> * * This program is free software; you can redistribute it and/or modify @@ -24,6 +24,7 @@ #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> #include <asm/kvm_mmu.h> +#include <asm/mmu.h> .text .pushsection .hyp.text, "ax" @@ -55,15 +56,9 @@ ENTRY(__vhe_hyp_call) ENDPROC(__vhe_hyp_call) el1_sync: // Guest trapped into EL2 - stp x0, x1, [sp, #-16]! - -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs x1, esr_el2 -alternative_else - mrs x1, esr_el1 -alternative_endif - lsr x0, x1, #ESR_ELx_EC_SHIFT + mrs x0, esr_el2 + lsr x0, x0, #ESR_ELx_EC_SHIFT cmp x0, #ESR_ELx_EC_HVC64 ccmp x0, #ESR_ELx_EC_HVC32, #4, ne b.ne el1_trap @@ -117,10 +112,14 @@ el1_hvc_guest: eret el1_trap: + get_vcpu_ptr x1, x0 + + mrs x0, esr_el2 + lsr x0, x0, #ESR_ELx_EC_SHIFT /* * x0: ESR_EC + * x1: vcpu pointer */ - ldr x1, [sp, #16 + 8] // vcpu stored by __guest_enter /* * We trap the first access to the FP/SIMD to save the host context @@ -137,18 +136,18 @@ alternative_else_nop_endif b __guest_exit el1_irq: - stp x0, x1, [sp, #-16]! - ldr x1, [sp, #16 + 8] + get_vcpu_ptr x1, x0 mov x0, #ARM_EXCEPTION_IRQ b __guest_exit el1_error: - stp x0, x1, [sp, #-16]! - ldr x1, [sp, #16 + 8] + get_vcpu_ptr x1, x0 mov x0, #ARM_EXCEPTION_EL1_SERROR b __guest_exit el2_error: + ldp x0, x1, [sp], #16 + /* * Only two possibilities: * 1) Either we come from the exit path, having just unmasked @@ -180,14 +179,7 @@ ENTRY(__hyp_do_panic) ENDPROC(__hyp_do_panic) ENTRY(__hyp_panic) - /* - * '=kvm_host_cpu_state' is a host VA from the constant pool, it may - * not be accessible by this address from EL2, hyp_panic() converts - * it with kern_hyp_va() before use. - */ - ldr x0, =kvm_host_cpu_state - mrs x1, tpidr_el2 - add x0, x0, x1 + get_host_ctxt x0, x1 b hyp_panic ENDPROC(__hyp_panic) @@ -206,32 +198,104 @@ ENDPROC(\label) invalid_vector el2h_sync_invalid invalid_vector el2h_irq_invalid invalid_vector el2h_fiq_invalid - invalid_vector el1_sync_invalid - invalid_vector el1_irq_invalid invalid_vector el1_fiq_invalid .ltorg .align 11 +.macro valid_vect target + .align 7 + stp x0, x1, [sp, #-16]! + b \target +.endm + +.macro invalid_vect target + .align 7 + b \target + ldp x0, x1, [sp], #16 + b \target +.endm + ENTRY(__kvm_hyp_vector) - ventry el2t_sync_invalid // Synchronous EL2t - ventry el2t_irq_invalid // IRQ EL2t - ventry el2t_fiq_invalid // FIQ EL2t - ventry el2t_error_invalid // Error EL2t - - ventry el2h_sync_invalid // Synchronous EL2h - ventry el2h_irq_invalid // IRQ EL2h - ventry el2h_fiq_invalid // FIQ EL2h - ventry el2_error // Error EL2h - - ventry el1_sync // Synchronous 64-bit EL1 - ventry el1_irq // IRQ 64-bit EL1 - ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error // Error 64-bit EL1 - - ventry el1_sync // Synchronous 32-bit EL1 - ventry el1_irq // IRQ 32-bit EL1 - ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error // Error 32-bit EL1 + invalid_vect el2t_sync_invalid // Synchronous EL2t + invalid_vect el2t_irq_invalid // IRQ EL2t + invalid_vect el2t_fiq_invalid // FIQ EL2t + invalid_vect el2t_error_invalid // Error EL2t + + invalid_vect el2h_sync_invalid // Synchronous EL2h + invalid_vect el2h_irq_invalid // IRQ EL2h + invalid_vect el2h_fiq_invalid // FIQ EL2h + valid_vect el2_error // Error EL2h + + valid_vect el1_sync // Synchronous 64-bit EL1 + valid_vect el1_irq // IRQ 64-bit EL1 + invalid_vect el1_fiq_invalid // FIQ 64-bit EL1 + valid_vect el1_error // Error 64-bit EL1 + + valid_vect el1_sync // Synchronous 32-bit EL1 + valid_vect el1_irq // IRQ 32-bit EL1 + invalid_vect el1_fiq_invalid // FIQ 32-bit EL1 + valid_vect el1_error // Error 32-bit EL1 ENDPROC(__kvm_hyp_vector) + +#ifdef CONFIG_KVM_INDIRECT_VECTORS +.macro hyp_ventry + .align 7 +1: .rept 27 + nop + .endr +/* + * The default sequence is to directly branch to the KVM vectors, + * using the computed offset. This applies for VHE as well as + * !ARM64_HARDEN_EL2_VECTORS. + * + * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced + * with: + * + * stp x0, x1, [sp, #-16]! + * movz x0, #(addr & 0xffff) + * movk x0, #((addr >> 16) & 0xffff), lsl #16 + * movk x0, #((addr >> 32) & 0xffff), lsl #32 + * br x0 + * + * Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4. + * See kvm_patch_vector_branch for details. + */ +alternative_cb kvm_patch_vector_branch + b __kvm_hyp_vector + (1b - 0b) + nop + nop + nop + nop +alternative_cb_end +.endm + +.macro generate_vectors +0: + .rept 16 + hyp_ventry + .endr + .org 0b + SZ_2K // Safety measure +.endm + + .align 11 +ENTRY(__bp_harden_hyp_vecs_start) + .rept BP_HARDEN_EL2_SLOTS + generate_vectors + .endr +ENTRY(__bp_harden_hyp_vecs_end) + + .popsection + +ENTRY(__smccc_workaround_1_smc_start) + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + smc #0 + ldp x2, x3, [sp, #(8 * 0)] + ldp x0, x1, [sp, #(8 * 2)] + add sp, sp, #(8 * 4) +ENTRY(__smccc_workaround_1_smc_end) +#endif diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 870f4b1587f9..d9645236e474 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -33,49 +33,22 @@ static bool __hyp_text __fpsimd_enabled_nvhe(void) return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); } -static bool __hyp_text __fpsimd_enabled_vhe(void) +static bool fpsimd_enabled_vhe(void) { return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN); } -static hyp_alternate_select(__fpsimd_is_enabled, - __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe, - ARM64_HAS_VIRT_HOST_EXTN); - -bool __hyp_text __fpsimd_enabled(void) -{ - return __fpsimd_is_enabled()(); -} - -static void __hyp_text __activate_traps_vhe(void) -{ - u64 val; - - val = read_sysreg(cpacr_el1); - val |= CPACR_EL1_TTA; - val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN); - write_sysreg(val, cpacr_el1); - - write_sysreg(kvm_get_hyp_vector(), vbar_el1); -} - -static void __hyp_text __activate_traps_nvhe(void) +/* Save the 32-bit only FPSIMD system register state */ +static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) { - u64 val; + if (!vcpu_el1_is_32bit(vcpu)) + return; - val = CPTR_EL2_DEFAULT; - val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ; - write_sysreg(val, cptr_el2); + vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); } -static hyp_alternate_select(__activate_traps_arch, - __activate_traps_nvhe, __activate_traps_vhe, - ARM64_HAS_VIRT_HOST_EXTN); - -static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) +static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) { - u64 val; - /* * We are about to set CPTR_EL2.TFP to trap all floating point * register accesses to EL2, however, the ARM ARM clearly states that @@ -85,23 +58,17 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to * it will cause an exception. */ - val = vcpu->arch.hcr_el2; - - if (!(val & HCR_RW) && system_supports_fpsimd()) { + if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { write_sysreg(1 << 30, fpexc32_el2); isb(); } +} - if (val & HCR_RW) /* for AArch64 only: */ - val |= HCR_TID3; /* TID3: trap feature register accesses */ - - write_sysreg(val, hcr_el2); - - if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (val & HCR_VSE)) - write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); - - /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ +static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu) +{ + /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ write_sysreg(1 << 15, hstr_el2); + /* * Make sure we trap PMU access from EL0 to EL2. Also sanitize * PMSELR_EL0 to make sure it never contains the cycle @@ -111,19 +78,56 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) write_sysreg(0, pmselr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); - __activate_traps_arch()(); } -static void __hyp_text __deactivate_traps_vhe(void) +static void __hyp_text __deactivate_traps_common(void) { - extern char vectors[]; /* kernel exception vectors */ - u64 mdcr_el2 = read_sysreg(mdcr_el2); + write_sysreg(0, hstr_el2); + write_sysreg(0, pmuserenr_el0); +} - mdcr_el2 &= MDCR_EL2_HPMN_MASK | - MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | - MDCR_EL2_TPMS; +static void activate_traps_vhe(struct kvm_vcpu *vcpu) +{ + u64 val; - write_sysreg(mdcr_el2, mdcr_el2); + val = read_sysreg(cpacr_el1); + val |= CPACR_EL1_TTA; + val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN); + write_sysreg(val, cpacr_el1); + + write_sysreg(kvm_get_hyp_vector(), vbar_el1); +} + +static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) +{ + u64 val; + + __activate_traps_common(vcpu); + + val = CPTR_EL2_DEFAULT; + val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ; + write_sysreg(val, cptr_el2); +} + +static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 hcr = vcpu->arch.hcr_el2; + + write_sysreg(hcr, hcr_el2); + + if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) + write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); + + __activate_traps_fpsimd32(vcpu); + if (has_vhe()) + activate_traps_vhe(vcpu); + else + __activate_traps_nvhe(vcpu); +} + +static void deactivate_traps_vhe(void) +{ + extern char vectors[]; /* kernel exception vectors */ write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); write_sysreg(vectors, vbar_el1); @@ -133,6 +137,8 @@ static void __hyp_text __deactivate_traps_nvhe(void) { u64 mdcr_el2 = read_sysreg(mdcr_el2); + __deactivate_traps_common(); + mdcr_el2 &= MDCR_EL2_HPMN_MASK; mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; @@ -141,10 +147,6 @@ static void __hyp_text __deactivate_traps_nvhe(void) write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); } -static hyp_alternate_select(__deactivate_traps_arch, - __deactivate_traps_nvhe, __deactivate_traps_vhe, - ARM64_HAS_VIRT_HOST_EXTN); - static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) { /* @@ -156,14 +158,32 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) if (vcpu->arch.hcr_el2 & HCR_VSE) vcpu->arch.hcr_el2 = read_sysreg(hcr_el2); - __deactivate_traps_arch()(); - write_sysreg(0, hstr_el2); - write_sysreg(0, pmuserenr_el0); + if (has_vhe()) + deactivate_traps_vhe(); + else + __deactivate_traps_nvhe(); +} + +void activate_traps_vhe_load(struct kvm_vcpu *vcpu) +{ + __activate_traps_common(vcpu); +} + +void deactivate_traps_vhe_put(void) +{ + u64 mdcr_el2 = read_sysreg(mdcr_el2); + + mdcr_el2 &= MDCR_EL2_HPMN_MASK | + MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | + MDCR_EL2_TPMS; + + write_sysreg(mdcr_el2, mdcr_el2); + + __deactivate_traps_common(); } -static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) +static void __hyp_text __activate_vm(struct kvm *kvm) { - struct kvm *kvm = kern_hyp_va(vcpu->kvm); write_sysreg(kvm->arch.vttbr, vttbr_el2); } @@ -172,29 +192,22 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) write_sysreg(0, vttbr_el2); } -static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) +/* Save VGICv3 state on non-VHE systems */ +static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) { - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { __vgic_v3_save_state(vcpu); - else - __vgic_v2_save_state(vcpu); - - write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2); + __vgic_v3_deactivate_traps(vcpu); + } } -static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) +/* Restore VGICv3 state on non_VEH systems */ +static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) { - u64 val; - - val = read_sysreg(hcr_el2); - val |= HCR_INT_OVERRIDE; - val |= vcpu->arch.irq_lines; - write_sysreg(val, hcr_el2); - - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { + __vgic_v3_activate_traps(vcpu); __vgic_v3_restore_state(vcpu); - else - __vgic_v2_restore_state(vcpu); + } } static bool __hyp_text __true_value(void) @@ -305,54 +318,27 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu) } } -int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) +/* + * Return true when we were able to fixup the guest exit and should return to + * the guest, false when we should restore the host state and return to the + * main run loop. + */ +static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { - struct kvm_cpu_context *host_ctxt; - struct kvm_cpu_context *guest_ctxt; - bool fp_enabled; - u64 exit_code; - - vcpu = kern_hyp_va(vcpu); - - host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); - host_ctxt->__hyp_running_vcpu = vcpu; - guest_ctxt = &vcpu->arch.ctxt; - - __sysreg_save_host_state(host_ctxt); - __debug_cond_save_host_state(vcpu); - - __activate_traps(vcpu); - __activate_vm(vcpu); - - __vgic_restore_state(vcpu); - __timer_enable_traps(vcpu); - - /* - * We must restore the 32-bit state before the sysregs, thanks - * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). - */ - __sysreg32_restore_state(vcpu); - __sysreg_restore_guest_state(guest_ctxt); - __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); - - /* Jump in the fire! */ -again: - exit_code = __guest_enter(vcpu, host_ctxt); - /* And we're baaack! */ - - if (ARM_EXCEPTION_CODE(exit_code) != ARM_EXCEPTION_IRQ) + if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr); + /* * We're using the raw exception code in order to only process * the trap if no SError is pending. We will come back to the * same PC once the SError has been injected, and replay the * trapping instruction. */ - if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu)) - goto again; + if (*exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu)) + return true; if (static_branch_unlikely(&vgic_v2_cpuif_trap) && - exit_code == ARM_EXCEPTION_TRAP) { + *exit_code == ARM_EXCEPTION_TRAP) { bool valid; valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && @@ -366,9 +352,9 @@ again: if (ret == 1) { if (__skip_instr(vcpu)) - goto again; + return true; else - exit_code = ARM_EXCEPTION_TRAP; + *exit_code = ARM_EXCEPTION_TRAP; } if (ret == -1) { @@ -380,62 +366,135 @@ again: */ if (!__skip_instr(vcpu)) *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; - exit_code = ARM_EXCEPTION_EL1_SERROR; + *exit_code = ARM_EXCEPTION_EL1_SERROR; } - - /* 0 falls through to be handler out of EL2 */ } } if (static_branch_unlikely(&vgic_v3_cpuif_trap) && - exit_code == ARM_EXCEPTION_TRAP && + *exit_code == ARM_EXCEPTION_TRAP && (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { int ret = __vgic_v3_perform_cpuif_access(vcpu); if (ret == 1) { if (__skip_instr(vcpu)) - goto again; + return true; else - exit_code = ARM_EXCEPTION_TRAP; + *exit_code = ARM_EXCEPTION_TRAP; } - - /* 0 falls through to be handled out of EL2 */ } - if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) { - u32 midr = read_cpuid_id(); + /* Return to the host kernel and handle the exit */ + return false; +} - /* Apply BTAC predictors mitigation to all Falkor chips */ - if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || - ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) { - __qcom_hyp_sanitize_btac_predictors(); - } +/* Switch to the guest for VHE systems running in EL2 */ +int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + bool fp_enabled; + u64 exit_code; + + host_ctxt = vcpu->arch.host_cpu_context; + host_ctxt->__hyp_running_vcpu = vcpu; + guest_ctxt = &vcpu->arch.ctxt; + + sysreg_save_host_state_vhe(host_ctxt); + + __activate_traps(vcpu); + __activate_vm(vcpu->kvm); + + sysreg_restore_guest_state_vhe(guest_ctxt); + __debug_switch_to_guest(vcpu); + + do { + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + + /* And we're baaack! */ + } while (fixup_guest_exit(vcpu, &exit_code)); + + fp_enabled = fpsimd_enabled_vhe(); + + sysreg_save_guest_state_vhe(guest_ctxt); + + __deactivate_traps(vcpu); + + sysreg_restore_host_state_vhe(host_ctxt); + + if (fp_enabled) { + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); + __fpsimd_save_fpexc32(vcpu); } - fp_enabled = __fpsimd_enabled(); + __debug_switch_to_host(vcpu); + + return exit_code; +} + +/* Switch to the guest for legacy non-VHE systems */ +int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + bool fp_enabled; + u64 exit_code; + + vcpu = kern_hyp_va(vcpu); + + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + host_ctxt->__hyp_running_vcpu = vcpu; + guest_ctxt = &vcpu->arch.ctxt; + + __sysreg_save_state_nvhe(host_ctxt); + + __activate_traps(vcpu); + __activate_vm(kern_hyp_va(vcpu->kvm)); + + __hyp_vgic_restore_state(vcpu); + __timer_enable_traps(vcpu); + + /* + * We must restore the 32-bit state before the sysregs, thanks + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_state_nvhe(guest_ctxt); + __debug_switch_to_guest(vcpu); + + do { + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + + /* And we're baaack! */ + } while (fixup_guest_exit(vcpu, &exit_code)); - __sysreg_save_guest_state(guest_ctxt); + fp_enabled = __fpsimd_enabled_nvhe(); + + __sysreg_save_state_nvhe(guest_ctxt); __sysreg32_save_state(vcpu); __timer_disable_traps(vcpu); - __vgic_save_state(vcpu); + __hyp_vgic_save_state(vcpu); __deactivate_traps(vcpu); __deactivate_vm(vcpu); - __sysreg_restore_host_state(host_ctxt); + __sysreg_restore_state_nvhe(host_ctxt); if (fp_enabled) { __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); + __fpsimd_save_fpexc32(vcpu); } - __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); /* * This must come after restoring the host sysregs, since a non-VHE * system may enable SPE here and make use of the TTBRs. */ - __debug_cond_restore_host_state(vcpu); + __debug_switch_to_host(vcpu); return exit_code; } @@ -443,10 +502,20 @@ again: static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, - struct kvm_vcpu *vcpu) + struct kvm_cpu_context *__host_ctxt) { + struct kvm_vcpu *vcpu; unsigned long str_va; + vcpu = __host_ctxt->__hyp_running_vcpu; + + if (read_sysreg(vttbr_el2)) { + __timer_disable_traps(vcpu); + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + __sysreg_restore_state_nvhe(__host_ctxt); + } + /* * Force the panic string to be loaded from the literal pool, * making sure it is a kernel address and not a PC-relative @@ -460,40 +529,31 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, read_sysreg(hpfar_el2), par, vcpu); } -static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, - struct kvm_vcpu *vcpu) +static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, + struct kvm_cpu_context *host_ctxt) { + struct kvm_vcpu *vcpu; + vcpu = host_ctxt->__hyp_running_vcpu; + + __deactivate_traps(vcpu); + sysreg_restore_host_state_vhe(host_ctxt); + panic(__hyp_panic_string, spsr, elr, read_sysreg_el2(esr), read_sysreg_el2(far), read_sysreg(hpfar_el2), par, vcpu); } -static hyp_alternate_select(__hyp_call_panic, - __hyp_call_panic_nvhe, __hyp_call_panic_vhe, - ARM64_HAS_VIRT_HOST_EXTN); - -void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt) +void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) { - struct kvm_vcpu *vcpu = NULL; - u64 spsr = read_sysreg_el2(spsr); u64 elr = read_sysreg_el2(elr); u64 par = read_sysreg(par_el1); - if (read_sysreg(vttbr_el2)) { - struct kvm_cpu_context *host_ctxt; - - host_ctxt = kern_hyp_va(__host_ctxt); - vcpu = host_ctxt->__hyp_running_vcpu; - __timer_disable_traps(vcpu); - __deactivate_traps(vcpu); - __deactivate_vm(vcpu); - __sysreg_restore_host_state(host_ctxt); - } - - /* Call panic for real */ - __hyp_call_panic()(spsr, elr, par, vcpu); + if (!has_vhe()) + __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt); + else + __hyp_call_panic_vhe(spsr, elr, par, host_ctxt); unreachable(); } diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 2c17afd2be96..b3894df6bf1a 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -19,32 +19,43 @@ #include <linux/kvm_host.h> #include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> -/* Yes, this does nothing, on purpose */ -static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { } - /* * Non-VHE: Both host and guest must save everything. * - * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0, - * and guest must save everything. + * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and pstate, + * which are handled as part of the el2 return state) on every switch. + * tpidr_el0 and tpidrro_el0 only need to be switched when going + * to host userspace or a different VCPU. EL1 registers only need to be + * switched when potentially going to run a different VCPU. The latter two + * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put. */ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) { - ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); - ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); - ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); + + /* + * The host arm64 Linux uses sp_el0 to point to 'current' and it must + * therefore be saved/restored on every entry/exit to/from the guest. + */ ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); } -static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) +static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) +{ + ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); + ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); +} + +static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr); + ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr); ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0); ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1); @@ -64,6 +75,10 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr); ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr); +} + +static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) +{ ctxt->gp_regs.regs.pc = read_sysreg_el2(elr); ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr); @@ -71,36 +86,48 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); } -static hyp_alternate_select(__sysreg_call_save_host_state, - __sysreg_save_state, __sysreg_do_nothing, - ARM64_HAS_VIRT_HOST_EXTN); +void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_el1_state(ctxt); + __sysreg_save_common_state(ctxt); + __sysreg_save_user_state(ctxt); + __sysreg_save_el2_return_state(ctxt); +} -void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt) +void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt) { - __sysreg_call_save_host_state()(ctxt); __sysreg_save_common_state(ctxt); } -void __hyp_text __sysreg_save_guest_state(struct kvm_cpu_context *ctxt) +void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt) { - __sysreg_save_state(ctxt); __sysreg_save_common_state(ctxt); + __sysreg_save_el2_return_state(ctxt); } static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) { - write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); - write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); - write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); + + /* + * The host arm64 Linux uses sp_el0 to point to 'current' and it must + * therefore be saved/restored on every entry/exit to/from the guest. + */ write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); } -static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) +static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); + write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); +} + +static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) { write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr); + write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr); write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0); write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1); @@ -120,6 +147,11 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); write_sysreg_el1(ctxt->gp_regs.elr_el1, elr); write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr); +} + +static void __hyp_text +__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) +{ write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr); @@ -127,27 +159,30 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); } -static hyp_alternate_select(__sysreg_call_restore_host_state, - __sysreg_restore_state, __sysreg_do_nothing, - ARM64_HAS_VIRT_HOST_EXTN); +void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_el1_state(ctxt); + __sysreg_restore_common_state(ctxt); + __sysreg_restore_user_state(ctxt); + __sysreg_restore_el2_return_state(ctxt); +} -void __hyp_text __sysreg_restore_host_state(struct kvm_cpu_context *ctxt) +void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt) { - __sysreg_call_restore_host_state()(ctxt); __sysreg_restore_common_state(ctxt); } -void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt) +void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) { - __sysreg_restore_state(ctxt); __sysreg_restore_common_state(ctxt); + __sysreg_restore_el2_return_state(ctxt); } void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) { u64 *spsr, *sysreg; - if (read_sysreg(hcr_el2) & HCR_RW) + if (!vcpu_el1_is_32bit(vcpu)) return; spsr = vcpu->arch.ctxt.gp_regs.spsr; @@ -161,10 +196,7 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); - if (__fpsimd_enabled()) - sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2); - - if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); } @@ -172,7 +204,7 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) { u64 *spsr, *sysreg; - if (read_sysreg(hcr_el2) & HCR_RW) + if (!vcpu_el1_is_32bit(vcpu)) return; spsr = vcpu->arch.ctxt.gp_regs.spsr; @@ -186,6 +218,78 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) write_sysreg(sysreg[DACR32_EL2], dacr32_el2); write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); - if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); } + +/** + * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU + * + * @vcpu: The VCPU pointer + * + * Load system registers that do not affect the host's execution, for + * example EL1 system registers on a VHE system where the host kernel + * runs at EL2. This function is called from KVM's vcpu_load() function + * and loading system register state early avoids having to load them on + * every entry to the VM. + */ +void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; + + if (!has_vhe()) + return; + + __sysreg_save_user_state(host_ctxt); + + /* + * Load guest EL1 and user state + * + * We must restore the 32-bit state before the sysregs, thanks + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_user_state(guest_ctxt); + __sysreg_restore_el1_state(guest_ctxt); + + vcpu->arch.sysregs_loaded_on_cpu = true; + + activate_traps_vhe_load(vcpu); +} + +/** + * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU + * + * @vcpu: The VCPU pointer + * + * Save guest system registers that do not affect the host's execution, for + * example EL1 system registers on a VHE system where the host kernel + * runs at EL2. This function is called from KVM's vcpu_put() function + * and deferring saving system register state until we're no longer running the + * VCPU avoids having to save them on every exit from the VM. + */ +void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; + + if (!has_vhe()) + return; + + deactivate_traps_vhe_put(); + + __sysreg_save_el1_state(guest_ctxt); + __sysreg_save_user_state(guest_ctxt); + __sysreg32_save_state(vcpu); + + /* Restore host user state */ + __sysreg_restore_user_state(host_ctxt); + + vcpu->arch.sysregs_loaded_on_cpu = false; +} + +void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2) +{ + asm("msr tpidr_el2, %0": : "r" (tpidr_el2)); +} diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c new file mode 100644 index 000000000000..39be799d0417 --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/irqchip/arm-gic.h> +#include <linux/kvm_host.h> +#include <linux/swab.h> + +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> + +static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) + return !!(read_sysreg_el2(spsr) & COMPAT_PSR_E_BIT); + + return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE); +} + +/* + * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the + * guest. + * + * @vcpu: the offending vcpu + * + * Returns: + * 1: GICV access successfully performed + * 0: Not a GICV access + * -1: Illegal GICV access + */ +int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct vgic_dist *vgic = &kvm->arch.vgic; + phys_addr_t fault_ipa; + void __iomem *addr; + int rd; + + /* Build the full address */ + fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + + /* If not for GICV, move on */ + if (fault_ipa < vgic->vgic_cpu_base || + fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE)) + return 0; + + /* Reject anything but a 32bit access */ + if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32)) + return -1; + + /* Not aligned? Don't bother */ + if (fault_ipa & 3) + return -1; + + rd = kvm_vcpu_dabt_get_rd(vcpu); + addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va; + addr += fault_ipa - vgic->vgic_cpu_base; + + if (kvm_vcpu_dabt_iswrite(vcpu)) { + u32 data = vcpu_get_reg(vcpu, rd); + if (__is_be(vcpu)) { + /* guest pre-swabbed data, undo this for writel() */ + data = swab32(data); + } + writel_relaxed(data, addr); + } else { + u32 data = readl_relaxed(addr); + if (__is_be(vcpu)) { + /* guest expects swabbed data */ + data = swab32(data); + } + vcpu_set_reg(vcpu, rd, data); + } + + return 1; +} diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 60666a056944..d8e71659ba7e 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -58,7 +58,7 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) exc_offset = LOWER_EL_AArch32_VECTOR; } - return vcpu_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; + return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; } static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) @@ -67,13 +67,13 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u32 esr = 0; - *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); + vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; - *vcpu_spsr(vcpu) = cpsr; + vcpu_write_spsr(vcpu, cpsr); - vcpu_sys_reg(vcpu, FAR_EL1) = addr; + vcpu_write_sys_reg(vcpu, addr, FAR_EL1); /* * Build an {i,d}abort, depending on the level and the @@ -94,7 +94,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr if (!is_iabt) esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT; - vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT; + vcpu_write_sys_reg(vcpu, esr | ESR_ELx_FSC_EXTABT, ESR_EL1); } static void inject_undef64(struct kvm_vcpu *vcpu) @@ -102,11 +102,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu) unsigned long cpsr = *vcpu_cpsr(vcpu); u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); + vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; - *vcpu_spsr(vcpu) = cpsr; + vcpu_write_spsr(vcpu, cpsr); /* * Build an unknown exception, depending on the instruction @@ -115,7 +115,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) if (kvm_vcpu_trap_il_is32bit(vcpu)) esr |= ESR_ELx_IL; - vcpu_sys_reg(vcpu, ESR_EL1) = esr; + vcpu_write_sys_reg(vcpu, esr, ESR_EL1); } /** @@ -128,7 +128,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) */ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { - if (!(vcpu->arch.hcr_el2 & HCR_RW)) + if (vcpu_el1_is_32bit(vcpu)) kvm_inject_dabt32(vcpu, addr); else inject_abt64(vcpu, false, addr); @@ -144,7 +144,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) */ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { - if (!(vcpu->arch.hcr_el2 & HCR_RW)) + if (vcpu_el1_is_32bit(vcpu)) kvm_inject_pabt32(vcpu, addr); else inject_abt64(vcpu, true, addr); @@ -158,7 +158,7 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) */ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { - if (!(vcpu->arch.hcr_el2 & HCR_RW)) + if (vcpu_el1_is_32bit(vcpu)) kvm_inject_undef32(vcpu); else inject_undef64(vcpu); @@ -167,7 +167,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr) { vcpu_set_vsesr(vcpu, esr); - vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE); + *vcpu_hcr(vcpu) |= HCR_VSE; } /** diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c index bbc6ae32e4af..eefe403a2e63 100644 --- a/arch/arm64/kvm/regmap.c +++ b/arch/arm64/kvm/regmap.c @@ -141,28 +141,61 @@ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) /* * Return the SPSR for the current mode of the virtual CPU. */ -unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu) +static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu) { unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; switch (mode) { - case COMPAT_PSR_MODE_SVC: - mode = KVM_SPSR_SVC; - break; - case COMPAT_PSR_MODE_ABT: - mode = KVM_SPSR_ABT; - break; - case COMPAT_PSR_MODE_UND: - mode = KVM_SPSR_UND; - break; - case COMPAT_PSR_MODE_IRQ: - mode = KVM_SPSR_IRQ; - break; - case COMPAT_PSR_MODE_FIQ: - mode = KVM_SPSR_FIQ; - break; + case COMPAT_PSR_MODE_SVC: return KVM_SPSR_SVC; + case COMPAT_PSR_MODE_ABT: return KVM_SPSR_ABT; + case COMPAT_PSR_MODE_UND: return KVM_SPSR_UND; + case COMPAT_PSR_MODE_IRQ: return KVM_SPSR_IRQ; + case COMPAT_PSR_MODE_FIQ: return KVM_SPSR_FIQ; + default: BUG(); + } +} + +unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu) +{ + int spsr_idx = vcpu_spsr32_mode(vcpu); + + if (!vcpu->arch.sysregs_loaded_on_cpu) + return vcpu_gp_regs(vcpu)->spsr[spsr_idx]; + + switch (spsr_idx) { + case KVM_SPSR_SVC: + return read_sysreg_el1(spsr); + case KVM_SPSR_ABT: + return read_sysreg(spsr_abt); + case KVM_SPSR_UND: + return read_sysreg(spsr_und); + case KVM_SPSR_IRQ: + return read_sysreg(spsr_irq); + case KVM_SPSR_FIQ: + return read_sysreg(spsr_fiq); default: BUG(); } +} + +void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v) +{ + int spsr_idx = vcpu_spsr32_mode(vcpu); + + if (!vcpu->arch.sysregs_loaded_on_cpu) { + vcpu_gp_regs(vcpu)->spsr[spsr_idx] = v; + return; + } - return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode]; + switch (spsr_idx) { + case KVM_SPSR_SVC: + write_sysreg_el1(v, spsr); + case KVM_SPSR_ABT: + write_sysreg(v, spsr_abt); + case KVM_SPSR_UND: + write_sysreg(v, spsr_und); + case KVM_SPSR_IRQ: + write_sysreg(v, spsr_irq); + case KVM_SPSR_FIQ: + write_sysreg(v, spsr_fiq); + } } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 50a43c7b97ca..6e3b969391fd 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -35,6 +35,7 @@ #include <asm/kvm_coproc.h> #include <asm/kvm_emulate.h> #include <asm/kvm_host.h> +#include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> #include <asm/perf_event.h> #include <asm/sysreg.h> @@ -76,6 +77,93 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, return false; } +u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg) +{ + if (!vcpu->arch.sysregs_loaded_on_cpu) + goto immediate_read; + + /* + * System registers listed in the switch are not saved on every + * exit from the guest but are only saved on vcpu_put. + * + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but + * should never be listed below, because the guest cannot modify its + * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's + * thread when emulating cross-VCPU communication. + */ + switch (reg) { + case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1); + case SCTLR_EL1: return read_sysreg_s(sctlr_EL12); + case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1); + case CPACR_EL1: return read_sysreg_s(cpacr_EL12); + case TTBR0_EL1: return read_sysreg_s(ttbr0_EL12); + case TTBR1_EL1: return read_sysreg_s(ttbr1_EL12); + case TCR_EL1: return read_sysreg_s(tcr_EL12); + case ESR_EL1: return read_sysreg_s(esr_EL12); + case AFSR0_EL1: return read_sysreg_s(afsr0_EL12); + case AFSR1_EL1: return read_sysreg_s(afsr1_EL12); + case FAR_EL1: return read_sysreg_s(far_EL12); + case MAIR_EL1: return read_sysreg_s(mair_EL12); + case VBAR_EL1: return read_sysreg_s(vbar_EL12); + case CONTEXTIDR_EL1: return read_sysreg_s(contextidr_EL12); + case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0); + case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0); + case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1); + case AMAIR_EL1: return read_sysreg_s(amair_EL12); + case CNTKCTL_EL1: return read_sysreg_s(cntkctl_EL12); + case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1); + case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2); + case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2); + case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2); + } + +immediate_read: + return __vcpu_sys_reg(vcpu, reg); +} + +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +{ + if (!vcpu->arch.sysregs_loaded_on_cpu) + goto immediate_write; + + /* + * System registers listed in the switch are not restored on every + * entry to the guest but are only restored on vcpu_load. + * + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but + * should never be listed below, because the the MPIDR should only be + * set once, before running the VCPU, and never changed later. + */ + switch (reg) { + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return; + case SCTLR_EL1: write_sysreg_s(val, sctlr_EL12); return; + case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return; + case CPACR_EL1: write_sysreg_s(val, cpacr_EL12); return; + case TTBR0_EL1: write_sysreg_s(val, ttbr0_EL12); return; + case TTBR1_EL1: write_sysreg_s(val, ttbr1_EL12); return; + case TCR_EL1: write_sysreg_s(val, tcr_EL12); return; + case ESR_EL1: write_sysreg_s(val, esr_EL12); return; + case AFSR0_EL1: write_sysreg_s(val, afsr0_EL12); return; + case AFSR1_EL1: write_sysreg_s(val, afsr1_EL12); return; + case FAR_EL1: write_sysreg_s(val, far_EL12); return; + case MAIR_EL1: write_sysreg_s(val, mair_EL12); return; + case VBAR_EL1: write_sysreg_s(val, vbar_EL12); return; + case CONTEXTIDR_EL1: write_sysreg_s(val, contextidr_EL12); return; + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return; + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return; + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return; + case AMAIR_EL1: write_sysreg_s(val, amair_EL12); return; + case CNTKCTL_EL1: write_sysreg_s(val, cntkctl_EL12); return; + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return; + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return; + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return; + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return; + } + +immediate_write: + __vcpu_sys_reg(vcpu, reg) = val; +} + /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ static u32 cache_levels; @@ -121,16 +209,26 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { bool was_enabled = vcpu_has_cache_enabled(vcpu); + u64 val; + int reg = r->reg; BUG_ON(!p->is_write); - if (!p->is_aarch32) { - vcpu_sys_reg(vcpu, r->reg) = p->regval; + /* See the 32bit mapping in kvm_host.h */ + if (p->is_aarch32) + reg = r->reg / 2; + + if (!p->is_aarch32 || !p->is_32bit) { + val = p->regval; } else { - if (!p->is_32bit) - vcpu_cp15_64_high(vcpu, r->reg) = upper_32_bits(p->regval); - vcpu_cp15_64_low(vcpu, r->reg) = lower_32_bits(p->regval); + val = vcpu_read_sys_reg(vcpu, reg); + if (r->reg % 2) + val = (p->regval << 32) | (u64)lower_32_bits(val); + else + val = ((u64)upper_32_bits(val) << 32) | + lower_32_bits(p->regval); } + vcpu_write_sys_reg(vcpu, val, reg); kvm_toggle_cache(vcpu, was_enabled); return true; @@ -175,6 +273,14 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu, return read_zero(vcpu, p); } +static bool trap_undef(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + kvm_inject_undefined(vcpu); + return false; +} + static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -231,10 +337,10 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) { - vcpu_sys_reg(vcpu, r->reg) = p->regval; + vcpu_write_sys_reg(vcpu, p->regval, r->reg); vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; } else { - p->regval = vcpu_sys_reg(vcpu, r->reg); + p->regval = vcpu_read_sys_reg(vcpu, r->reg); } trace_trap_reg(__func__, r->reg, p->is_write, p->regval); @@ -447,7 +553,8 @@ static void reset_wcr(struct kvm_vcpu *vcpu, static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - vcpu_sys_reg(vcpu, AMAIR_EL1) = read_sysreg(amair_el1); + u64 amair = read_sysreg(amair_el1); + vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1); } static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) @@ -464,7 +571,7 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0); mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1); mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2); - vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr; + vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1); } static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) @@ -478,12 +585,12 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) */ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); - vcpu_sys_reg(vcpu, PMCR_EL0) = val; + __vcpu_sys_reg(vcpu, PMCR_EL0) = val; } static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) { - u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); + u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0); bool enabled = (reg & flags) || vcpu_mode_priv(vcpu); if (!enabled) @@ -525,14 +632,14 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { /* Only update writeable bits of PMCR */ - val = vcpu_sys_reg(vcpu, PMCR_EL0); + val = __vcpu_sys_reg(vcpu, PMCR_EL0); val &= ~ARMV8_PMU_PMCR_MASK; val |= p->regval & ARMV8_PMU_PMCR_MASK; - vcpu_sys_reg(vcpu, PMCR_EL0) = val; + __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); } else { /* PMCR.P & PMCR.C are RAZ */ - val = vcpu_sys_reg(vcpu, PMCR_EL0) + val = __vcpu_sys_reg(vcpu, PMCR_EL0) & ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C); p->regval = val; } @@ -550,10 +657,10 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (p->is_write) - vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval; + __vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval; else /* return PMSELR.SEL field */ - p->regval = vcpu_sys_reg(vcpu, PMSELR_EL0) + p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK; return true; @@ -586,7 +693,7 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx) { u64 pmcr, val; - pmcr = vcpu_sys_reg(vcpu, PMCR_EL0); + pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0); val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) { kvm_inject_undefined(vcpu); @@ -611,7 +718,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, if (pmu_access_event_counter_el0_disabled(vcpu)) return false; - idx = vcpu_sys_reg(vcpu, PMSELR_EL0) + idx = __vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK; } else if (r->Op2 == 0) { /* PMCCNTR_EL0 */ @@ -666,7 +773,7 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) { /* PMXEVTYPER_EL0 */ - idx = vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK; + idx = __vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK; reg = PMEVTYPER0_EL0 + idx; } else if (r->CRn == 14 && (r->CRm & 12) == 12) { idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); @@ -684,9 +791,9 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { kvm_pmu_set_counter_event_type(vcpu, p->regval, idx); - vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK; + __vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK; } else { - p->regval = vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK; + p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK; } return true; @@ -708,15 +815,15 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val = p->regval & mask; if (r->Op2 & 0x1) { /* accessing PMCNTENSET_EL0 */ - vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; kvm_pmu_enable_counter(vcpu, val); } else { /* accessing PMCNTENCLR_EL0 */ - vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; kvm_pmu_disable_counter(vcpu, val); } } else { - p->regval = vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask; } return true; @@ -740,12 +847,12 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (r->Op2 & 0x1) /* accessing PMINTENSET_EL1 */ - vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val; + __vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val; else /* accessing PMINTENCLR_EL1 */ - vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; + __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; } else { - p->regval = vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask; } return true; @@ -765,12 +872,12 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { if (r->CRm & 0x2) /* accessing PMOVSSET_EL0 */ - vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); else /* accessing PMOVSCLR_EL0 */ - vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); } else { - p->regval = vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask; } return true; @@ -807,10 +914,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; } - vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval - & ARMV8_PMU_USERENR_MASK; + __vcpu_sys_reg(vcpu, PMUSERENR_EL0) = + p->regval & ARMV8_PMU_USERENR_MASK; } else { - p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0) + p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0) & ARMV8_PMU_USERENR_MASK; } @@ -889,10 +996,14 @@ static u64 read_id_reg(struct sys_reg_desc const *r, bool raz) if (id == SYS_ID_AA64PFR0_EL1) { if (val & (0xfUL << ID_AA64PFR0_SVE_SHIFT)) - pr_err_once("kvm [%i]: SVE unsupported for guests, suppressing\n", - task_pid_nr(current)); + kvm_debug("SVE unsupported for guests, suppressing\n"); val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT); + } else if (id == SYS_ID_AA64MMFR1_EL1) { + if (val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT)) + kvm_debug("LORegions unsupported for guests, suppressing\n"); + + val &= ~(0xfUL << ID_AA64MMFR1_LOR_SHIFT); } return val; @@ -1178,6 +1289,12 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, + { SYS_DESC(SYS_LORSA_EL1), trap_undef }, + { SYS_DESC(SYS_LOREA_EL1), trap_undef }, + { SYS_DESC(SYS_LORN_EL1), trap_undef }, + { SYS_DESC(SYS_LORC_EL1), trap_undef }, + { SYS_DESC(SYS_LORID_EL1), trap_undef }, + { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 }, { SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 }, @@ -1545,6 +1662,11 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, + /* CNTP_TVAL */ + { Op1( 0), CRn(14), CRm( 2), Op2( 0), access_cntp_tval }, + /* CNTP_CTL */ + { Op1( 0), CRn(14), CRm( 2), Op2( 1), access_cntp_ctl }, + /* PMEVCNTRn */ PMU_PMEVCNTR(0), PMU_PMEVCNTR(1), @@ -1618,6 +1740,7 @@ static const struct sys_reg_desc cp15_64_regs[] = { { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr }, { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, + { Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval }, }; /* Target specific emulation tables */ @@ -2194,7 +2317,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg if (r->get_user) return (r->get_user)(vcpu, r, reg, uaddr); - return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id); + return reg_to_user(uaddr, &__vcpu_sys_reg(vcpu, r->reg), reg->id); } int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) @@ -2215,7 +2338,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg if (r->set_user) return (r->set_user)(vcpu, r, reg, uaddr); - return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); + return reg_from_user(&__vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); } static unsigned int num_demux_regs(void) @@ -2421,6 +2544,6 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) reset_sys_reg_descs(vcpu, table, num); for (num = 1; num < NR_SYS_REGS; num++) - if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242) - panic("Didn't reset vcpu_sys_reg(%zi)", num); + if (__vcpu_sys_reg(vcpu, num) == 0x4242424242424242) + panic("Didn't reset __vcpu_sys_reg(%zi)", num); } diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 060f5348ef25..cd710f8b63e0 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -89,14 +89,14 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu, { BUG_ON(!r->reg); BUG_ON(r->reg >= NR_SYS_REGS); - vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL; + __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL; } static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { BUG_ON(!r->reg); BUG_ON(r->reg >= NR_SYS_REGS); - vcpu_sys_reg(vcpu, r->reg) = r->val; + __vcpu_sys_reg(vcpu, r->reg) = r->val; } static inline int cmp_sys_reg(const struct sys_reg_desc *i1, diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 969ade1d333d..ddb8497d18d6 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -38,13 +38,13 @@ static bool access_actlr(struct kvm_vcpu *vcpu, if (p->is_write) return ignore_write(vcpu, p); - p->regval = vcpu_sys_reg(vcpu, ACTLR_EL1); + p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); return true; } static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1); + __vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1); } /* diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c new file mode 100644 index 000000000000..c712a7376bc1 --- /dev/null +++ b/arch/arm64/kvm/va_layout.c @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2017 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kvm_host.h> +#include <linux/random.h> +#include <linux/memblock.h> +#include <asm/alternative.h> +#include <asm/debug-monitors.h> +#include <asm/insn.h> +#include <asm/kvm_mmu.h> + +/* + * The LSB of the random hyp VA tag or 0 if no randomization is used. + */ +static u8 tag_lsb; +/* + * The random hyp VA tag value with the region bit if hyp randomization is used + */ +static u64 tag_val; +static u64 va_mask; + +static void compute_layout(void) +{ + phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); + u64 hyp_va_msb; + int kva_msb; + + /* Where is my RAM region? */ + hyp_va_msb = idmap_addr & BIT(VA_BITS - 1); + hyp_va_msb ^= BIT(VA_BITS - 1); + + kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ + (u64)(high_memory - 1)); + + if (kva_msb == (VA_BITS - 1)) { + /* + * No space in the address, let's compute the mask so + * that it covers (VA_BITS - 1) bits, and the region + * bit. The tag stays set to zero. + */ + va_mask = BIT(VA_BITS - 1) - 1; + va_mask |= hyp_va_msb; + } else { + /* + * We do have some free bits to insert a random tag. + * Hyp VAs are now created from kernel linear map VAs + * using the following formula (with V == VA_BITS): + * + * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0 + * --------------------------------------------------------- + * | 0000000 | hyp_va_msb | random tag | kern linear VA | + */ + tag_lsb = kva_msb; + va_mask = GENMASK_ULL(tag_lsb - 1, 0); + tag_val = get_random_long() & GENMASK_ULL(VA_BITS - 2, tag_lsb); + tag_val |= hyp_va_msb; + tag_val >>= tag_lsb; + } +} + +static u32 compute_instruction(int n, u32 rd, u32 rn) +{ + u32 insn = AARCH64_BREAK_FAULT; + + switch (n) { + case 0: + insn = aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_AND, + AARCH64_INSN_VARIANT_64BIT, + rn, rd, va_mask); + break; + + case 1: + /* ROR is a variant of EXTR with Rm = Rn */ + insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT, + rn, rn, rd, + tag_lsb); + break; + + case 2: + insn = aarch64_insn_gen_add_sub_imm(rd, rn, + tag_val & GENMASK(11, 0), + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_ADSB_ADD); + break; + + case 3: + insn = aarch64_insn_gen_add_sub_imm(rd, rn, + tag_val & GENMASK(23, 12), + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_ADSB_ADD); + break; + + case 4: + /* ROR is a variant of EXTR with Rm = Rn */ + insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT, + rn, rn, rd, 64 - tag_lsb); + break; + } + + return insn; +} + +void __init kvm_update_va_mask(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + int i; + + BUG_ON(nr_inst != 5); + + if (!has_vhe() && !va_mask) + compute_layout(); + + for (i = 0; i < nr_inst; i++) { + u32 rd, rn, insn, oinsn; + + /* + * VHE doesn't need any address translation, let's NOP + * everything. + * + * Alternatively, if we don't have any spare bits in + * the address, NOP everything after masking that + * kernel VA. + */ + if (has_vhe() || (!tag_lsb && i > 0)) { + updptr[i] = cpu_to_le32(aarch64_insn_gen_nop()); + continue; + } + + oinsn = le32_to_cpu(origptr[i]); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn); + rn = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, oinsn); + + insn = compute_instruction(i, rd, rn); + BUG_ON(insn == AARCH64_BREAK_FAULT); + + updptr[i] = cpu_to_le32(insn); + } +} + +void *__kvm_bp_vect_base; +int __kvm_harden_el2_vector_slot; + +void kvm_patch_vector_branch(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + u64 addr; + u32 insn; + + BUG_ON(nr_inst != 5); + + if (has_vhe() || !cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { + WARN_ON_ONCE(cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)); + return; + } + + if (!va_mask) + compute_layout(); + + /* + * Compute HYP VA by using the same computation as kern_hyp_va() + */ + addr = (uintptr_t)kvm_ksym_ref(__kvm_hyp_vector); + addr &= va_mask; + addr |= tag_val << tag_lsb; + + /* Use PC[10:7] to branch to the same vector in KVM */ + addr |= ((u64)origptr & GENMASK_ULL(10, 7)); + + /* + * Branch to the second instruction in the vectors in order to + * avoid the initial store on the stack (which we already + * perform in the hardening vectors). + */ + addr += AARCH64_INSN_SIZE; + + /* stp x0, x1, [sp, #-16]! */ + insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0, + AARCH64_INSN_REG_1, + AARCH64_INSN_REG_SP, + -16, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX); + *updptr++ = cpu_to_le32(insn); + + /* movz x0, #(addr & 0xffff) */ + insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0, + (u16)addr, + 0, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_ZERO); + *updptr++ = cpu_to_le32(insn); + + /* movk x0, #((addr >> 16) & 0xffff), lsl #16 */ + insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0, + (u16)(addr >> 16), + 16, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + *updptr++ = cpu_to_le32(insn); + + /* movk x0, #((addr >> 32) & 0xffff), lsl #32 */ + insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0, + (u16)(addr >> 32), + 32, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + *updptr++ = cpu_to_le32(insn); + + /* br x0 */ + insn = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_0, + AARCH64_INSN_BRANCH_NOLINK); + *updptr++ = cpu_to_le32(insn); +} diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 0ead8a1d1679..137710f4dac3 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -19,5 +19,9 @@ CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \ -fcall-saved-x18 -fomit-frame-pointer CFLAGS_REMOVE_atomic_ll_sc.o := -pg +GCOV_PROFILE_atomic_ll_sc.o := n +KASAN_SANITIZE_atomic_ll_sc.o := n +KCOV_INSTRUMENT_atomic_ll_sc.o := n +UBSAN_SANITIZE_atomic_ll_sc.o := n lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S index d3db9b2cd479..0fdff97794de 100644 --- a/arch/arm64/lib/tishift.S +++ b/arch/arm64/lib/tishift.S @@ -1,17 +1,6 @@ -/* - * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2017-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include <linux/linkage.h> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index a96ec0181818..db01f2709842 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -508,16 +508,6 @@ static int __init arm64_dma_init(void) } arch_initcall(arm64_dma_init); -#define PREALLOC_DMA_DEBUG_ENTRIES 4096 - -static int __init dma_debug_do_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -fs_initcall(dma_debug_do_init); - - #ifdef CONFIG_IOMMU_DMA #include <linux/dma-iommu.h> #include <linux/platform_device.h> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4165485e8b6e..576f15153080 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -293,6 +293,57 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, static void __do_user_fault(struct siginfo *info, unsigned int esr) { current->thread.fault_address = (unsigned long)info->si_addr; + + /* + * If the faulting address is in the kernel, we must sanitize the ESR. + * From userspace's point of view, kernel-only mappings don't exist + * at all, so we report them as level 0 translation faults. + * (This is not quite the way that "no mapping there at all" behaves: + * an alignment fault not caused by the memory type would take + * precedence over translation fault for a real access to empty + * space. Unfortunately we can't easily distinguish "alignment fault + * not caused by memory type" from "alignment fault caused by memory + * type", so we ignore this wrinkle and just return the translation + * fault.) + */ + if (current->thread.fault_address >= TASK_SIZE) { + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_DABT_LOW: + /* + * These bits provide only information about the + * faulting instruction, which userspace knows already. + * We explicitly clear bits which are architecturally + * RES0 in case they are given meanings in future. + * We always report the ESR as if the fault was taken + * to EL1 and so ISV and the bits in ISS[23:14] are + * clear. (In fact it always will be a fault to EL1.) + */ + esr &= ESR_ELx_EC_MASK | ESR_ELx_IL | + ESR_ELx_CM | ESR_ELx_WNR; + esr |= ESR_ELx_FSC_FAULT; + break; + case ESR_ELx_EC_IABT_LOW: + /* + * Claim a level 0 translation fault. + * All other bits are architecturally RES0 for faults + * reported with that DFSC value, so we clear them. + */ + esr &= ESR_ELx_EC_MASK | ESR_ELx_IL; + esr |= ESR_ELx_FSC_FAULT; + break; + default: + /* + * This should never happen (entry.S only brings us + * into this code for insn and data aborts from a lower + * exception level). Fail safe by not providing an ESR + * context record at all. + */ + WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr); + esr = 0; + break; + } + } + current->thread.fault_code = esr; arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current); } @@ -305,11 +356,12 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re */ if (user_mode(regs)) { const struct fault_info *inf = esr_to_fault_info(esr); - struct siginfo si = { - .si_signo = inf->sig, - .si_code = inf->code, - .si_addr = (void __user *)addr, - }; + struct siginfo si; + + clear_siginfo(&si); + si.si_signo = inf->sig; + si.si_code = inf->code; + si.si_addr = (void __user *)addr; __do_user_fault(&si, esr); } else { @@ -583,6 +635,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) nmi_exit(); } + clear_siginfo(&info); info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; @@ -687,6 +740,7 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, show_pte(addr); } + clear_siginfo(&info); info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; @@ -729,6 +783,7 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, local_irq_enable(); } + clear_siginfo(&info); info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRALN; @@ -772,7 +827,6 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, struct pt_regs *regs) { const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); - struct siginfo info; int rv; /* @@ -788,6 +842,9 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, if (!inf->fn(addr, esr, regs)) { rv = 1; } else { + struct siginfo info; + + clear_siginfo(&info); info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index e36ed5087b5c..1059884f9a6f 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -58,7 +58,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, flush_ptrace_access(vma, page, uaddr, dst, len); } -void __sync_icache_dcache(pte_t pte, unsigned long addr) +void __sync_icache_dcache(pte_t pte) { struct page *page = pte_page(pte); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 9f3c47acf8ff..1b18b4722420 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -646,8 +646,10 @@ static int keep_initrd __initdata; void __init free_initrd_mem(unsigned long start, unsigned long end) { - if (!keep_initrd) + if (!keep_initrd) { free_reserved_area((void *)start, (void *)end, 0, "initrd"); + memblock_free(__virt_to_phys(start), end - start); + } } static int __init keepinitrd_setup(char *__unused) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index dabfc1ecda3d..12145874c02b 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -204,7 +204,7 @@ void __init kasan_init(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); kasan_map_populate(kimg_shadow_start, kimg_shadow_end, - pfn_to_nid(virt_to_pfn(lm_alias(_text)))); + early_pfn_to_nid(virt_to_pfn(lm_alias(_text)))); kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, (void *)mod_shadow_start); @@ -224,7 +224,7 @@ void __init kasan_init(void) kasan_map_populate((unsigned long)kasan_mem_to_shadow(start), (unsigned long)kasan_mem_to_shadow(end), - pfn_to_nid(virt_to_pfn(start))); + early_pfn_to_nid(virt_to_pfn(start))); } /* diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index decccffb03ca..842c8a5fcd53 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -38,12 +38,12 @@ #define MIN_GAP (SZ_128M) #define MAX_GAP (STACK_TOP/6*5) -static int mmap_is_legacy(void) +static int mmap_is_legacy(struct rlimit *rlim_stack) { if (current->personality & ADDR_COMPAT_LAYOUT) return 1; - if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + if (rlim_stack->rlim_cur == RLIM_INFINITY) return 1; return sysctl_legacy_va_layout; @@ -62,9 +62,9 @@ unsigned long arch_mmap_rnd(void) return rnd << PAGE_SHIFT; } -static unsigned long mmap_base(unsigned long rnd) +static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) { - unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap; /* Values close to RLIM_INFINITY can overflow. */ @@ -83,7 +83,7 @@ static unsigned long mmap_base(unsigned long rnd) * This function, called very early during the creation of a new process VM * image, sets up which VM layout function to use: */ -void arch_pick_mmap_layout(struct mm_struct *mm) +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { unsigned long random_factor = 0UL; @@ -94,11 +94,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * Fall back to the standard layout if the personality bit is set, or * if the expected stack growth is unlimited: */ - if (mmap_is_legacy()) { + if (mmap_is_legacy(rlim_stack)) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->get_unmapped_area = arch_get_unmapped_area; } else { - mm->mmap_base = mmap_base(random_factor); + mm->mmap_base = mmap_base(random_factor, rlim_stack); mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 2dbb2c9f1ec1..493ff75670ff 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -933,13 +933,15 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot); - /* ioremap_page_range doesn't honour BBM */ - if (pud_present(READ_ONCE(*pudp))) + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), + pud_val(new_pud))) return 0; BUG_ON(phys & ~PUD_MASK); - set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); + set_pud(pudp, new_pud); return 1; } @@ -947,13 +949,15 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot); - /* ioremap_page_range doesn't honour BBM */ - if (pmd_present(READ_ONCE(*pmdp))) + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), + pmd_val(new_pmd))) return 0; BUG_ON(phys & ~PMD_MASK); - set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot)); + set_pmd(pmdp, new_pmd); return 1; } diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index a93350451e8e..a6fdaea07c63 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -21,7 +21,6 @@ #include <linux/bpf.h> #include <linux/filter.h> #include <linux/printk.h> -#include <linux/skbuff.h> #include <linux/slab.h> #include <asm/byteorder.h> @@ -80,23 +79,66 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx) ctx->idx++; } +static inline void emit_a64_mov_i(const int is64, const int reg, + const s32 val, struct jit_ctx *ctx) +{ + u16 hi = val >> 16; + u16 lo = val & 0xffff; + + if (hi & 0x8000) { + if (hi == 0xffff) { + emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx); + } else { + emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx); + if (lo != 0xffff) + emit(A64_MOVK(is64, reg, lo, 0), ctx); + } + } else { + emit(A64_MOVZ(is64, reg, lo, 0), ctx); + if (hi) + emit(A64_MOVK(is64, reg, hi, 16), ctx); + } +} + +static int i64_i16_blocks(const u64 val, bool inverse) +{ + return (((val >> 0) & 0xffff) != (inverse ? 0xffff : 0x0000)) + + (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) + + (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) + + (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000)); +} + static inline void emit_a64_mov_i64(const int reg, const u64 val, struct jit_ctx *ctx) { - u64 tmp = val; - int shift = 0; - - emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx); - tmp >>= 16; - shift += 16; - while (tmp) { - if (tmp & 0xffff) - emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx); - tmp >>= 16; - shift += 16; + u64 nrm_tmp = val, rev_tmp = ~val; + bool inverse; + int shift; + + if (!(nrm_tmp >> 32)) + return emit_a64_mov_i(0, reg, (u32)val, ctx); + + inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false); + shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) : + (fls64(nrm_tmp) - 1)), 16), 0); + if (inverse) + emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx); + else + emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx); + shift -= 16; + while (shift >= 0) { + if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000)) + emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx); + shift -= 16; } } +/* + * This is an unoptimized 64 immediate emission used for BPF to BPF call + * addresses. It will always do a full 64 bit decomposition as otherwise + * more complexity in the last extra pass is required since we previously + * reserved 4 instructions for the address. + */ static inline void emit_addr_mov_i64(const int reg, const u64 val, struct jit_ctx *ctx) { @@ -111,26 +153,6 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val, } } -static inline void emit_a64_mov_i(const int is64, const int reg, - const s32 val, struct jit_ctx *ctx) -{ - u16 hi = val >> 16; - u16 lo = val & 0xffff; - - if (hi & 0x8000) { - if (hi == 0xffff) { - emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx); - } else { - emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx); - emit(A64_MOVK(is64, reg, lo, 0), ctx); - } - } else { - emit(A64_MOVZ(is64, reg, lo, 0), ctx); - if (hi) - emit(A64_MOVK(is64, reg, hi, 16), ctx); - } -} - static inline int bpf2a64_offset(int bpf_to, int bpf_from, const struct jit_ctx *ctx) { @@ -163,7 +185,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) /* Tail call offset to jump into */ #define PROLOGUE_OFFSET 7 -static int build_prologue(struct jit_ctx *ctx) +static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) { const struct bpf_prog *prog = ctx->prog; const u8 r6 = bpf2a64[BPF_REG_6]; @@ -188,7 +210,7 @@ static int build_prologue(struct jit_ctx *ctx) * | ... | BPF prog stack * | | * +-----+ <= (BPF_FP - prog->aux->stack_depth) - * |RSVD | JIT scratchpad + * |RSVD | padding * current A64_SP => +-----+ <= (BPF_FP - ctx->stack_size) * | | * | ... | Function call stack @@ -210,19 +232,19 @@ static int build_prologue(struct jit_ctx *ctx) /* Set up BPF prog stack base register */ emit(A64_MOV(1, fp, A64_SP), ctx); - /* Initialize tail_call_cnt */ - emit(A64_MOVZ(1, tcc, 0, 0), ctx); + if (!ebpf_from_cbpf) { + /* Initialize tail_call_cnt */ + emit(A64_MOVZ(1, tcc, 0, 0), ctx); - cur_offset = ctx->idx - idx0; - if (cur_offset != PROLOGUE_OFFSET) { - pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", - cur_offset, PROLOGUE_OFFSET); - return -1; + cur_offset = ctx->idx - idx0; + if (cur_offset != PROLOGUE_OFFSET) { + pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", + cur_offset, PROLOGUE_OFFSET); + return -1; + } } - /* 4 byte extra for skb_copy_bits buffer */ - ctx->stack_size = prog->aux->stack_depth + 4; - ctx->stack_size = STACK_ALIGN(ctx->stack_size); + ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth); /* Set up function call stack */ emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); @@ -723,71 +745,6 @@ emit_cond_jmp: emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); break; - /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ - case BPF_LD | BPF_ABS | BPF_W: - case BPF_LD | BPF_ABS | BPF_H: - case BPF_LD | BPF_ABS | BPF_B: - /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */ - case BPF_LD | BPF_IND | BPF_W: - case BPF_LD | BPF_IND | BPF_H: - case BPF_LD | BPF_IND | BPF_B: - { - const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */ - const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */ - const u8 fp = bpf2a64[BPF_REG_FP]; - const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */ - const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */ - const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */ - const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */ - const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */ - int size; - - emit(A64_MOV(1, r1, r6), ctx); - emit_a64_mov_i(0, r2, imm, ctx); - if (BPF_MODE(code) == BPF_IND) - emit(A64_ADD(0, r2, r2, src), ctx); - switch (BPF_SIZE(code)) { - case BPF_W: - size = 4; - break; - case BPF_H: - size = 2; - break; - case BPF_B: - size = 1; - break; - default: - return -EINVAL; - } - emit_a64_mov_i64(r3, size, ctx); - emit(A64_SUB_I(1, r4, fp, ctx->stack_size), ctx); - emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx); - emit(A64_BLR(r5), ctx); - emit(A64_MOV(1, r0, A64_R(0)), ctx); - - jmp_offset = epilogue_offset(ctx); - check_imm19(jmp_offset); - emit(A64_CBZ(1, r0, jmp_offset), ctx); - emit(A64_MOV(1, r5, r0), ctx); - switch (BPF_SIZE(code)) { - case BPF_W: - emit(A64_LDR32(r0, r5, A64_ZR), ctx); -#ifndef CONFIG_CPU_BIG_ENDIAN - emit(A64_REV32(0, r0, r0), ctx); -#endif - break; - case BPF_H: - emit(A64_LDRH(r0, r5, A64_ZR), ctx); -#ifndef CONFIG_CPU_BIG_ENDIAN - emit(A64_REV16(0, r0, r0), ctx); -#endif - break; - case BPF_B: - emit(A64_LDRB(r0, r5, A64_ZR), ctx); - break; - } - break; - } default: pr_err_once("unknown opcode %02x\n", code); return -EINVAL; @@ -851,6 +808,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header; struct arm64_jit_data *jit_data; + bool was_classic = bpf_prog_was_classic(prog); bool tmp_blinded = false; bool extra_pass = false; struct jit_ctx ctx; @@ -905,7 +863,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_off; } - if (build_prologue(&ctx)) { + if (build_prologue(&ctx, was_classic)) { prog = orig_prog; goto out_off; } @@ -928,7 +886,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) skip_init_ctx: ctx.idx = 0; - build_prologue(&ctx); + build_prologue(&ctx, was_classic); if (build_body(&ctx)) { bpf_jit_binary_free(header); |