summaryrefslogtreecommitdiffstats
path: root/arch/arm64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/Kconfig38
-rw-r--r--arch/arm64/Makefile6
-rw-r--r--arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi83
-rw-r--r--arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts44
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi4
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts12
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts4
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi4
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl.dtsi61
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts4
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxm.dtsi17
-rw-r--r--arch/arm64/boot/dts/arm/juno-motherboard.dtsi2
-rw-r--r--arch/arm64/boot/dts/broadcom/stingray/stingray-sata.dtsi80
-rw-r--r--arch/arm64/boot/dts/exynos/exynos5433.dtsi2
-rw-r--r--arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts1
-rw-r--r--arch/arm64/boot/dts/marvell/armada-cp110.dtsi7
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi26
-rw-r--r--arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi32
-rw-r--r--arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi61
-rw-r--r--arch/arm64/boot/dts/qcom/msm8996.dtsi10
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi2
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts8
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi2
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi2
-rw-r--r--arch/arm64/crypto/Kconfig6
-rw-r--r--arch/arm64/crypto/Makefile5
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-core.S150
-rw-r--r--arch/arm64/crypto/aes-ce.S15
-rw-r--r--arch/arm64/crypto/aes-modes.S331
-rw-r--r--arch/arm64/crypto/aes-neonbs-core.S305
-rw-r--r--arch/arm64/crypto/crc32-ce-core.S40
-rw-r--r--arch/arm64/crypto/crct10dif-ce-core.S32
-rw-r--r--arch/arm64/crypto/ghash-ce-core.S113
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c28
-rw-r--r--arch/arm64/crypto/sha1-ce-core.S42
-rw-r--r--arch/arm64/crypto/sha2-ce-core.S37
-rw-r--r--arch/arm64/crypto/sha256-core.S_shipped12
-rw-r--r--arch/arm64/crypto/sha3-ce-core.S77
-rw-r--r--arch/arm64/crypto/sha512-armv8.pl12
-rw-r--r--arch/arm64/crypto/sha512-ce-core.S27
-rw-r--r--arch/arm64/crypto/sha512-core.S_shipped12
-rw-r--r--arch/arm64/crypto/sm4-ce-core.S36
-rw-r--r--arch/arm64/crypto/sm4-ce-glue.c73
-rw-r--r--arch/arm64/include/asm/alternative.h41
-rw-r--r--arch/arm64/include/asm/assembler.h136
-rw-r--r--arch/arm64/include/asm/atomic_lse.h24
-rw-r--r--arch/arm64/include/asm/cacheflush.h6
-rw-r--r--arch/arm64/include/asm/compat.h43
-rw-r--r--arch/arm64/include/asm/cpucaps.h15
-rw-r--r--arch/arm64/include/asm/cputype.h6
-rw-r--r--arch/arm64/include/asm/insn.h16
-rw-r--r--arch/arm64/include/asm/kvm_arm.h6
-rw-r--r--arch/arm64/include/asm/kvm_asm.h19
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h78
-rw-r--r--arch/arm64/include/asm/kvm_host.h56
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h29
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h181
-rw-r--r--arch/arm64/include/asm/memory.h6
-rw-r--r--arch/arm64/include/asm/mmu.h8
-rw-r--r--arch/arm64/include/asm/module.h2
-rw-r--r--arch/arm64/include/asm/pci.h5
-rw-r--r--arch/arm64/include/asm/pgtable.h4
-rw-r--r--arch/arm64/include/asm/spinlock.h5
-rw-r--r--arch/arm64/include/asm/stat.h1
-rw-r--r--arch/arm64/include/asm/sysreg.h6
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h6
-rw-r--r--arch/arm64/kernel/Makefile4
-rw-r--r--arch/arm64/kernel/alternative.c43
-rw-r--r--arch/arm64/kernel/arm64ksyms.c8
-rw-r--r--arch/arm64/kernel/asm-offsets.c4
-rw-r--r--arch/arm64/kernel/bpi.S83
-rw-r--r--arch/arm64/kernel/cpu_errata.c97
-rw-r--r--arch/arm64/kernel/cpufeature.c20
-rw-r--r--arch/arm64/kernel/fpsimd.c2
-rw-r--r--arch/arm64/kernel/head.S7
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c1
-rw-r--r--arch/arm64/kernel/insn.c190
-rw-r--r--arch/arm64/kernel/module-plts.c2
-rw-r--r--arch/arm64/kernel/module.c2
-rw-r--r--arch/arm64/kernel/perf_regs.c2
-rw-r--r--arch/arm64/kernel/ptrace.c20
-rw-r--r--arch/arm64/kernel/sys_compat.c1
-rw-r--r--arch/arm64/kernel/traps.c6
-rw-r--r--arch/arm64/kvm/Kconfig3
-rw-r--r--arch/arm64/kvm/Makefile2
-rw-r--r--arch/arm64/kvm/debug.c29
-rw-r--r--arch/arm64/kvm/guest.c14
-rw-r--r--arch/arm64/kvm/hyp-init.S1
-rw-r--r--arch/arm64/kvm/hyp/Makefile2
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c88
-rw-r--r--arch/arm64/kvm/hyp/entry.S18
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S150
-rw-r--r--arch/arm64/kvm/hyp/switch.c388
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c172
-rw-r--r--arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c92
-rw-r--r--arch/arm64/kvm/inject_fault.c24
-rw-r--r--arch/arm64/kvm/regmap.c67
-rw-r--r--arch/arm64/kvm/sys_regs.c201
-rw-r--r--arch/arm64/kvm/sys_regs.h4
-rw-r--r--arch/arm64/kvm/sys_regs_generic_v8.c4
-rw-r--r--arch/arm64/kvm/va_layout.c227
-rw-r--r--arch/arm64/lib/Makefile4
-rw-r--r--arch/arm64/lib/tishift.S15
-rw-r--r--arch/arm64/mm/dma-mapping.c10
-rw-r--r--arch/arm64/mm/fault.c69
-rw-r--r--arch/arm64/mm/flush.c2
-rw-r--r--arch/arm64/mm/init.c4
-rw-r--r--arch/arm64/mm/kasan_init.c4
-rw-r--r--arch/arm64/mm/mmap.c14
-rw-r--r--arch/arm64/mm/mmu.c16
-rw-r--r--arch/arm64/net/bpf_jit_comp.c180
112 files changed, 3451 insertions, 1411 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 177be0d1d090..b25ed7834f6c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -105,7 +105,6 @@ config ARM64
select HAVE_CONTEXT_TRACKING
select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
- select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -133,6 +132,8 @@ config ARM64
select IRQ_FORCED_THREADING
select MODULES_USE_ELF_RELA
select MULTI_IRQ_HANDLER
+ select NEED_DMA_MAP_STATE
+ select NEED_SG_DMA_LENGTH
select NO_BOOTMEM
select OF
select OF_EARLY_FLATTREE
@@ -142,6 +143,7 @@ config ARM64
select POWER_SUPPLY
select REFCOUNT_FULL
select SPARSE_IRQ
+ select SWIOTLB
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
help
@@ -150,9 +152,6 @@ config ARM64
config 64BIT
def_bool y
-config ARCH_PHYS_ADDR_T_64BIT
- def_bool y
-
config MMU
def_bool y
@@ -237,24 +236,9 @@ config ZONE_DMA32
config HAVE_GENERIC_GUP
def_bool y
-config ARCH_DMA_ADDR_T_64BIT
- def_bool y
-
-config NEED_DMA_MAP_STATE
- def_bool y
-
-config NEED_SG_DMA_LENGTH
- def_bool y
-
config SMP
def_bool y
-config SWIOTLB
- def_bool y
-
-config IOMMU_HELPER
- def_bool SWIOTLB
-
config KERNEL_MODE_NEON
def_bool y
@@ -922,6 +906,22 @@ config HARDEN_BRANCH_PREDICTOR
If unsure, say Y.
+config HARDEN_EL2_VECTORS
+ bool "Harden EL2 vector mapping against system register leak" if EXPERT
+ default y
+ help
+ Speculation attacks against some high-performance processors can
+ be used to leak privileged information such as the vector base
+ register, resulting in a potential defeat of the EL2 layout
+ randomization.
+
+ This config option will map the vectors to a fixed location,
+ independent of the EL2 code mapping, so that revealing VBAR_EL2
+ to an attacker does not give away any extra information. This
+ only gets enabled on affected CPUs.
+
+ If unsure, say Y.
+
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
depends on COMPAT
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 15402861bb59..3c353b4715dc 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -56,7 +56,11 @@ KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)
KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
KBUILD_AFLAGS += $(call cc-option,-mabi=lp64)
+ifeq ($(cc-name),clang)
+KBUILD_CFLAGS += -DCONFIG_ARCH_SUPPORTS_INT128
+else
KBUILD_CFLAGS += $(call cc-ifversion, -ge, 0500, -DCONFIG_ARCH_SUPPORTS_INT128)
+endif
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
@@ -74,7 +78,7 @@ LDFLAGS += -maarch64linux
UTS_MACHINE := aarch64
endif
-CHECKFLAGS += -D__aarch64__ -m64
+CHECKFLAGS += -D__aarch64__
ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index c89d0c307f8d..e6b059378dc0 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -17,6 +17,7 @@
/dts-v1/;
#include <dt-bindings/reset/altr,rst-mgr-s10.h>
#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/clock/stratix10-clock.h>
/ {
compatible = "altr,socfpga-stratix10";
@@ -92,9 +93,32 @@
interrupt-parent = <&intc>;
ranges = <0 0 0 0xffffffff>;
- clkmgr@ffd1000 {
- compatible = "altr,clk-mgr";
+ clkmgr: clock-controller@ffd10000 {
+ compatible = "intel,stratix10-clkmgr";
reg = <0xffd10000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ clocks {
+ cb_intosc_hs_div2_clk: cb-intosc-hs-div2-clk {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ };
+
+ cb_intosc_ls_clk: cb-intosc-ls-clk {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ };
+
+ f2s_free_clk: f2s-free-clk {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ };
+
+ osc1: osc1 {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ };
};
gmac0: ethernet@ff800000 {
@@ -105,6 +129,8 @@
mac-address = [00 00 00 00 00 00];
resets = <&rst EMAC0_RESET>;
reset-names = "stmmaceth";
+ clocks = <&clkmgr STRATIX10_EMAC0_CLK>;
+ clock-names = "stmmaceth";
status = "disabled";
};
@@ -116,6 +142,8 @@
mac-address = [00 00 00 00 00 00];
resets = <&rst EMAC1_RESET>;
reset-names = "stmmaceth";
+ clocks = <&clkmgr STRATIX10_EMAC1_CLK>;
+ clock-names = "stmmaceth";
status = "disabled";
};
@@ -127,6 +155,8 @@
mac-address = [00 00 00 00 00 00];
resets = <&rst EMAC2_RESET>;
reset-names = "stmmaceth";
+ clocks = <&clkmgr STRATIX10_EMAC2_CLK>;
+ clock-names = "stmmaceth";
status = "disabled";
};
@@ -177,6 +207,7 @@
reg = <0xffc02800 0x100>;
interrupts = <0 103 4>;
resets = <&rst I2C0_RESET>;
+ clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
status = "disabled";
};
@@ -187,6 +218,7 @@
reg = <0xffc02900 0x100>;
interrupts = <0 104 4>;
resets = <&rst I2C1_RESET>;
+ clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
status = "disabled";
};
@@ -197,6 +229,7 @@
reg = <0xffc02a00 0x100>;
interrupts = <0 105 4>;
resets = <&rst I2C2_RESET>;
+ clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
status = "disabled";
};
@@ -207,6 +240,7 @@
reg = <0xffc02b00 0x100>;
interrupts = <0 106 4>;
resets = <&rst I2C3_RESET>;
+ clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
status = "disabled";
};
@@ -217,6 +251,7 @@
reg = <0xffc02c00 0x100>;
interrupts = <0 107 4>;
resets = <&rst I2C4_RESET>;
+ clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
status = "disabled";
};
@@ -229,6 +264,9 @@
fifo-depth = <0x400>;
resets = <&rst SDMMC_RESET>;
reset-names = "reset";
+ clocks = <&clkmgr STRATIX10_L4_MP_CLK>,
+ <&clkmgr STRATIX10_SDMMC_CLK>;
+ clock-names = "biu", "ciu";
status = "disabled";
};
@@ -237,6 +275,25 @@
reg = <0xffe00000 0x100000>;
};
+ pdma: pdma@ffda0000 {
+ compatible = "arm,pl330", "arm,primecell";
+ reg = <0xffda0000 0x1000>;
+ interrupts = <0 81 4>,
+ <0 82 4>,
+ <0 83 4>,
+ <0 84 4>,
+ <0 85 4>,
+ <0 86 4>,
+ <0 87 4>,
+ <0 88 4>,
+ <0 89 4>;
+ #dma-cells = <1>;
+ #dma-channels = <8>;
+ #dma-requests = <32>;
+ clocks = <&clkmgr STRATIX10_L4_MAIN_CLK>;
+ clock-names = "apb_pclk";
+ };
+
rst: rstmgr@ffd11000 {
#reset-cells = <1>;
compatible = "altr,rst-mgr";
@@ -288,24 +345,32 @@
compatible = "snps,dw-apb-timer";
interrupts = <0 113 4>;
reg = <0xffc03000 0x100>;
+ clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
+ clock-names = "timer";
};
timer1: timer1@ffc03100 {
compatible = "snps,dw-apb-timer";
interrupts = <0 114 4>;
reg = <0xffc03100 0x100>;
+ clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
+ clock-names = "timer";
};
timer2: timer2@ffd00000 {
compatible = "snps,dw-apb-timer";
interrupts = <0 115 4>;
reg = <0xffd00000 0x100>;
+ clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
+ clock-names = "timer";
};
timer3: timer3@ffd00100 {
compatible = "snps,dw-apb-timer";
interrupts = <0 116 4>;
reg = <0xffd00100 0x100>;
+ clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
+ clock-names = "timer";
};
uart0: serial0@ffc02000 {
@@ -315,6 +380,7 @@
reg-shift = <2>;
reg-io-width = <4>;
resets = <&rst UART0_RESET>;
+ clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
status = "disabled";
};
@@ -325,6 +391,7 @@
reg-shift = <2>;
reg-io-width = <4>;
resets = <&rst UART1_RESET>;
+ clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
status = "disabled";
};
@@ -387,5 +454,17 @@
resets = <&rst WATCHDOG3_RESET>;
status = "disabled";
};
+
+ eccmgr {
+ compatible = "altr,socfpga-s10-ecc-manager";
+ interrupts = <0 15 4>, <0 95 4>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ sdramedac {
+ compatible = "altr,sdram-edac-s10";
+ interrupts = <16 4>, <48 4>;
+ };
+ };
};
};
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
index eaf13fe29287..f9b1ef12db48 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
@@ -50,6 +50,21 @@
/* We expect the bootloader to fill in the reg */
reg = <0 0 0 0>;
};
+
+ ref_033v: 033-v-ref {
+ compatible = "regulator-fixed";
+ regulator-name = "0.33V";
+ regulator-min-microvolt = <330000>;
+ regulator-max-microvolt = <330000>;
+ };
+
+ soc {
+ clocks {
+ osc1 {
+ clock-frequency = <25000000>;
+ };
+ };
+ };
};
&gpio1 {
@@ -79,7 +94,7 @@
rxd2-skew-ps = <420>; /* 0ps */
rxd3-skew-ps = <420>; /* 0ps */
txen-skew-ps = <0>; /* -420ps */
- txc-skew-ps = <1860>; /* 960ps */
+ txc-skew-ps = <900>; /* 0ps */
rxdv-skew-ps = <420>; /* 0ps */
rxc-skew-ps = <1680>; /* 780ps */
};
@@ -105,3 +120,30 @@
&watchdog0 {
status = "okay";
};
+
+&i2c1 {
+ status = "okay";
+ clock-frequency = <100000>;
+
+ adc@14 {
+ compatible = "lltc,ltc2497";
+ reg = <0x14>;
+ vref-supply = <&ref_033v>;
+ };
+
+ temp@4c {
+ compatible = "maxim,max1619";
+ reg = <0x4c>;
+ };
+
+ eeprom@51 {
+ compatible = "atmel,24c32";
+ reg = <0x51>;
+ pagesize = <32>;
+ };
+
+ rtc@68 {
+ compatible = "dallas,ds1339";
+ reg = <0x68>;
+ };
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi
index 4eef36b22538..88e712ea757a 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi
@@ -212,3 +212,7 @@
pinctrl-0 = <&uart_ao_a_pins>;
pinctrl-names = "default";
};
+
+&usb0 {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts
index 22bf37404ff1..3e3eb31748a3 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts
@@ -271,3 +271,15 @@
pinctrl-0 = <&uart_ao_a_pins>;
pinctrl-names = "default";
};
+
+&usb0 {
+ status = "okay";
+};
+
+&usb2_phy0 {
+ /*
+ * even though the schematics don't show it:
+ * HDMI_5V is also used as supply for the USB VBUS.
+ */
+ phy-supply = <&hdmi_5v>;
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts
index 69c721a70e44..6739697be1de 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts
@@ -215,3 +215,7 @@
pinctrl-0 = <&uart_ao_a_pins>;
pinctrl-names = "default";
};
+
+&usb0 {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi
index 0a0953fbc7d4..0cfd701809de 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi
@@ -185,3 +185,7 @@
pinctrl-0 = <&uart_ao_a_pins>;
pinctrl-names = "default";
};
+
+&usb0 {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
index e1a39cbed8c9..dba365ed4bd5 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
@@ -20,6 +20,67 @@
no-map;
};
};
+
+ soc {
+ usb0: usb@c9000000 {
+ status = "disabled";
+ compatible = "amlogic,meson-gxl-dwc3";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ clocks = <&clkc CLKID_USB>;
+ clock-names = "usb_general";
+ resets = <&reset RESET_USB_OTG>;
+ reset-names = "usb_otg";
+
+ dwc3: dwc3@c9000000 {
+ compatible = "snps,dwc3";
+ reg = <0x0 0xc9000000 0x0 0x100000>;
+ interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
+ dr_mode = "host";
+ maximum-speed = "high-speed";
+ snps,dis_u2_susphy_quirk;
+ phys = <&usb3_phy>, <&usb2_phy0>, <&usb2_phy1>;
+ };
+ };
+ };
+};
+
+&apb {
+ usb2_phy0: phy@78000 {
+ compatible = "amlogic,meson-gxl-usb2-phy";
+ #phy-cells = <0>;
+ reg = <0x0 0x78000 0x0 0x20>;
+ clocks = <&clkc CLKID_USB>;
+ clock-names = "phy";
+ resets = <&reset RESET_USB_OTG>;
+ reset-names = "phy";
+ status = "okay";
+ };
+
+ usb2_phy1: phy@78020 {
+ compatible = "amlogic,meson-gxl-usb2-phy";
+ #phy-cells = <0>;
+ reg = <0x0 0x78020 0x0 0x20>;
+ clocks = <&clkc CLKID_USB>;
+ clock-names = "phy";
+ resets = <&reset RESET_USB_OTG>;
+ reset-names = "phy";
+ status = "okay";
+ };
+
+ usb3_phy: phy@78080 {
+ compatible = "amlogic,meson-gxl-usb3-phy";
+ #phy-cells = <0>;
+ reg = <0x0 0x78080 0x0 0x20>;
+ interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clkc CLKID_USB>, <&clkc_AO CLKID_AO_CEC_32K>;
+ clock-names = "phy", "peripheral";
+ resets = <&reset RESET_USB_OTG>, <&reset RESET_USB_OTG>;
+ reset-names = "phy", "peripheral";
+ status = "okay";
+ };
};
&ethmac {
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
index 4fd46c1546a7..0868da476e41 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts
@@ -406,3 +406,7 @@
status = "okay";
vref-supply = <&vddio_ao18>;
};
+
+&usb0 {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi
index d076a7c425dd..247888d68a3a 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi
@@ -80,6 +80,19 @@
};
};
+&apb {
+ usb2_phy2: phy@78040 {
+ compatible = "amlogic,meson-gxl-usb2-phy";
+ #phy-cells = <0>;
+ reg = <0x0 0x78040 0x0 0x20>;
+ clocks = <&clkc CLKID_USB>;
+ clock-names = "phy";
+ resets = <&reset RESET_USB_OTG>;
+ reset-names = "phy";
+ status = "okay";
+ };
+};
+
&clkc_AO {
compatible = "amlogic,meson-gxm-aoclkc", "amlogic,meson-gx-aoclkc";
};
@@ -100,3 +113,7 @@
&hdmi_tx {
compatible = "amlogic,meson-gxm-dw-hdmi", "amlogic,meson-gx-dw-hdmi";
};
+
+&dwc3 {
+ phys = <&usb3_phy>, <&usb2_phy0>, <&usb2_phy1>, <&usb2_phy2>;
+};
diff --git a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi
index 2ac43221ddb6..69804c5f1197 100644
--- a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi
+++ b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi
@@ -56,8 +56,6 @@
gpio_keys {
compatible = "gpio-keys";
- #address-cells = <1>;
- #size-cells = <0>;
power-button {
debounce_interval = <50>;
diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-sata.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-sata.dtsi
index 4b5465da81d8..8c68e0c26f1b 100644
--- a/arch/arm64/boot/dts/broadcom/stingray/stingray-sata.dtsi
+++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-sata.dtsi
@@ -36,11 +36,11 @@
#size-cells = <1>;
ranges = <0x0 0x0 0x67d00000 0x00800000>;
- sata0: ahci@210000 {
+ sata0: ahci@0 {
compatible = "brcm,iproc-ahci", "generic-ahci";
- reg = <0x00210000 0x1000>;
+ reg = <0x00000000 0x1000>;
reg-names = "ahci";
- interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 321 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
@@ -52,9 +52,9 @@
};
};
- sata_phy0: sata_phy@212100 {
+ sata_phy0: sata_phy@2100 {
compatible = "brcm,iproc-sr-sata-phy";
- reg = <0x00212100 0x1000>;
+ reg = <0x00002100 0x1000>;
reg-names = "phy";
#address-cells = <1>;
#size-cells = <0>;
@@ -66,11 +66,11 @@
};
};
- sata1: ahci@310000 {
+ sata1: ahci@10000 {
compatible = "brcm,iproc-ahci", "generic-ahci";
- reg = <0x00310000 0x1000>;
+ reg = <0x00010000 0x1000>;
reg-names = "ahci";
- interrupts = <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 323 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
@@ -82,9 +82,9 @@
};
};
- sata_phy1: sata_phy@312100 {
+ sata_phy1: sata_phy@12100 {
compatible = "brcm,iproc-sr-sata-phy";
- reg = <0x00312100 0x1000>;
+ reg = <0x00012100 0x1000>;
reg-names = "phy";
#address-cells = <1>;
#size-cells = <0>;
@@ -96,11 +96,11 @@
};
};
- sata2: ahci@120000 {
+ sata2: ahci@20000 {
compatible = "brcm,iproc-ahci", "generic-ahci";
- reg = <0x00120000 0x1000>;
+ reg = <0x00020000 0x1000>;
reg-names = "ahci";
- interrupts = <GIC_SPI 333 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 325 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
@@ -112,9 +112,9 @@
};
};
- sata_phy2: sata_phy@122100 {
+ sata_phy2: sata_phy@22100 {
compatible = "brcm,iproc-sr-sata-phy";
- reg = <0x00122100 0x1000>;
+ reg = <0x00022100 0x1000>;
reg-names = "phy";
#address-cells = <1>;
#size-cells = <0>;
@@ -126,11 +126,11 @@
};
};
- sata3: ahci@130000 {
+ sata3: ahci@30000 {
compatible = "brcm,iproc-ahci", "generic-ahci";
- reg = <0x00130000 0x1000>;
+ reg = <0x00030000 0x1000>;
reg-names = "ahci";
- interrupts = <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 327 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
@@ -142,9 +142,9 @@
};
};
- sata_phy3: sata_phy@132100 {
+ sata_phy3: sata_phy@32100 {
compatible = "brcm,iproc-sr-sata-phy";
- reg = <0x00132100 0x1000>;
+ reg = <0x00032100 0x1000>;
reg-names = "phy";
#address-cells = <1>;
#size-cells = <0>;
@@ -156,11 +156,11 @@
};
};
- sata4: ahci@330000 {
+ sata4: ahci@100000 {
compatible = "brcm,iproc-ahci", "generic-ahci";
- reg = <0x00330000 0x1000>;
+ reg = <0x00100000 0x1000>;
reg-names = "ahci";
- interrupts = <GIC_SPI 351 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 329 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
@@ -172,9 +172,9 @@
};
};
- sata_phy4: sata_phy@332100 {
+ sata_phy4: sata_phy@102100 {
compatible = "brcm,iproc-sr-sata-phy";
- reg = <0x00332100 0x1000>;
+ reg = <0x00102100 0x1000>;
reg-names = "phy";
#address-cells = <1>;
#size-cells = <0>;
@@ -186,11 +186,11 @@
};
};
- sata5: ahci@400000 {
+ sata5: ahci@110000 {
compatible = "brcm,iproc-ahci", "generic-ahci";
- reg = <0x00400000 0x1000>;
+ reg = <0x00110000 0x1000>;
reg-names = "ahci";
- interrupts = <GIC_SPI 353 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 331 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
@@ -202,9 +202,9 @@
};
};
- sata_phy5: sata_phy@402100 {
+ sata_phy5: sata_phy@112100 {
compatible = "brcm,iproc-sr-sata-phy";
- reg = <0x00402100 0x1000>;
+ reg = <0x00112100 0x1000>;
reg-names = "phy";
#address-cells = <1>;
#size-cells = <0>;
@@ -216,11 +216,11 @@
};
};
- sata6: ahci@410000 {
+ sata6: ahci@120000 {
compatible = "brcm,iproc-ahci", "generic-ahci";
- reg = <0x00410000 0x1000>;
+ reg = <0x00120000 0x1000>;
reg-names = "ahci";
- interrupts = <GIC_SPI 355 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 333 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
@@ -232,9 +232,9 @@
};
};
- sata_phy6: sata_phy@412100 {
+ sata_phy6: sata_phy@122100 {
compatible = "brcm,iproc-sr-sata-phy";
- reg = <0x00412100 0x1000>;
+ reg = <0x00122100 0x1000>;
reg-names = "phy";
#address-cells = <1>;
#size-cells = <0>;
@@ -246,11 +246,11 @@
};
};
- sata7: ahci@420000 {
+ sata7: ahci@130000 {
compatible = "brcm,iproc-ahci", "generic-ahci";
- reg = <0x00420000 0x1000>;
+ reg = <0x00130000 0x1000>;
reg-names = "ahci";
- interrupts = <GIC_SPI 357 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
@@ -262,9 +262,9 @@
};
};
- sata_phy7: sata_phy@422100 {
+ sata_phy7: sata_phy@132100 {
compatible = "brcm,iproc-sr-sata-phy";
- reg = <0x00422100 0x1000>;
+ reg = <0x00132100 0x1000>;
reg-names = "phy";
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/exynos/exynos5433.dtsi b/arch/arm64/boot/dts/exynos/exynos5433.dtsi
index c0231d077fa6..1ad8677f6a0a 100644
--- a/arch/arm64/boot/dts/exynos/exynos5433.dtsi
+++ b/arch/arm64/boot/dts/exynos/exynos5433.dtsi
@@ -1317,7 +1317,7 @@
reg = <0x14d60000 0x100>;
dmas = <&pdma0 31 &pdma0 30>;
dma-names = "tx", "rx";
- interrupts = <GIC_SPI 435 IRQ_TYPE_NONE>;
+ interrupts = <GIC_SPI 435 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cmu_peric CLK_PCLK_I2S1>,
<&cmu_peric CLK_PCLK_I2S1>,
<&cmu_peric CLK_SCLK_I2S1>;
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
index 724a0d3b7683..edb4ee0b8896 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
@@ -299,7 +299,6 @@
/* GPIO blocks 16 thru 19 do not appear to be routed to pins */
dwmmc_0: dwmmc0@f723d000 {
- max-frequency = <150000000>;
cap-mmc-highspeed;
mmc-hs200-1_8v;
non-removable;
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi
index 48cad7919efa..ed2f1237ea1e 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi
@@ -38,9 +38,10 @@
compatible = "marvell,armada-7k-pp22";
reg = <0x0 0x100000>, <0x129000 0xb000>;
clocks = <&CP110_LABEL(clk) 1 3>, <&CP110_LABEL(clk) 1 9>,
- <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 18>;
+ <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 6>,
+ <&CP110_LABEL(clk) 1 18>;
clock-names = "pp_clk", "gop_clk",
- "mg_clk", "axi_clk";
+ "mg_clk", "mg_core_clk", "axi_clk";
marvell,system-controller = <&CP110_LABEL(syscon0)>;
status = "disabled";
dma-coherent;
@@ -141,6 +142,8 @@
#size-cells = <0>;
compatible = "marvell,xmdio";
reg = <0x12a600 0x10>;
+ clocks = <&CP110_LABEL(clk) 1 5>,
+ <&CP110_LABEL(clk) 1 6>, <&CP110_LABEL(clk) 1 18>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
index a8baad7b80df..13f57fff1477 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
@@ -46,7 +46,7 @@
compatible = "ethernet-phy-ieee802.3-c22";
reg = <0x0>;
interrupt-parent = <&gpio>;
- interrupts = <TEGRA_MAIN_GPIO(M, 5) IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <TEGRA_MAIN_GPIO(M, 5) IRQ_TYPE_LEVEL_LOW>;
};
};
};
diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi
index 24552f19b3fa..6a573875d45a 100644
--- a/arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi
@@ -36,4 +36,30 @@
drive-strength = <2>; /* 2 MA */
};
};
+
+ blsp1_uart1_default: blsp1_uart1_default {
+ mux {
+ pins = "gpio41", "gpio42", "gpio43", "gpio44";
+ function = "blsp_uart2";
+ };
+
+ config {
+ pins = "gpio41", "gpio42", "gpio43", "gpio44";
+ drive-strength = <16>;
+ bias-disable;
+ };
+ };
+
+ blsp1_uart1_sleep: blsp1_uart1_sleep {
+ mux {
+ pins = "gpio41", "gpio42", "gpio43", "gpio44";
+ function = "gpio";
+ };
+
+ config {
+ pins = "gpio41", "gpio42", "gpio43", "gpio44";
+ drive-strength = <2>;
+ bias-disable;
+ };
+ };
};
diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi
index 59b29ddfb6e9..6167af955659 100644
--- a/arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi
@@ -14,6 +14,28 @@
};
};
+ bt_en_gpios: bt_en_gpios {
+ pinconf {
+ pins = "gpio19";
+ function = PMIC_GPIO_FUNC_NORMAL;
+ output-low;
+ power-source = <PM8994_GPIO_S4>; // 1.8V
+ qcom,drive-strength = <PMIC_GPIO_STRENGTH_LOW>;
+ bias-pull-down;
+ };
+ };
+
+ wlan_en_gpios: wlan_en_gpios {
+ pinconf {
+ pins = "gpio8";
+ function = PMIC_GPIO_FUNC_NORMAL;
+ output-low;
+ power-source = <PM8994_GPIO_S4>; // 1.8V
+ qcom,drive-strength = <PMIC_GPIO_STRENGTH_LOW>;
+ bias-pull-down;
+ };
+ };
+
volume_up_gpio: pm8996_gpio2 {
pinconf {
pins = "gpio2";
@@ -26,6 +48,16 @@
};
};
+ divclk4_pin_a: divclk4 {
+ pinconf {
+ pins = "gpio18";
+ function = PMIC_GPIO_FUNC_FUNC2;
+
+ bias-disable;
+ power-source = <PM8994_GPIO_S4>;
+ };
+ };
+
usb3_vbus_det_gpio: pm8996_gpio22 {
pinconf {
pins = "gpio22";
diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
index 1c8f1b86472d..4b8bb026346e 100644
--- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
@@ -23,6 +23,7 @@
aliases {
serial0 = &blsp2_uart1;
serial1 = &blsp2_uart2;
+ serial2 = &blsp1_uart1;
i2c0 = &blsp1_i2c2;
i2c1 = &blsp2_i2c1;
i2c2 = &blsp2_i2c0;
@@ -34,7 +35,36 @@
stdout-path = "serial0:115200n8";
};
+ clocks {
+ divclk4: divclk4 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ clock-output-names = "divclk4";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&divclk4_pin_a>;
+ };
+ };
+
soc {
+ serial@7570000 {
+ label = "BT-UART";
+ status = "okay";
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&blsp1_uart1_default>;
+ pinctrl-1 = <&blsp1_uart1_sleep>;
+
+ bluetooth {
+ compatible = "qcom,qca6174-bt";
+
+ /* bt_disable_n gpio */
+ enable-gpios = <&pm8994_gpios 19 GPIO_ACTIVE_HIGH>;
+
+ clocks = <&divclk4>;
+ };
+ };
+
serial@75b0000 {
label = "LS-UART1";
status = "okay";
@@ -139,9 +169,40 @@
pinctrl-0 = <&usb2_vbus_det_gpio>;
};
+ bt_en: bt-en-1-8v {
+ pinctrl-names = "default";
+ pinctrl-0 = <&bt_en_gpios>;
+ compatible = "regulator-fixed";
+ regulator-name = "bt-en-regulator";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ /* WLAN card specific delay */
+ startup-delay-us = <70000>;
+ enable-active-high;
+ };
+
+ wlan_en: wlan-en-1-8v {
+ pinctrl-names = "default";
+ pinctrl-0 = <&wlan_en_gpios>;
+ compatible = "regulator-fixed";
+ regulator-name = "wlan-en-regulator";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ gpio = <&pm8994_gpios 8 0>;
+
+ /* WLAN card specific delay */
+ startup-delay-us = <70000>;
+ enable-active-high;
+ };
+
agnoc@0 {
qcom,pcie@600000 {
+ status = "okay";
perst-gpio = <&msmgpio 35 GPIO_ACTIVE_LOW>;
+ vddpe-supply = <&wlan_en>;
+ vddpe1-supply = <&bt_en>;
};
qcom,pcie@608000 {
diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 410ae787ebb4..f8e49d0b4681 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -419,6 +419,16 @@
#clock-cells = <1>;
};
+ blsp1_uart1: serial@7570000 {
+ compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm";
+ reg = <0x07570000 0x1000>;
+ interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&gcc GCC_BLSP1_UART2_APPS_CLK>,
+ <&gcc GCC_BLSP1_AHB_CLK>;
+ clock-names = "core", "iface";
+ status = "disabled";
+ };
+
blsp1_spi0: spi@7575000 {
compatible = "qcom,spi-qup-v2.2.1";
reg = <0x07575000 0x600>;
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
index e62bda1cf2d9..c32dd3419c87 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi
@@ -414,7 +414,7 @@
mmc-ddr-1_8v;
mmc-hs200-1_8v;
mmc-pwrseq = <&emmc_pwrseq>;
- cdns,phy-input-delay-legacy = <4>;
+ cdns,phy-input-delay-legacy = <9>;
cdns,phy-input-delay-mmc-highspeed = <2>;
cdns,phy-input-delay-mmc-ddr = <3>;
cdns,phy-dll-delay-sdclk = <21>;
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
index 2c1a92fafbfb..440c2e6a638b 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
@@ -67,3 +67,11 @@
reg = <0>;
};
};
+
+&pinctrl_ether_rgmii {
+ tx {
+ pins = "RGMII_TXCLK", "RGMII_TXD0", "RGMII_TXD1",
+ "RGMII_TXD2", "RGMII_TXD3", "RGMII_TXCTL";
+ drive-strength = <9>;
+ };
+};
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
index 9efe20d07589..3a5ed789c056 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
@@ -519,7 +519,7 @@
mmc-ddr-1_8v;
mmc-hs200-1_8v;
mmc-pwrseq = <&emmc_pwrseq>;
- cdns,phy-input-delay-legacy = <4>;
+ cdns,phy-input-delay-legacy = <9>;
cdns,phy-input-delay-mmc-highspeed = <2>;
cdns,phy-input-delay-mmc-ddr = <3>;
cdns,phy-dll-delay-sdclk = <21>;
diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
index 7c8f710d9bfa..e85d6ddea3c2 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
@@ -334,7 +334,7 @@
mmc-ddr-1_8v;
mmc-hs200-1_8v;
mmc-pwrseq = <&emmc_pwrseq>;
- cdns,phy-input-delay-legacy = <4>;
+ cdns,phy-input-delay-legacy = <9>;
cdns,phy-input-delay-mmc-highspeed = <2>;
cdns,phy-input-delay-mmc-ddr = <3>;
cdns,phy-dll-delay-sdclk = <21>;
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index cb5a243110c4..e3fdb0fd6f70 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -47,6 +47,12 @@ config CRYPTO_SM3_ARM64_CE
select CRYPTO_HASH
select CRYPTO_SM3
+config CRYPTO_SM4_ARM64_CE
+ tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_SM4
+
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 8df9f326f449..bcafd016618e 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -23,6 +23,9 @@ sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o
sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o
+obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce.o
+sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
+
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
@@ -78,4 +81,4 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
$(call cmd,perlasm)
endif
-.PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S
+targets += sha256-core.S sha512-core.S
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index e3a375c4cb83..88f5aef7934c 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -19,24 +19,33 @@
* u32 *macp, u8 const rk[], u32 rounds);
*/
ENTRY(ce_aes_ccm_auth_data)
- ldr w8, [x3] /* leftover from prev round? */
+ frame_push 7
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x5
+
+ ldr w25, [x22] /* leftover from prev round? */
ld1 {v0.16b}, [x0] /* load mac */
- cbz w8, 1f
- sub w8, w8, #16
+ cbz w25, 1f
+ sub w25, w25, #16
eor v1.16b, v1.16b, v1.16b
-0: ldrb w7, [x1], #1 /* get 1 byte of input */
- subs w2, w2, #1
- add w8, w8, #1
+0: ldrb w7, [x20], #1 /* get 1 byte of input */
+ subs w21, w21, #1
+ add w25, w25, #1
ins v1.b[0], w7
ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
beq 8f /* out of input? */
- cbnz w8, 0b
+ cbnz w25, 0b
eor v0.16b, v0.16b, v1.16b
-1: ld1 {v3.4s}, [x4] /* load first round key */
- prfm pldl1strm, [x1]
- cmp w5, #12 /* which key size? */
- add x6, x4, #16
- sub w7, w5, #2 /* modified # of rounds */
+1: ld1 {v3.4s}, [x23] /* load first round key */
+ prfm pldl1strm, [x20]
+ cmp w24, #12 /* which key size? */
+ add x6, x23, #16
+ sub w7, w24, #2 /* modified # of rounds */
bmi 2f
bne 5f
mov v5.16b, v3.16b
@@ -55,33 +64,43 @@ ENTRY(ce_aes_ccm_auth_data)
ld1 {v5.4s}, [x6], #16 /* load next round key */
bpl 3b
aese v0.16b, v4.16b
- subs w2, w2, #16 /* last data? */
+ subs w21, w21, #16 /* last data? */
eor v0.16b, v0.16b, v5.16b /* final round */
bmi 6f
- ld1 {v1.16b}, [x1], #16 /* load next input block */
+ ld1 {v1.16b}, [x20], #16 /* load next input block */
eor v0.16b, v0.16b, v1.16b /* xor with mac */
- bne 1b
-6: st1 {v0.16b}, [x0] /* store mac */
+ beq 6f
+
+ if_will_cond_yield_neon
+ st1 {v0.16b}, [x19] /* store mac */
+ do_cond_yield_neon
+ ld1 {v0.16b}, [x19] /* reload mac */
+ endif_yield_neon
+
+ b 1b
+6: st1 {v0.16b}, [x19] /* store mac */
beq 10f
- adds w2, w2, #16
+ adds w21, w21, #16
beq 10f
- mov w8, w2
-7: ldrb w7, [x1], #1
+ mov w25, w21
+7: ldrb w7, [x20], #1
umov w6, v0.b[0]
eor w6, w6, w7
- strb w6, [x0], #1
- subs w2, w2, #1
+ strb w6, [x19], #1
+ subs w21, w21, #1
beq 10f
ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
b 7b
-8: mov w7, w8
- add w8, w8, #16
+8: mov w7, w25
+ add w25, w25, #16
9: ext v1.16b, v1.16b, v1.16b, #1
adds w7, w7, #1
bne 9b
eor v0.16b, v0.16b, v1.16b
- st1 {v0.16b}, [x0]
-10: str w8, [x3]
+ st1 {v0.16b}, [x19]
+10: str w25, [x22]
+
+ frame_pop
ret
ENDPROC(ce_aes_ccm_auth_data)
@@ -126,19 +145,29 @@ ENTRY(ce_aes_ccm_final)
ENDPROC(ce_aes_ccm_final)
.macro aes_ccm_do_crypt,enc
- ldr x8, [x6, #8] /* load lower ctr */
- ld1 {v0.16b}, [x5] /* load mac */
-CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
+ frame_push 8
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x5
+ mov x25, x6
+
+ ldr x26, [x25, #8] /* load lower ctr */
+ ld1 {v0.16b}, [x24] /* load mac */
+CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
0: /* outer loop */
- ld1 {v1.8b}, [x6] /* load upper ctr */
- prfm pldl1strm, [x1]
- add x8, x8, #1
- rev x9, x8
- cmp w4, #12 /* which key size? */
- sub w7, w4, #2 /* get modified # of rounds */
+ ld1 {v1.8b}, [x25] /* load upper ctr */
+ prfm pldl1strm, [x20]
+ add x26, x26, #1
+ rev x9, x26
+ cmp w23, #12 /* which key size? */
+ sub w7, w23, #2 /* get modified # of rounds */
ins v1.d[1], x9 /* no carry in lower ctr */
- ld1 {v3.4s}, [x3] /* load first round key */
- add x10, x3, #16
+ ld1 {v3.4s}, [x22] /* load first round key */
+ add x10, x22, #16
bmi 1f
bne 4f
mov v5.16b, v3.16b
@@ -165,9 +194,9 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
bpl 2b
aese v0.16b, v4.16b
aese v1.16b, v4.16b
- subs w2, w2, #16
- bmi 6f /* partial block? */
- ld1 {v2.16b}, [x1], #16 /* load next input block */
+ subs w21, w21, #16
+ bmi 7f /* partial block? */
+ ld1 {v2.16b}, [x20], #16 /* load next input block */
.if \enc == 1
eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
@@ -176,18 +205,29 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
eor v1.16b, v2.16b, v5.16b /* final round enc */
.endif
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
- st1 {v1.16b}, [x0], #16 /* write output block */
- bne 0b
-CPU_LE( rev x8, x8 )
- st1 {v0.16b}, [x5] /* store mac */
- str x8, [x6, #8] /* store lsb end of ctr (BE) */
-5: ret
-
-6: eor v0.16b, v0.16b, v5.16b /* final round mac */
+ st1 {v1.16b}, [x19], #16 /* write output block */
+ beq 5f
+
+ if_will_cond_yield_neon
+ st1 {v0.16b}, [x24] /* store mac */
+ do_cond_yield_neon
+ ld1 {v0.16b}, [x24] /* reload mac */
+ endif_yield_neon
+
+ b 0b
+5:
+CPU_LE( rev x26, x26 )
+ st1 {v0.16b}, [x24] /* store mac */
+ str x26, [x25, #8] /* store lsb end of ctr (BE) */
+
+6: frame_pop
+ ret
+
+7: eor v0.16b, v0.16b, v5.16b /* final round mac */
eor v1.16b, v1.16b, v5.16b /* final round enc */
- st1 {v0.16b}, [x5] /* store mac */
- add w2, w2, #16 /* process partial tail block */
-7: ldrb w9, [x1], #1 /* get 1 byte of input */
+ st1 {v0.16b}, [x24] /* store mac */
+ add w21, w21, #16 /* process partial tail block */
+8: ldrb w9, [x20], #1 /* get 1 byte of input */
umov w6, v1.b[0] /* get top crypted ctr byte */
umov w7, v0.b[0] /* get top mac byte */
.if \enc == 1
@@ -197,13 +237,13 @@ CPU_LE( rev x8, x8 )
eor w9, w9, w6
eor w7, w7, w9
.endif
- strb w9, [x0], #1 /* store out byte */
- strb w7, [x5], #1 /* store mac byte */
- subs w2, w2, #1
- beq 5b
+ strb w9, [x19], #1 /* store out byte */
+ strb w7, [x24], #1 /* store mac byte */
+ subs w21, w21, #1
+ beq 6b
ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
- b 7b
+ b 8b
.endm
/*
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 50330f5c3adc..623e74ed1c67 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -30,18 +30,21 @@
.endm
/* prepare for encryption with key in rk[] */
- .macro enc_prepare, rounds, rk, ignore
- load_round_keys \rounds, \rk
+ .macro enc_prepare, rounds, rk, temp
+ mov \temp, \rk
+ load_round_keys \rounds, \temp
.endm
/* prepare for encryption (again) but with new key in rk[] */
- .macro enc_switch_key, rounds, rk, ignore
- load_round_keys \rounds, \rk
+ .macro enc_switch_key, rounds, rk, temp
+ mov \temp, \rk
+ load_round_keys \rounds, \temp
.endm
/* prepare for decryption with key in rk[] */
- .macro dec_prepare, rounds, rk, ignore
- load_round_keys \rounds, \rk
+ .macro dec_prepare, rounds, rk, temp
+ mov \temp, \rk
+ load_round_keys \rounds, \temp
.endm
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index a68412e1e3a4..483a7130cf0e 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -14,12 +14,12 @@
.align 4
aes_encrypt_block4x:
- encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
+ encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
ret
ENDPROC(aes_encrypt_block4x)
aes_decrypt_block4x:
- decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
+ decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
ret
ENDPROC(aes_decrypt_block4x)
@@ -31,57 +31,71 @@ ENDPROC(aes_decrypt_block4x)
*/
AES_ENTRY(aes_ecb_encrypt)
- stp x29, x30, [sp, #-16]!
- mov x29, sp
+ frame_push 5
- enc_prepare w3, x2, x5
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+
+.Lecbencrestart:
+ enc_prepare w22, x21, x5
.LecbencloopNx:
- subs w4, w4, #4
+ subs w23, w23, #4
bmi .Lecbenc1x
- ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
bl aes_encrypt_block4x
- st1 {v0.16b-v3.16b}, [x0], #64
+ st1 {v0.16b-v3.16b}, [x19], #64
+ cond_yield_neon .Lecbencrestart
b .LecbencloopNx
.Lecbenc1x:
- adds w4, w4, #4
+ adds w23, w23, #4
beq .Lecbencout
.Lecbencloop:
- ld1 {v0.16b}, [x1], #16 /* get next pt block */
- encrypt_block v0, w3, x2, x5, w6
- st1 {v0.16b}, [x0], #16
- subs w4, w4, #1
+ ld1 {v0.16b}, [x20], #16 /* get next pt block */
+ encrypt_block v0, w22, x21, x5, w6
+ st1 {v0.16b}, [x19], #16
+ subs w23, w23, #1
bne .Lecbencloop
.Lecbencout:
- ldp x29, x30, [sp], #16
+ frame_pop
ret
AES_ENDPROC(aes_ecb_encrypt)
AES_ENTRY(aes_ecb_decrypt)
- stp x29, x30, [sp, #-16]!
- mov x29, sp
+ frame_push 5
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
- dec_prepare w3, x2, x5
+.Lecbdecrestart:
+ dec_prepare w22, x21, x5
.LecbdecloopNx:
- subs w4, w4, #4
+ subs w23, w23, #4
bmi .Lecbdec1x
- ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
bl aes_decrypt_block4x
- st1 {v0.16b-v3.16b}, [x0], #64
+ st1 {v0.16b-v3.16b}, [x19], #64
+ cond_yield_neon .Lecbdecrestart
b .LecbdecloopNx
.Lecbdec1x:
- adds w4, w4, #4
+ adds w23, w23, #4
beq .Lecbdecout
.Lecbdecloop:
- ld1 {v0.16b}, [x1], #16 /* get next ct block */
- decrypt_block v0, w3, x2, x5, w6
- st1 {v0.16b}, [x0], #16
- subs w4, w4, #1
+ ld1 {v0.16b}, [x20], #16 /* get next ct block */
+ decrypt_block v0, w22, x21, x5, w6
+ st1 {v0.16b}, [x19], #16
+ subs w23, w23, #1
bne .Lecbdecloop
.Lecbdecout:
- ldp x29, x30, [sp], #16
+ frame_pop
ret
AES_ENDPROC(aes_ecb_decrypt)
@@ -94,78 +108,100 @@ AES_ENDPROC(aes_ecb_decrypt)
*/
AES_ENTRY(aes_cbc_encrypt)
- ld1 {v4.16b}, [x5] /* get iv */
- enc_prepare w3, x2, x6
+ frame_push 6
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x5
+
+.Lcbcencrestart:
+ ld1 {v4.16b}, [x24] /* get iv */
+ enc_prepare w22, x21, x6
.Lcbcencloop4x:
- subs w4, w4, #4
+ subs w23, w23, #4
bmi .Lcbcenc1x
- ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
- encrypt_block v0, w3, x2, x6, w7
+ encrypt_block v0, w22, x21, x6, w7
eor v1.16b, v1.16b, v0.16b
- encrypt_block v1, w3, x2, x6, w7
+ encrypt_block v1, w22, x21, x6, w7
eor v2.16b, v2.16b, v1.16b
- encrypt_block v2, w3, x2, x6, w7
+ encrypt_block v2, w22, x21, x6, w7
eor v3.16b, v3.16b, v2.16b
- encrypt_block v3, w3, x2, x6, w7
- st1 {v0.16b-v3.16b}, [x0], #64
+ encrypt_block v3, w22, x21, x6, w7
+ st1 {v0.16b-v3.16b}, [x19], #64
mov v4.16b, v3.16b
+ st1 {v4.16b}, [x24] /* return iv */
+ cond_yield_neon .Lcbcencrestart
b .Lcbcencloop4x
.Lcbcenc1x:
- adds w4, w4, #4
+ adds w23, w23, #4
beq .Lcbcencout
.Lcbcencloop:
- ld1 {v0.16b}, [x1], #16 /* get next pt block */
+ ld1 {v0.16b}, [x20], #16 /* get next pt block */
eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
- encrypt_block v4, w3, x2, x6, w7
- st1 {v4.16b}, [x0], #16
- subs w4, w4, #1
+ encrypt_block v4, w22, x21, x6, w7
+ st1 {v4.16b}, [x19], #16
+ subs w23, w23, #1
bne .Lcbcencloop
.Lcbcencout:
- st1 {v4.16b}, [x5] /* return iv */
+ st1 {v4.16b}, [x24] /* return iv */
+ frame_pop
ret
AES_ENDPROC(aes_cbc_encrypt)
AES_ENTRY(aes_cbc_decrypt)
- stp x29, x30, [sp, #-16]!
- mov x29, sp
+ frame_push 6
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x5
- ld1 {v7.16b}, [x5] /* get iv */
- dec_prepare w3, x2, x6
+.Lcbcdecrestart:
+ ld1 {v7.16b}, [x24] /* get iv */
+ dec_prepare w22, x21, x6
.LcbcdecloopNx:
- subs w4, w4, #4
+ subs w23, w23, #4
bmi .Lcbcdec1x
- ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
mov v4.16b, v0.16b
mov v5.16b, v1.16b
mov v6.16b, v2.16b
bl aes_decrypt_block4x
- sub x1, x1, #16
+ sub x20, x20, #16
eor v0.16b, v0.16b, v7.16b
eor v1.16b, v1.16b, v4.16b
- ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
+ ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */
eor v2.16b, v2.16b, v5.16b
eor v3.16b, v3.16b, v6.16b
- st1 {v0.16b-v3.16b}, [x0], #64
+ st1 {v0.16b-v3.16b}, [x19], #64
+ st1 {v7.16b}, [x24] /* return iv */
+ cond_yield_neon .Lcbcdecrestart
b .LcbcdecloopNx
.Lcbcdec1x:
- adds w4, w4, #4
+ adds w23, w23, #4
beq .Lcbcdecout
.Lcbcdecloop:
- ld1 {v1.16b}, [x1], #16 /* get next ct block */
+ ld1 {v1.16b}, [x20], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */
- decrypt_block v0, w3, x2, x6, w7
+ decrypt_block v0, w22, x21, x6, w7
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
mov v7.16b, v1.16b /* ct is next iv */
- st1 {v0.16b}, [x0], #16
- subs w4, w4, #1
+ st1 {v0.16b}, [x19], #16
+ subs w23, w23, #1
bne .Lcbcdecloop
.Lcbcdecout:
- st1 {v7.16b}, [x5] /* return iv */
- ldp x29, x30, [sp], #16
+ st1 {v7.16b}, [x24] /* return iv */
+ frame_pop
ret
AES_ENDPROC(aes_cbc_decrypt)
@@ -176,19 +212,26 @@ AES_ENDPROC(aes_cbc_decrypt)
*/
AES_ENTRY(aes_ctr_encrypt)
- stp x29, x30, [sp, #-16]!
- mov x29, sp
+ frame_push 6
- enc_prepare w3, x2, x6
- ld1 {v4.16b}, [x5]
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x5
+
+.Lctrrestart:
+ enc_prepare w22, x21, x6
+ ld1 {v4.16b}, [x24]
umov x6, v4.d[1] /* keep swabbed ctr in reg */
rev x6, x6
- cmn w6, w4 /* 32 bit overflow? */
- bcs .Lctrloop
.LctrloopNx:
- subs w4, w4, #4
+ subs w23, w23, #4
bmi .Lctr1x
+ cmn w6, #4 /* 32 bit overflow? */
+ bcs .Lctr1x
ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
dup v7.4s, w6
mov v0.16b, v4.16b
@@ -200,25 +243,27 @@ AES_ENTRY(aes_ctr_encrypt)
mov v1.s[3], v8.s[0]
mov v2.s[3], v8.s[1]
mov v3.s[3], v8.s[2]
- ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
+ ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */
bl aes_encrypt_block4x
eor v0.16b, v5.16b, v0.16b
- ld1 {v5.16b}, [x1], #16 /* get 1 input block */
+ ld1 {v5.16b}, [x20], #16 /* get 1 input block */
eor v1.16b, v6.16b, v1.16b
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
- st1 {v0.16b-v3.16b}, [x0], #64
+ st1 {v0.16b-v3.16b}, [x19], #64
add x6, x6, #4
rev x7, x6
ins v4.d[1], x7
- cbz w4, .Lctrout
+ cbz w23, .Lctrout
+ st1 {v4.16b}, [x24] /* return next CTR value */
+ cond_yield_neon .Lctrrestart
b .LctrloopNx
.Lctr1x:
- adds w4, w4, #4
+ adds w23, w23, #4
beq .Lctrout
.Lctrloop:
mov v0.16b, v4.16b
- encrypt_block v0, w3, x2, x8, w7
+ encrypt_block v0, w22, x21, x8, w7
adds x6, x6, #1 /* increment BE ctr */
rev x7, x6
@@ -226,22 +271,22 @@ AES_ENTRY(aes_ctr_encrypt)
bcs .Lctrcarry /* overflow? */
.Lctrcarrydone:
- subs w4, w4, #1
+ subs w23, w23, #1
bmi .Lctrtailblock /* blocks <0 means tail block */
- ld1 {v3.16b}, [x1], #16
+ ld1 {v3.16b}, [x20], #16
eor v3.16b, v0.16b, v3.16b
- st1 {v3.16b}, [x0], #16
+ st1 {v3.16b}, [x19], #16
bne .Lctrloop
.Lctrout:
- st1 {v4.16b}, [x5] /* return next CTR value */
- ldp x29, x30, [sp], #16
+ st1 {v4.16b}, [x24] /* return next CTR value */
+.Lctrret:
+ frame_pop
ret
.Lctrtailblock:
- st1 {v0.16b}, [x0]
- ldp x29, x30, [sp], #16
- ret
+ st1 {v0.16b}, [x19]
+ b .Lctrret
.Lctrcarry:
umov x7, v4.d[0] /* load upper word of ctr */
@@ -274,10 +319,16 @@ CPU_LE( .quad 1, 0x87 )
CPU_BE( .quad 0x87, 1 )
AES_ENTRY(aes_xts_encrypt)
- stp x29, x30, [sp, #-16]!
- mov x29, sp
+ frame_push 6
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x6
- ld1 {v4.16b}, [x6]
+ ld1 {v4.16b}, [x24]
cbz w7, .Lxtsencnotfirst
enc_prepare w3, x5, x8
@@ -286,15 +337,17 @@ AES_ENTRY(aes_xts_encrypt)
ldr q7, .Lxts_mul_x
b .LxtsencNx
+.Lxtsencrestart:
+ ld1 {v4.16b}, [x24]
.Lxtsencnotfirst:
- enc_prepare w3, x2, x8
+ enc_prepare w22, x21, x8
.LxtsencloopNx:
ldr q7, .Lxts_mul_x
next_tweak v4, v4, v7, v8
.LxtsencNx:
- subs w4, w4, #4
+ subs w23, w23, #4
bmi .Lxtsenc1x
- ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
next_tweak v5, v4, v7, v8
eor v0.16b, v0.16b, v4.16b
next_tweak v6, v5, v7, v8
@@ -307,35 +360,43 @@ AES_ENTRY(aes_xts_encrypt)
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
- st1 {v0.16b-v3.16b}, [x0], #64
+ st1 {v0.16b-v3.16b}, [x19], #64
mov v4.16b, v7.16b
- cbz w4, .Lxtsencout
+ cbz w23, .Lxtsencout
+ st1 {v4.16b}, [x24]
+ cond_yield_neon .Lxtsencrestart
b .LxtsencloopNx
.Lxtsenc1x:
- adds w4, w4, #4
+ adds w23, w23, #4
beq .Lxtsencout
.Lxtsencloop:
- ld1 {v1.16b}, [x1], #16
+ ld1 {v1.16b}, [x20], #16
eor v0.16b, v1.16b, v4.16b
- encrypt_block v0, w3, x2, x8, w7
+ encrypt_block v0, w22, x21, x8, w7
eor v0.16b, v0.16b, v4.16b
- st1 {v0.16b}, [x0], #16
- subs w4, w4, #1
+ st1 {v0.16b}, [x19], #16
+ subs w23, w23, #1
beq .Lxtsencout
next_tweak v4, v4, v7, v8
b .Lxtsencloop
.Lxtsencout:
- st1 {v4.16b}, [x6]
- ldp x29, x30, [sp], #16
+ st1 {v4.16b}, [x24]
+ frame_pop
ret
AES_ENDPROC(aes_xts_encrypt)
AES_ENTRY(aes_xts_decrypt)
- stp x29, x30, [sp, #-16]!
- mov x29, sp
+ frame_push 6
- ld1 {v4.16b}, [x6]
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x6
+
+ ld1 {v4.16b}, [x24]
cbz w7, .Lxtsdecnotfirst
enc_prepare w3, x5, x8
@@ -344,15 +405,17 @@ AES_ENTRY(aes_xts_decrypt)
ldr q7, .Lxts_mul_x
b .LxtsdecNx
+.Lxtsdecrestart:
+ ld1 {v4.16b}, [x24]
.Lxtsdecnotfirst:
- dec_prepare w3, x2, x8
+ dec_prepare w22, x21, x8
.LxtsdecloopNx:
ldr q7, .Lxts_mul_x
next_tweak v4, v4, v7, v8
.LxtsdecNx:
- subs w4, w4, #4
+ subs w23, w23, #4
bmi .Lxtsdec1x
- ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
next_tweak v5, v4, v7, v8
eor v0.16b, v0.16b, v4.16b
next_tweak v6, v5, v7, v8
@@ -365,26 +428,28 @@ AES_ENTRY(aes_xts_decrypt)
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
- st1 {v0.16b-v3.16b}, [x0], #64
+ st1 {v0.16b-v3.16b}, [x19], #64
mov v4.16b, v7.16b
- cbz w4, .Lxtsdecout
+ cbz w23, .Lxtsdecout
+ st1 {v4.16b}, [x24]
+ cond_yield_neon .Lxtsdecrestart
b .LxtsdecloopNx
.Lxtsdec1x:
- adds w4, w4, #4
+ adds w23, w23, #4
beq .Lxtsdecout
.Lxtsdecloop:
- ld1 {v1.16b}, [x1], #16
+ ld1 {v1.16b}, [x20], #16
eor v0.16b, v1.16b, v4.16b
- decrypt_block v0, w3, x2, x8, w7
+ decrypt_block v0, w22, x21, x8, w7
eor v0.16b, v0.16b, v4.16b
- st1 {v0.16b}, [x0], #16
- subs w4, w4, #1
+ st1 {v0.16b}, [x19], #16
+ subs w23, w23, #1
beq .Lxtsdecout
next_tweak v4, v4, v7, v8
b .Lxtsdecloop
.Lxtsdecout:
- st1 {v4.16b}, [x6]
- ldp x29, x30, [sp], #16
+ st1 {v4.16b}, [x24]
+ frame_pop
ret
AES_ENDPROC(aes_xts_decrypt)
@@ -393,43 +458,61 @@ AES_ENDPROC(aes_xts_decrypt)
* int blocks, u8 dg[], int enc_before, int enc_after)
*/
AES_ENTRY(aes_mac_update)
- ld1 {v0.16b}, [x4] /* get dg */
+ frame_push 6
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x6
+
+ ld1 {v0.16b}, [x23] /* get dg */
enc_prepare w2, x1, x7
cbz w5, .Lmacloop4x
encrypt_block v0, w2, x1, x7, w8
.Lmacloop4x:
- subs w3, w3, #4
+ subs w22, w22, #4
bmi .Lmac1x
- ld1 {v1.16b-v4.16b}, [x0], #64 /* get next pt block */
+ ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
- encrypt_block v0, w2, x1, x7, w8
+ encrypt_block v0, w21, x20, x7, w8
eor v0.16b, v0.16b, v2.16b
- encrypt_block v0, w2, x1, x7, w8
+ encrypt_block v0, w21, x20, x7, w8
eor v0.16b, v0.16b, v3.16b
- encrypt_block v0, w2, x1, x7, w8
+ encrypt_block v0, w21, x20, x7, w8
eor v0.16b, v0.16b, v4.16b
- cmp w3, wzr
- csinv x5, x6, xzr, eq
+ cmp w22, wzr
+ csinv x5, x24, xzr, eq
cbz w5, .Lmacout
- encrypt_block v0, w2, x1, x7, w8
+ encrypt_block v0, w21, x20, x7, w8
+ st1 {v0.16b}, [x23] /* return dg */
+ cond_yield_neon .Lmacrestart
b .Lmacloop4x
.Lmac1x:
- add w3, w3, #4
+ add w22, w22, #4
.Lmacloop:
- cbz w3, .Lmacout
- ld1 {v1.16b}, [x0], #16 /* get next pt block */
+ cbz w22, .Lmacout
+ ld1 {v1.16b}, [x19], #16 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
- subs w3, w3, #1
- csinv x5, x6, xzr, eq
+ subs w22, w22, #1
+ csinv x5, x24, xzr, eq
cbz w5, .Lmacout
- encrypt_block v0, w2, x1, x7, w8
+.Lmacenc:
+ encrypt_block v0, w21, x20, x7, w8
b .Lmacloop
.Lmacout:
- st1 {v0.16b}, [x4] /* return dg */
+ st1 {v0.16b}, [x23] /* return dg */
+ frame_pop
ret
+
+.Lmacrestart:
+ ld1 {v0.16b}, [x23] /* get dg */
+ enc_prepare w21, x20, x0
+ b .Lmacloop4x
AES_ENDPROC(aes_mac_update)
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index ca0472500433..e613a87f8b53 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -565,54 +565,61 @@ ENDPROC(aesbs_decrypt8)
* int blocks)
*/
.macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
- stp x29, x30, [sp, #-16]!
- mov x29, sp
+ frame_push 5
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
99: mov x5, #1
- lsl x5, x5, x4
- subs w4, w4, #8
- csel x4, x4, xzr, pl
+ lsl x5, x5, x23
+ subs w23, w23, #8
+ csel x23, x23, xzr, pl
csel x5, x5, xzr, mi
- ld1 {v0.16b}, [x1], #16
+ ld1 {v0.16b}, [x20], #16
tbnz x5, #1, 0f
- ld1 {v1.16b}, [x1], #16
+ ld1 {v1.16b}, [x20], #16
tbnz x5, #2, 0f
- ld1 {v2.16b}, [x1], #16
+ ld1 {v2.16b}, [x20], #16
tbnz x5, #3, 0f
- ld1 {v3.16b}, [x1], #16
+ ld1 {v3.16b}, [x20], #16
tbnz x5, #4, 0f
- ld1 {v4.16b}, [x1], #16
+ ld1 {v4.16b}, [x20], #16
tbnz x5, #5, 0f
- ld1 {v5.16b}, [x1], #16
+ ld1 {v5.16b}, [x20], #16
tbnz x5, #6, 0f
- ld1 {v6.16b}, [x1], #16
+ ld1 {v6.16b}, [x20], #16
tbnz x5, #7, 0f
- ld1 {v7.16b}, [x1], #16
+ ld1 {v7.16b}, [x20], #16
-0: mov bskey, x2
- mov rounds, x3
+0: mov bskey, x21
+ mov rounds, x22
bl \do8
- st1 {\o0\().16b}, [x0], #16
+ st1 {\o0\().16b}, [x19], #16
tbnz x5, #1, 1f
- st1 {\o1\().16b}, [x0], #16
+ st1 {\o1\().16b}, [x19], #16
tbnz x5, #2, 1f
- st1 {\o2\().16b}, [x0], #16
+ st1 {\o2\().16b}, [x19], #16
tbnz x5, #3, 1f
- st1 {\o3\().16b}, [x0], #16
+ st1 {\o3\().16b}, [x19], #16
tbnz x5, #4, 1f
- st1 {\o4\().16b}, [x0], #16
+ st1 {\o4\().16b}, [x19], #16
tbnz x5, #5, 1f
- st1 {\o5\().16b}, [x0], #16
+ st1 {\o5\().16b}, [x19], #16
tbnz x5, #6, 1f
- st1 {\o6\().16b}, [x0], #16
+ st1 {\o6\().16b}, [x19], #16
tbnz x5, #7, 1f
- st1 {\o7\().16b}, [x0], #16
+ st1 {\o7\().16b}, [x19], #16
- cbnz x4, 99b
+ cbz x23, 1f
+ cond_yield_neon
+ b 99b
-1: ldp x29, x30, [sp], #16
+1: frame_pop
ret
.endm
@@ -632,43 +639,49 @@ ENDPROC(aesbs_ecb_decrypt)
*/
.align 4
ENTRY(aesbs_cbc_decrypt)
- stp x29, x30, [sp, #-16]!
- mov x29, sp
+ frame_push 6
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x5
99: mov x6, #1
- lsl x6, x6, x4
- subs w4, w4, #8
- csel x4, x4, xzr, pl
+ lsl x6, x6, x23
+ subs w23, w23, #8
+ csel x23, x23, xzr, pl
csel x6, x6, xzr, mi
- ld1 {v0.16b}, [x1], #16
+ ld1 {v0.16b}, [x20], #16
mov v25.16b, v0.16b
tbnz x6, #1, 0f
- ld1 {v1.16b}, [x1], #16
+ ld1 {v1.16b}, [x20], #16
mov v26.16b, v1.16b
tbnz x6, #2, 0f
- ld1 {v2.16b}, [x1], #16
+ ld1 {v2.16b}, [x20], #16
mov v27.16b, v2.16b
tbnz x6, #3, 0f
- ld1 {v3.16b}, [x1], #16
+ ld1 {v3.16b}, [x20], #16
mov v28.16b, v3.16b
tbnz x6, #4, 0f
- ld1 {v4.16b}, [x1], #16
+ ld1 {v4.16b}, [x20], #16
mov v29.16b, v4.16b
tbnz x6, #5, 0f
- ld1 {v5.16b}, [x1], #16
+ ld1 {v5.16b}, [x20], #16
mov v30.16b, v5.16b
tbnz x6, #6, 0f
- ld1 {v6.16b}, [x1], #16
+ ld1 {v6.16b}, [x20], #16
mov v31.16b, v6.16b
tbnz x6, #7, 0f
- ld1 {v7.16b}, [x1]
+ ld1 {v7.16b}, [x20]
-0: mov bskey, x2
- mov rounds, x3
+0: mov bskey, x21
+ mov rounds, x22
bl aesbs_decrypt8
- ld1 {v24.16b}, [x5] // load IV
+ ld1 {v24.16b}, [x24] // load IV
eor v1.16b, v1.16b, v25.16b
eor v6.16b, v6.16b, v26.16b
@@ -679,34 +692,36 @@ ENTRY(aesbs_cbc_decrypt)
eor v3.16b, v3.16b, v30.16b
eor v5.16b, v5.16b, v31.16b
- st1 {v0.16b}, [x0], #16
+ st1 {v0.16b}, [x19], #16
mov v24.16b, v25.16b
tbnz x6, #1, 1f
- st1 {v1.16b}, [x0], #16
+ st1 {v1.16b}, [x19], #16
mov v24.16b, v26.16b
tbnz x6, #2, 1f
- st1 {v6.16b}, [x0], #16
+ st1 {v6.16b}, [x19], #16
mov v24.16b, v27.16b
tbnz x6, #3, 1f
- st1 {v4.16b}, [x0], #16
+ st1 {v4.16b}, [x19], #16
mov v24.16b, v28.16b
tbnz x6, #4, 1f
- st1 {v2.16b}, [x0], #16
+ st1 {v2.16b}, [x19], #16
mov v24.16b, v29.16b
tbnz x6, #5, 1f
- st1 {v7.16b}, [x0], #16
+ st1 {v7.16b}, [x19], #16
mov v24.16b, v30.16b
tbnz x6, #6, 1f
- st1 {v3.16b}, [x0], #16
+ st1 {v3.16b}, [x19], #16
mov v24.16b, v31.16b
tbnz x6, #7, 1f
- ld1 {v24.16b}, [x1], #16
- st1 {v5.16b}, [x0], #16
-1: st1 {v24.16b}, [x5] // store IV
+ ld1 {v24.16b}, [x20], #16
+ st1 {v5.16b}, [x19], #16
+1: st1 {v24.16b}, [x24] // store IV
- cbnz x4, 99b
+ cbz x23, 2f
+ cond_yield_neon
+ b 99b
- ldp x29, x30, [sp], #16
+2: frame_pop
ret
ENDPROC(aesbs_cbc_decrypt)
@@ -731,87 +746,93 @@ CPU_BE( .quad 0x87, 1 )
*/
__xts_crypt8:
mov x6, #1
- lsl x6, x6, x4
- subs w4, w4, #8
- csel x4, x4, xzr, pl
+ lsl x6, x6, x23
+ subs w23, w23, #8
+ csel x23, x23, xzr, pl
csel x6, x6, xzr, mi
- ld1 {v0.16b}, [x1], #16
+ ld1 {v0.16b}, [x20], #16
next_tweak v26, v25, v30, v31
eor v0.16b, v0.16b, v25.16b
tbnz x6, #1, 0f
- ld1 {v1.16b}, [x1], #16
+ ld1 {v1.16b}, [x20], #16
next_tweak v27, v26, v30, v31
eor v1.16b, v1.16b, v26.16b
tbnz x6, #2, 0f
- ld1 {v2.16b}, [x1], #16
+ ld1 {v2.16b}, [x20], #16
next_tweak v28, v27, v30, v31
eor v2.16b, v2.16b, v27.16b
tbnz x6, #3, 0f
- ld1 {v3.16b}, [x1], #16
+ ld1 {v3.16b}, [x20], #16
next_tweak v29, v28, v30, v31
eor v3.16b, v3.16b, v28.16b
tbnz x6, #4, 0f
- ld1 {v4.16b}, [x1], #16
- str q29, [sp, #16]
+ ld1 {v4.16b}, [x20], #16
+ str q29, [sp, #.Lframe_local_offset]
eor v4.16b, v4.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #5, 0f
- ld1 {v5.16b}, [x1], #16
- str q29, [sp, #32]
+ ld1 {v5.16b}, [x20], #16
+ str q29, [sp, #.Lframe_local_offset + 16]
eor v5.16b, v5.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #6, 0f
- ld1 {v6.16b}, [x1], #16
- str q29, [sp, #48]
+ ld1 {v6.16b}, [x20], #16
+ str q29, [sp, #.Lframe_local_offset + 32]
eor v6.16b, v6.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #7, 0f
- ld1 {v7.16b}, [x1], #16
- str q29, [sp, #64]
+ ld1 {v7.16b}, [x20], #16
+ str q29, [sp, #.Lframe_local_offset + 48]
eor v7.16b, v7.16b, v29.16b
next_tweak v29, v29, v30, v31
-0: mov bskey, x2
- mov rounds, x3
+0: mov bskey, x21
+ mov rounds, x22
br x7
ENDPROC(__xts_crypt8)
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
- stp x29, x30, [sp, #-80]!
- mov x29, sp
+ frame_push 6, 64
- ldr q30, .Lxts_mul_x
- ld1 {v25.16b}, [x5]
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x5
+
+0: ldr q30, .Lxts_mul_x
+ ld1 {v25.16b}, [x24]
99: adr x7, \do8
bl __xts_crypt8
- ldp q16, q17, [sp, #16]
- ldp q18, q19, [sp, #48]
+ ldp q16, q17, [sp, #.Lframe_local_offset]
+ ldp q18, q19, [sp, #.Lframe_local_offset + 32]
eor \o0\().16b, \o0\().16b, v25.16b
eor \o1\().16b, \o1\().16b, v26.16b
eor \o2\().16b, \o2\().16b, v27.16b
eor \o3\().16b, \o3\().16b, v28.16b
- st1 {\o0\().16b}, [x0], #16
+ st1 {\o0\().16b}, [x19], #16
mov v25.16b, v26.16b
tbnz x6, #1, 1f
- st1 {\o1\().16b}, [x0], #16
+ st1 {\o1\().16b}, [x19], #16
mov v25.16b, v27.16b
tbnz x6, #2, 1f
- st1 {\o2\().16b}, [x0], #16
+ st1 {\o2\().16b}, [x19], #16
mov v25.16b, v28.16b
tbnz x6, #3, 1f
- st1 {\o3\().16b}, [x0], #16
+ st1 {\o3\().16b}, [x19], #16
mov v25.16b, v29.16b
tbnz x6, #4, 1f
@@ -820,18 +841,22 @@ ENDPROC(__xts_crypt8)
eor \o6\().16b, \o6\().16b, v18.16b
eor \o7\().16b, \o7\().16b, v19.16b
- st1 {\o4\().16b}, [x0], #16
+ st1 {\o4\().16b}, [x19], #16
tbnz x6, #5, 1f
- st1 {\o5\().16b}, [x0], #16
+ st1 {\o5\().16b}, [x19], #16
tbnz x6, #6, 1f
- st1 {\o6\().16b}, [x0], #16
+ st1 {\o6\().16b}, [x19], #16
tbnz x6, #7, 1f
- st1 {\o7\().16b}, [x0], #16
+ st1 {\o7\().16b}, [x19], #16
+
+ cbz x23, 1f
+ st1 {v25.16b}, [x24]
- cbnz x4, 99b
+ cond_yield_neon 0b
+ b 99b
-1: st1 {v25.16b}, [x5]
- ldp x29, x30, [sp], #80
+1: st1 {v25.16b}, [x24]
+ frame_pop
ret
.endm
@@ -856,24 +881,31 @@ ENDPROC(aesbs_xts_decrypt)
* int rounds, int blocks, u8 iv[], u8 final[])
*/
ENTRY(aesbs_ctr_encrypt)
- stp x29, x30, [sp, #-16]!
- mov x29, sp
-
- cmp x6, #0
- cset x10, ne
- add x4, x4, x10 // do one extra block if final
-
- ldp x7, x8, [x5]
- ld1 {v0.16b}, [x5]
+ frame_push 8
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x5
+ mov x25, x6
+
+ cmp x25, #0
+ cset x26, ne
+ add x23, x23, x26 // do one extra block if final
+
+98: ldp x7, x8, [x24]
+ ld1 {v0.16b}, [x24]
CPU_LE( rev x7, x7 )
CPU_LE( rev x8, x8 )
adds x8, x8, #1
adc x7, x7, xzr
99: mov x9, #1
- lsl x9, x9, x4
- subs w4, w4, #8
- csel x4, x4, xzr, pl
+ lsl x9, x9, x23
+ subs w23, w23, #8
+ csel x23, x23, xzr, pl
csel x9, x9, xzr, le
tbnz x9, #1, 0f
@@ -891,82 +923,85 @@ CPU_LE( rev x8, x8 )
tbnz x9, #7, 0f
next_ctr v7
-0: mov bskey, x2
- mov rounds, x3
+0: mov bskey, x21
+ mov rounds, x22
bl aesbs_encrypt8
- lsr x9, x9, x10 // disregard the extra block
+ lsr x9, x9, x26 // disregard the extra block
tbnz x9, #0, 0f
- ld1 {v8.16b}, [x1], #16
+ ld1 {v8.16b}, [x20], #16
eor v0.16b, v0.16b, v8.16b
- st1 {v0.16b}, [x0], #16
+ st1 {v0.16b}, [x19], #16
tbnz x9, #1, 1f
- ld1 {v9.16b}, [x1], #16
+ ld1 {v9.16b}, [x20], #16
eor v1.16b, v1.16b, v9.16b
- st1 {v1.16b}, [x0], #16
+ st1 {v1.16b}, [x19], #16
tbnz x9, #2, 2f
- ld1 {v10.16b}, [x1], #16
+ ld1 {v10.16b}, [x20], #16
eor v4.16b, v4.16b, v10.16b
- st1 {v4.16b}, [x0], #16
+ st1 {v4.16b}, [x19], #16
tbnz x9, #3, 3f
- ld1 {v11.16b}, [x1], #16
+ ld1 {v11.16b}, [x20], #16
eor v6.16b, v6.16b, v11.16b
- st1 {v6.16b}, [x0], #16
+ st1 {v6.16b}, [x19], #16
tbnz x9, #4, 4f
- ld1 {v12.16b}, [x1], #16
+ ld1 {v12.16b}, [x20], #16
eor v3.16b, v3.16b, v12.16b
- st1 {v3.16b}, [x0], #16
+ st1 {v3.16b}, [x19], #16
tbnz x9, #5, 5f
- ld1 {v13.16b}, [x1], #16
+ ld1 {v13.16b}, [x20], #16
eor v7.16b, v7.16b, v13.16b
- st1 {v7.16b}, [x0], #16
+ st1 {v7.16b}, [x19], #16
tbnz x9, #6, 6f
- ld1 {v14.16b}, [x1], #16
+ ld1 {v14.16b}, [x20], #16
eor v2.16b, v2.16b, v14.16b
- st1 {v2.16b}, [x0], #16
+ st1 {v2.16b}, [x19], #16
tbnz x9, #7, 7f
- ld1 {v15.16b}, [x1], #16
+ ld1 {v15.16b}, [x20], #16
eor v5.16b, v5.16b, v15.16b
- st1 {v5.16b}, [x0], #16
+ st1 {v5.16b}, [x19], #16
8: next_ctr v0
- cbnz x4, 99b
+ st1 {v0.16b}, [x24]
+ cbz x23, 0f
+
+ cond_yield_neon 98b
+ b 99b
-0: st1 {v0.16b}, [x5]
- ldp x29, x30, [sp], #16
+0: frame_pop
ret
/*
* If we are handling the tail of the input (x6 != NULL), return the
* final keystream block back to the caller.
*/
-1: cbz x6, 8b
- st1 {v1.16b}, [x6]
+1: cbz x25, 8b
+ st1 {v1.16b}, [x25]
b 8b
-2: cbz x6, 8b
- st1 {v4.16b}, [x6]
+2: cbz x25, 8b
+ st1 {v4.16b}, [x25]
b 8b
-3: cbz x6, 8b
- st1 {v6.16b}, [x6]
+3: cbz x25, 8b
+ st1 {v6.16b}, [x25]
b 8b
-4: cbz x6, 8b
- st1 {v3.16b}, [x6]
+4: cbz x25, 8b
+ st1 {v3.16b}, [x25]
b 8b
-5: cbz x6, 8b
- st1 {v7.16b}, [x6]
+5: cbz x25, 8b
+ st1 {v7.16b}, [x25]
b 8b
-6: cbz x6, 8b
- st1 {v2.16b}, [x6]
+6: cbz x25, 8b
+ st1 {v2.16b}, [x25]
b 8b
-7: cbz x6, 8b
- st1 {v5.16b}, [x6]
+7: cbz x25, 8b
+ st1 {v5.16b}, [x25]
b 8b
ENDPROC(aesbs_ctr_encrypt)
diff --git a/arch/arm64/crypto/crc32-ce-core.S b/arch/arm64/crypto/crc32-ce-core.S
index 16ed3c7ebd37..8061bf0f9c66 100644
--- a/arch/arm64/crypto/crc32-ce-core.S
+++ b/arch/arm64/crypto/crc32-ce-core.S
@@ -100,9 +100,10 @@
dCONSTANT .req d0
qCONSTANT .req q0
- BUF .req x0
- LEN .req x1
- CRC .req x2
+ BUF .req x19
+ LEN .req x20
+ CRC .req x21
+ CONST .req x22
vzr .req v9
@@ -123,7 +124,14 @@ ENTRY(crc32_pmull_le)
ENTRY(crc32c_pmull_le)
adr_l x3, .Lcrc32c_constants
-0: bic LEN, LEN, #15
+0: frame_push 4, 64
+
+ mov BUF, x0
+ mov LEN, x1
+ mov CRC, x2
+ mov CONST, x3
+
+ bic LEN, LEN, #15
ld1 {v1.16b-v4.16b}, [BUF], #0x40
movi vzr.16b, #0
fmov dCONSTANT, CRC
@@ -132,7 +140,7 @@ ENTRY(crc32c_pmull_le)
cmp LEN, #0x40
b.lt less_64
- ldr qCONSTANT, [x3]
+ ldr qCONSTANT, [CONST]
loop_64: /* 64 bytes Full cache line folding */
sub LEN, LEN, #0x40
@@ -162,10 +170,21 @@ loop_64: /* 64 bytes Full cache line folding */
eor v4.16b, v4.16b, v8.16b
cmp LEN, #0x40
- b.ge loop_64
+ b.lt less_64
+
+ if_will_cond_yield_neon
+ stp q1, q2, [sp, #.Lframe_local_offset]
+ stp q3, q4, [sp, #.Lframe_local_offset + 32]
+ do_cond_yield_neon
+ ldp q1, q2, [sp, #.Lframe_local_offset]
+ ldp q3, q4, [sp, #.Lframe_local_offset + 32]
+ ldr qCONSTANT, [CONST]
+ movi vzr.16b, #0
+ endif_yield_neon
+ b loop_64
less_64: /* Folding cache line into 128bit */
- ldr qCONSTANT, [x3, #16]
+ ldr qCONSTANT, [CONST, #16]
pmull2 v5.1q, v1.2d, vCONSTANT.2d
pmull v1.1q, v1.1d, vCONSTANT.1d
@@ -204,8 +223,8 @@ fold_64:
eor v1.16b, v1.16b, v2.16b
/* final 32-bit fold */
- ldr dCONSTANT, [x3, #32]
- ldr d3, [x3, #40]
+ ldr dCONSTANT, [CONST, #32]
+ ldr d3, [CONST, #40]
ext v2.16b, v1.16b, vzr.16b, #4
and v1.16b, v1.16b, v3.16b
@@ -213,7 +232,7 @@ fold_64:
eor v1.16b, v1.16b, v2.16b
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
- ldr qCONSTANT, [x3, #48]
+ ldr qCONSTANT, [CONST, #48]
and v2.16b, v1.16b, v3.16b
ext v2.16b, vzr.16b, v2.16b, #8
@@ -223,6 +242,7 @@ fold_64:
eor v1.16b, v1.16b, v2.16b
mov w0, v1.s[1]
+ frame_pop
ret
ENDPROC(crc32_pmull_le)
ENDPROC(crc32c_pmull_le)
diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
index f179c01bd55c..663ea71cdb38 100644
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -74,13 +74,19 @@
.text
.cpu generic+crypto
- arg1_low32 .req w0
- arg2 .req x1
- arg3 .req x2
+ arg1_low32 .req w19
+ arg2 .req x20
+ arg3 .req x21
vzr .req v13
ENTRY(crc_t10dif_pmull)
+ frame_push 3, 128
+
+ mov arg1_low32, w0
+ mov arg2, x1
+ mov arg3, x2
+
movi vzr.16b, #0 // init zero register
// adjust the 16-bit initial_crc value, scale it to 32 bits
@@ -175,8 +181,25 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
subs arg3, arg3, #128
// check if there is another 64B in the buffer to be able to fold
- b.ge _fold_64_B_loop
+ b.lt _fold_64_B_end
+
+ if_will_cond_yield_neon
+ stp q0, q1, [sp, #.Lframe_local_offset]
+ stp q2, q3, [sp, #.Lframe_local_offset + 32]
+ stp q4, q5, [sp, #.Lframe_local_offset + 64]
+ stp q6, q7, [sp, #.Lframe_local_offset + 96]
+ do_cond_yield_neon
+ ldp q0, q1, [sp, #.Lframe_local_offset]
+ ldp q2, q3, [sp, #.Lframe_local_offset + 32]
+ ldp q4, q5, [sp, #.Lframe_local_offset + 64]
+ ldp q6, q7, [sp, #.Lframe_local_offset + 96]
+ ldr_l q10, rk3, x8
+ movi vzr.16b, #0 // init zero register
+ endif_yield_neon
+
+ b _fold_64_B_loop
+_fold_64_B_end:
// at this point, the buffer pointer is pointing at the last y Bytes
// of the buffer the 64B of folded data is in 4 of the vector
// registers: v0, v1, v2, v3
@@ -304,6 +327,7 @@ _barrett:
_cleanup:
// scale the result back to 16 bits
lsr x0, x0, #16
+ frame_pop
ret
_less_than_128:
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 11ebf1ae248a..dcffb9e77589 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -213,22 +213,31 @@
.endm
.macro __pmull_ghash, pn
- ld1 {SHASH.2d}, [x3]
- ld1 {XL.2d}, [x1]
+ frame_push 5
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+
+0: ld1 {SHASH.2d}, [x22]
+ ld1 {XL.2d}, [x20]
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
eor SHASH2.16b, SHASH2.16b, SHASH.16b
__pmull_pre_\pn
/* do the head block first, if supplied */
- cbz x4, 0f
- ld1 {T1.2d}, [x4]
- b 1f
+ cbz x23, 1f
+ ld1 {T1.2d}, [x23]
+ mov x23, xzr
+ b 2f
-0: ld1 {T1.2d}, [x2], #16
- sub w0, w0, #1
+1: ld1 {T1.2d}, [x21], #16
+ sub w19, w19, #1
-1: /* multiply XL by SHASH in GF(2^128) */
+2: /* multiply XL by SHASH in GF(2^128) */
CPU_LE( rev64 T1.16b, T1.16b )
ext T2.16b, XL.16b, XL.16b, #8
@@ -250,9 +259,18 @@ CPU_LE( rev64 T1.16b, T1.16b )
eor T2.16b, T2.16b, XH.16b
eor XL.16b, XL.16b, T2.16b
- cbnz w0, 0b
+ cbz w19, 3f
+
+ if_will_cond_yield_neon
+ st1 {XL.2d}, [x20]
+ do_cond_yield_neon
+ b 0b
+ endif_yield_neon
+
+ b 1b
- st1 {XL.2d}, [x1]
+3: st1 {XL.2d}, [x20]
+ frame_pop
ret
.endm
@@ -304,38 +322,55 @@ ENDPROC(pmull_ghash_update_p8)
.endm
.macro pmull_gcm_do_crypt, enc
- ld1 {SHASH.2d}, [x4]
- ld1 {XL.2d}, [x1]
- ldr x8, [x5, #8] // load lower counter
+ frame_push 10
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x5
+ mov x25, x6
+ mov x26, x7
+ .if \enc == 1
+ ldr x27, [sp, #96] // first stacked arg
+ .endif
+
+ ldr x28, [x24, #8] // load lower counter
+CPU_LE( rev x28, x28 )
+
+0: mov x0, x25
+ load_round_keys w26, x0
+ ld1 {SHASH.2d}, [x23]
+ ld1 {XL.2d}, [x20]
movi MASK.16b, #0xe1
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
-CPU_LE( rev x8, x8 )
shl MASK.2d, MASK.2d, #57
eor SHASH2.16b, SHASH2.16b, SHASH.16b
.if \enc == 1
- ld1 {KS.16b}, [x7]
+ ld1 {KS.16b}, [x27]
.endif
-0: ld1 {CTR.8b}, [x5] // load upper counter
- ld1 {INP.16b}, [x3], #16
- rev x9, x8
- add x8, x8, #1
- sub w0, w0, #1
+1: ld1 {CTR.8b}, [x24] // load upper counter
+ ld1 {INP.16b}, [x22], #16
+ rev x9, x28
+ add x28, x28, #1
+ sub w19, w19, #1
ins CTR.d[1], x9 // set lower counter
.if \enc == 1
eor INP.16b, INP.16b, KS.16b // encrypt input
- st1 {INP.16b}, [x2], #16
+ st1 {INP.16b}, [x21], #16
.endif
rev64 T1.16b, INP.16b
- cmp w6, #12
- b.ge 2f // AES-192/256?
+ cmp w26, #12
+ b.ge 4f // AES-192/256?
-1: enc_round CTR, v21
+2: enc_round CTR, v21
ext T2.16b, XL.16b, XL.16b, #8
ext IN1.16b, T1.16b, T1.16b, #8
@@ -390,27 +425,39 @@ CPU_LE( rev x8, x8 )
.if \enc == 0
eor INP.16b, INP.16b, KS.16b
- st1 {INP.16b}, [x2], #16
+ st1 {INP.16b}, [x21], #16
.endif
- cbnz w0, 0b
+ cbz w19, 3f
-CPU_LE( rev x8, x8 )
- st1 {XL.2d}, [x1]
- str x8, [x5, #8] // store lower counter
+ if_will_cond_yield_neon
+ st1 {XL.2d}, [x20]
+ .if \enc == 1
+ st1 {KS.16b}, [x27]
+ .endif
+ do_cond_yield_neon
+ b 0b
+ endif_yield_neon
+ b 1b
+
+3: st1 {XL.2d}, [x20]
.if \enc == 1
- st1 {KS.16b}, [x7]
+ st1 {KS.16b}, [x27]
.endif
+CPU_LE( rev x28, x28 )
+ str x28, [x24, #8] // store lower counter
+
+ frame_pop
ret
-2: b.eq 3f // AES-192?
+4: b.eq 5f // AES-192?
enc_round CTR, v17
enc_round CTR, v18
-3: enc_round CTR, v19
+5: enc_round CTR, v19
enc_round CTR, v20
- b 1b
+ b 2b
.endm
/*
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index cfc9c92814fd..7cf0b1aa6ea8 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -63,11 +63,12 @@ static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
const u8 src[], struct ghash_key const *k,
- u8 ctr[], int rounds, u8 ks[]);
+ u8 ctr[], u32 const rk[], int rounds,
+ u8 ks[]);
asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
const u8 src[], struct ghash_key const *k,
- u8 ctr[], int rounds);
+ u8 ctr[], u32 const rk[], int rounds);
asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
u32 const rk[], int rounds);
@@ -368,26 +369,29 @@ static int gcm_encrypt(struct aead_request *req)
pmull_gcm_encrypt_block(ks, iv, NULL,
num_rounds(&ctx->aes_key));
put_unaligned_be32(3, iv + GCM_IV_SIZE);
+ kernel_neon_end();
- err = skcipher_walk_aead_encrypt(&walk, req, true);
+ err = skcipher_walk_aead_encrypt(&walk, req, false);
while (walk.nbytes >= AES_BLOCK_SIZE) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
+ kernel_neon_begin();
pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
walk.src.virt.addr, &ctx->ghash_key,
- iv, num_rounds(&ctx->aes_key), ks);
+ iv, ctx->aes_key.key_enc,
+ num_rounds(&ctx->aes_key), ks);
+ kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes % AES_BLOCK_SIZE);
}
- kernel_neon_end();
} else {
__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
num_rounds(&ctx->aes_key));
put_unaligned_be32(2, iv + GCM_IV_SIZE);
- err = skcipher_walk_aead_encrypt(&walk, req, true);
+ err = skcipher_walk_aead_encrypt(&walk, req, false);
while (walk.nbytes >= AES_BLOCK_SIZE) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -467,15 +471,19 @@ static int gcm_decrypt(struct aead_request *req)
pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
num_rounds(&ctx->aes_key));
put_unaligned_be32(2, iv + GCM_IV_SIZE);
+ kernel_neon_end();
- err = skcipher_walk_aead_decrypt(&walk, req, true);
+ err = skcipher_walk_aead_decrypt(&walk, req, false);
while (walk.nbytes >= AES_BLOCK_SIZE) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
+ kernel_neon_begin();
pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
walk.src.virt.addr, &ctx->ghash_key,
- iv, num_rounds(&ctx->aes_key));
+ iv, ctx->aes_key.key_enc,
+ num_rounds(&ctx->aes_key));
+ kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes % AES_BLOCK_SIZE);
@@ -483,14 +491,12 @@ static int gcm_decrypt(struct aead_request *req)
if (walk.nbytes)
pmull_gcm_encrypt_block(iv, iv, NULL,
num_rounds(&ctx->aes_key));
-
- kernel_neon_end();
} else {
__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
num_rounds(&ctx->aes_key));
put_unaligned_be32(2, iv + GCM_IV_SIZE);
- err = skcipher_walk_aead_decrypt(&walk, req, true);
+ err = skcipher_walk_aead_decrypt(&walk, req, false);
while (walk.nbytes >= AES_BLOCK_SIZE) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 46049850727d..78eb35fb5056 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -69,30 +69,36 @@
* int blocks)
*/
ENTRY(sha1_ce_transform)
+ frame_push 3
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+
/* load round constants */
- loadrc k0.4s, 0x5a827999, w6
+0: loadrc k0.4s, 0x5a827999, w6
loadrc k1.4s, 0x6ed9eba1, w6
loadrc k2.4s, 0x8f1bbcdc, w6
loadrc k3.4s, 0xca62c1d6, w6
/* load state */
- ld1 {dgav.4s}, [x0]
- ldr dgb, [x0, #16]
+ ld1 {dgav.4s}, [x19]
+ ldr dgb, [x19, #16]
/* load sha1_ce_state::finalize */
ldr_l w4, sha1_ce_offsetof_finalize, x4
- ldr w4, [x0, x4]
+ ldr w4, [x19, x4]
/* load input */
-0: ld1 {v8.4s-v11.4s}, [x1], #64
- sub w2, w2, #1
+1: ld1 {v8.4s-v11.4s}, [x20], #64
+ sub w21, w21, #1
CPU_LE( rev32 v8.16b, v8.16b )
CPU_LE( rev32 v9.16b, v9.16b )
CPU_LE( rev32 v10.16b, v10.16b )
CPU_LE( rev32 v11.16b, v11.16b )
-1: add t0.4s, v8.4s, k0.4s
+2: add t0.4s, v8.4s, k0.4s
mov dg0v.16b, dgav.16b
add_update c, ev, k0, 8, 9, 10, 11, dgb
@@ -123,16 +129,25 @@ CPU_LE( rev32 v11.16b, v11.16b )
add dgbv.2s, dgbv.2s, dg1v.2s
add dgav.4s, dgav.4s, dg0v.4s
- cbnz w2, 0b
+ cbz w21, 3f
+
+ if_will_cond_yield_neon
+ st1 {dgav.4s}, [x19]
+ str dgb, [x19, #16]
+ do_cond_yield_neon
+ b 0b
+ endif_yield_neon
+
+ b 1b
/*
* Final block: add padding and total bit count.
* Skip if the input size was not a round multiple of the block size,
* the padding is handled by the C code in that case.
*/
- cbz x4, 3f
+3: cbz x4, 4f
ldr_l w4, sha1_ce_offsetof_count, x4
- ldr x4, [x0, x4]
+ ldr x4, [x19, x4]
movi v9.2d, #0
mov x8, #0x80000000
movi v10.2d, #0
@@ -141,10 +156,11 @@ CPU_LE( rev32 v11.16b, v11.16b )
mov x4, #0
mov v11.d[0], xzr
mov v11.d[1], x7
- b 1b
+ b 2b
/* store new state */
-3: st1 {dgav.4s}, [x0]
- str dgb, [x0, #16]
+4: st1 {dgav.4s}, [x19]
+ str dgb, [x19, #16]
+ frame_pop
ret
ENDPROC(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 4c3c89b812ce..cd8b36412469 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -79,30 +79,36 @@
*/
.text
ENTRY(sha2_ce_transform)
+ frame_push 3
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+
/* load round constants */
- adr_l x8, .Lsha2_rcon
+0: adr_l x8, .Lsha2_rcon
ld1 { v0.4s- v3.4s}, [x8], #64
ld1 { v4.4s- v7.4s}, [x8], #64
ld1 { v8.4s-v11.4s}, [x8], #64
ld1 {v12.4s-v15.4s}, [x8]
/* load state */
- ld1 {dgav.4s, dgbv.4s}, [x0]
+ ld1 {dgav.4s, dgbv.4s}, [x19]
/* load sha256_ce_state::finalize */
ldr_l w4, sha256_ce_offsetof_finalize, x4
- ldr w4, [x0, x4]
+ ldr w4, [x19, x4]
/* load input */
-0: ld1 {v16.4s-v19.4s}, [x1], #64
- sub w2, w2, #1
+1: ld1 {v16.4s-v19.4s}, [x20], #64
+ sub w21, w21, #1
CPU_LE( rev32 v16.16b, v16.16b )
CPU_LE( rev32 v17.16b, v17.16b )
CPU_LE( rev32 v18.16b, v18.16b )
CPU_LE( rev32 v19.16b, v19.16b )
-1: add t0.4s, v16.4s, v0.4s
+2: add t0.4s, v16.4s, v0.4s
mov dg0v.16b, dgav.16b
mov dg1v.16b, dgbv.16b
@@ -131,16 +137,24 @@ CPU_LE( rev32 v19.16b, v19.16b )
add dgbv.4s, dgbv.4s, dg1v.4s
/* handled all input blocks? */
- cbnz w2, 0b
+ cbz w21, 3f
+
+ if_will_cond_yield_neon
+ st1 {dgav.4s, dgbv.4s}, [x19]
+ do_cond_yield_neon
+ b 0b
+ endif_yield_neon
+
+ b 1b
/*
* Final block: add padding and total bit count.
* Skip if the input size was not a round multiple of the block size,
* the padding is handled by the C code in that case.
*/
- cbz x4, 3f
+3: cbz x4, 4f
ldr_l w4, sha256_ce_offsetof_count, x4
- ldr x4, [x0, x4]
+ ldr x4, [x19, x4]
movi v17.2d, #0
mov x8, #0x80000000
movi v18.2d, #0
@@ -149,9 +163,10 @@ CPU_LE( rev32 v19.16b, v19.16b )
mov x4, #0
mov v19.d[0], xzr
mov v19.d[1], x7
- b 1b
+ b 2b
/* store new state */
-3: st1 {dgav.4s, dgbv.4s}, [x0]
+4: st1 {dgav.4s, dgbv.4s}, [x19]
+ frame_pop
ret
ENDPROC(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha256-core.S_shipped b/arch/arm64/crypto/sha256-core.S_shipped
index 3ce82cc860bc..7c7ce2e3bad6 100644
--- a/arch/arm64/crypto/sha256-core.S_shipped
+++ b/arch/arm64/crypto/sha256-core.S_shipped
@@ -1,3 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// This code is taken from the OpenSSL project but the author (Andy Polyakov)
+// has relicensed it under the GPLv2. Therefore this program is free software;
+// you can redistribute it and/or modify it under the terms of the GNU General
+// Public License version 2 as published by the Free Software Foundation.
+//
+// The original headers, including the original license headers, are
+// included below for completeness.
+
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
@@ -10,8 +20,6 @@
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
-//
-// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.
diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S
index 332ad7530690..a7d587fa54f6 100644
--- a/arch/arm64/crypto/sha3-ce-core.S
+++ b/arch/arm64/crypto/sha3-ce-core.S
@@ -41,9 +41,16 @@
*/
.text
ENTRY(sha3_ce_transform)
- /* load state */
- add x8, x0, #32
- ld1 { v0.1d- v3.1d}, [x0]
+ frame_push 4
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+
+0: /* load state */
+ add x8, x19, #32
+ ld1 { v0.1d- v3.1d}, [x19]
ld1 { v4.1d- v7.1d}, [x8], #32
ld1 { v8.1d-v11.1d}, [x8], #32
ld1 {v12.1d-v15.1d}, [x8], #32
@@ -51,13 +58,13 @@ ENTRY(sha3_ce_transform)
ld1 {v20.1d-v23.1d}, [x8], #32
ld1 {v24.1d}, [x8]
-0: sub w2, w2, #1
+1: sub w21, w21, #1
mov w8, #24
adr_l x9, .Lsha3_rcon
/* load input */
- ld1 {v25.8b-v28.8b}, [x1], #32
- ld1 {v29.8b-v31.8b}, [x1], #24
+ ld1 {v25.8b-v28.8b}, [x20], #32
+ ld1 {v29.8b-v31.8b}, [x20], #24
eor v0.8b, v0.8b, v25.8b
eor v1.8b, v1.8b, v26.8b
eor v2.8b, v2.8b, v27.8b
@@ -66,10 +73,10 @@ ENTRY(sha3_ce_transform)
eor v5.8b, v5.8b, v30.8b
eor v6.8b, v6.8b, v31.8b
- tbnz x3, #6, 2f // SHA3-512
+ tbnz x22, #6, 3f // SHA3-512
- ld1 {v25.8b-v28.8b}, [x1], #32
- ld1 {v29.8b-v30.8b}, [x1], #16
+ ld1 {v25.8b-v28.8b}, [x20], #32
+ ld1 {v29.8b-v30.8b}, [x20], #16
eor v7.8b, v7.8b, v25.8b
eor v8.8b, v8.8b, v26.8b
eor v9.8b, v9.8b, v27.8b
@@ -77,34 +84,34 @@ ENTRY(sha3_ce_transform)
eor v11.8b, v11.8b, v29.8b
eor v12.8b, v12.8b, v30.8b
- tbnz x3, #4, 1f // SHA3-384 or SHA3-224
+ tbnz x22, #4, 2f // SHA3-384 or SHA3-224
// SHA3-256
- ld1 {v25.8b-v28.8b}, [x1], #32
+ ld1 {v25.8b-v28.8b}, [x20], #32
eor v13.8b, v13.8b, v25.8b
eor v14.8b, v14.8b, v26.8b
eor v15.8b, v15.8b, v27.8b
eor v16.8b, v16.8b, v28.8b
- b 3f
+ b 4f
-1: tbz x3, #2, 3f // bit 2 cleared? SHA-384
+2: tbz x22, #2, 4f // bit 2 cleared? SHA-384
// SHA3-224
- ld1 {v25.8b-v28.8b}, [x1], #32
- ld1 {v29.8b}, [x1], #8
+ ld1 {v25.8b-v28.8b}, [x20], #32
+ ld1 {v29.8b}, [x20], #8
eor v13.8b, v13.8b, v25.8b
eor v14.8b, v14.8b, v26.8b
eor v15.8b, v15.8b, v27.8b
eor v16.8b, v16.8b, v28.8b
eor v17.8b, v17.8b, v29.8b
- b 3f
+ b 4f
// SHA3-512
-2: ld1 {v25.8b-v26.8b}, [x1], #16
+3: ld1 {v25.8b-v26.8b}, [x20], #16
eor v7.8b, v7.8b, v25.8b
eor v8.8b, v8.8b, v26.8b
-3: sub w8, w8, #1
+4: sub w8, w8, #1
eor3 v29.16b, v4.16b, v9.16b, v14.16b
eor3 v26.16b, v1.16b, v6.16b, v11.16b
@@ -183,17 +190,33 @@ ENTRY(sha3_ce_transform)
eor v0.16b, v0.16b, v31.16b
- cbnz w8, 3b
- cbnz w2, 0b
+ cbnz w8, 4b
+ cbz w21, 5f
+
+ if_will_cond_yield_neon
+ add x8, x19, #32
+ st1 { v0.1d- v3.1d}, [x19]
+ st1 { v4.1d- v7.1d}, [x8], #32
+ st1 { v8.1d-v11.1d}, [x8], #32
+ st1 {v12.1d-v15.1d}, [x8], #32
+ st1 {v16.1d-v19.1d}, [x8], #32
+ st1 {v20.1d-v23.1d}, [x8], #32
+ st1 {v24.1d}, [x8]
+ do_cond_yield_neon
+ b 0b
+ endif_yield_neon
+
+ b 1b
/* save state */
- st1 { v0.1d- v3.1d}, [x0], #32
- st1 { v4.1d- v7.1d}, [x0], #32
- st1 { v8.1d-v11.1d}, [x0], #32
- st1 {v12.1d-v15.1d}, [x0], #32
- st1 {v16.1d-v19.1d}, [x0], #32
- st1 {v20.1d-v23.1d}, [x0], #32
- st1 {v24.1d}, [x0]
+5: st1 { v0.1d- v3.1d}, [x19], #32
+ st1 { v4.1d- v7.1d}, [x19], #32
+ st1 { v8.1d-v11.1d}, [x19], #32
+ st1 {v12.1d-v15.1d}, [x19], #32
+ st1 {v16.1d-v19.1d}, [x19], #32
+ st1 {v20.1d-v23.1d}, [x19], #32
+ st1 {v24.1d}, [x19]
+ frame_pop
ret
ENDPROC(sha3_ce_transform)
diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/crypto/sha512-armv8.pl
index c55efb308544..2d8655d5b1af 100644
--- a/arch/arm64/crypto/sha512-armv8.pl
+++ b/arch/arm64/crypto/sha512-armv8.pl
@@ -1,4 +1,14 @@
#! /usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# This code is taken from the OpenSSL project but the author (Andy Polyakov)
+# has relicensed it under the GPLv2. Therefore this program is free software;
+# you can redistribute it and/or modify it under the terms of the GNU General
+# Public License version 2 as published by the Free Software Foundation.
+#
+# The original headers, including the original license headers, are
+# included below for completeness.
+
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
@@ -11,8 +21,6 @@
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
-#
-# Permission to use under GPLv2 terms is granted.
# ====================================================================
#
# SHA256/512 for ARMv8.
diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S
index 7f3bca5c59a2..ce65e3abe4f2 100644
--- a/arch/arm64/crypto/sha512-ce-core.S
+++ b/arch/arm64/crypto/sha512-ce-core.S
@@ -107,17 +107,23 @@
*/
.text
ENTRY(sha512_ce_transform)
+ frame_push 3
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+
/* load state */
- ld1 {v8.2d-v11.2d}, [x0]
+0: ld1 {v8.2d-v11.2d}, [x19]
/* load first 4 round constants */
adr_l x3, .Lsha512_rcon
ld1 {v20.2d-v23.2d}, [x3], #64
/* load input */
-0: ld1 {v12.2d-v15.2d}, [x1], #64
- ld1 {v16.2d-v19.2d}, [x1], #64
- sub w2, w2, #1
+1: ld1 {v12.2d-v15.2d}, [x20], #64
+ ld1 {v16.2d-v19.2d}, [x20], #64
+ sub w21, w21, #1
CPU_LE( rev64 v12.16b, v12.16b )
CPU_LE( rev64 v13.16b, v13.16b )
@@ -196,9 +202,18 @@ CPU_LE( rev64 v19.16b, v19.16b )
add v11.2d, v11.2d, v3.2d
/* handled all input blocks? */
- cbnz w2, 0b
+ cbz w21, 3f
+
+ if_will_cond_yield_neon
+ st1 {v8.2d-v11.2d}, [x19]
+ do_cond_yield_neon
+ b 0b
+ endif_yield_neon
+
+ b 1b
/* store new state */
-3: st1 {v8.2d-v11.2d}, [x0]
+3: st1 {v8.2d-v11.2d}, [x19]
+ frame_pop
ret
ENDPROC(sha512_ce_transform)
diff --git a/arch/arm64/crypto/sha512-core.S_shipped b/arch/arm64/crypto/sha512-core.S_shipped
index bd0f59f06c9d..e063a6106720 100644
--- a/arch/arm64/crypto/sha512-core.S_shipped
+++ b/arch/arm64/crypto/sha512-core.S_shipped
@@ -1,3 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// This code is taken from the OpenSSL project but the author (Andy Polyakov)
+// has relicensed it under the GPLv2. Therefore this program is free software;
+// you can redistribute it and/or modify it under the terms of the GNU General
+// Public License version 2 as published by the Free Software Foundation.
+//
+// The original headers, including the original license headers, are
+// included below for completeness.
+
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
@@ -10,8 +20,6 @@
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
-//
-// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.
diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S
new file mode 100644
index 000000000000..af3bfbc3f4d4
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8
+ .set .Lv\b\().4s, \b
+ .endr
+
+ .macro sm4e, rd, rn
+ .inst 0xcec08400 | .L\rd | (.L\rn << 5)
+ .endm
+
+ /*
+ * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in);
+ */
+ .text
+ENTRY(sm4_ce_do_crypt)
+ ld1 {v8.4s}, [x2]
+ ld1 {v0.4s-v3.4s}, [x0], #64
+CPU_LE( rev32 v8.16b, v8.16b )
+ ld1 {v4.4s-v7.4s}, [x0]
+ sm4e v8.4s, v0.4s
+ sm4e v8.4s, v1.4s
+ sm4e v8.4s, v2.4s
+ sm4e v8.4s, v3.4s
+ sm4e v8.4s, v4.4s
+ sm4e v8.4s, v5.4s
+ sm4e v8.4s, v6.4s
+ sm4e v8.4s, v7.4s
+ rev64 v8.4s, v8.4s
+ ext v8.16b, v8.16b, v8.16b, #8
+CPU_LE( rev32 v8.16b, v8.16b )
+ st1 {v8.4s}, [x1]
+ ret
+ENDPROC(sm4_ce_do_crypt)
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
new file mode 100644
index 000000000000..b7fb5274b250
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/sm4.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/types.h>
+
+MODULE_ALIAS_CRYPTO("sm4");
+MODULE_ALIAS_CRYPTO("sm4-ce");
+MODULE_DESCRIPTION("SM4 symmetric cipher using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);
+
+static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (!may_use_simd()) {
+ crypto_sm4_encrypt(tfm, out, in);
+ } else {
+ kernel_neon_begin();
+ sm4_ce_do_crypt(ctx->rkey_enc, out, in);
+ kernel_neon_end();
+ }
+}
+
+static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (!may_use_simd()) {
+ crypto_sm4_decrypt(tfm, out, in);
+ } else {
+ kernel_neon_begin();
+ sm4_ce_do_crypt(ctx->rkey_dec, out, in);
+ kernel_neon_end();
+ }
+}
+
+static struct crypto_alg sm4_ce_alg = {
+ .cra_name = "sm4",
+ .cra_driver_name = "sm4-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_sm4_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_u.cipher = {
+ .cia_min_keysize = SM4_KEY_SIZE,
+ .cia_max_keysize = SM4_KEY_SIZE,
+ .cia_setkey = crypto_sm4_set_key,
+ .cia_encrypt = sm4_ce_encrypt,
+ .cia_decrypt = sm4_ce_decrypt
+ }
+};
+
+static int __init sm4_ce_mod_init(void)
+{
+ return crypto_register_alg(&sm4_ce_alg);
+}
+
+static void __exit sm4_ce_mod_fini(void)
+{
+ crypto_unregister_alg(&sm4_ce_alg);
+}
+
+module_cpu_feature_match(SM3, sm4_ce_mod_init);
+module_exit(sm4_ce_mod_fini);
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 669028172fd6..a91933b1e2e6 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -5,6 +5,8 @@
#include <asm/cpucaps.h>
#include <asm/insn.h>
+#define ARM64_CB_PATCH ARM64_NCAPS
+
#ifndef __ASSEMBLY__
#include <linux/init.h>
@@ -22,12 +24,19 @@ struct alt_instr {
u8 alt_len; /* size of new instruction(s), <= orig_len */
};
+typedef void (*alternative_cb_t)(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+
void __init apply_alternatives_all(void);
void apply_alternatives(void *start, size_t length);
-#define ALTINSTR_ENTRY(feature) \
+#define ALTINSTR_ENTRY(feature,cb) \
" .word 661b - .\n" /* label */ \
+ " .if " __stringify(cb) " == 0\n" \
" .word 663f - .\n" /* new instruction */ \
+ " .else\n" \
+ " .word " __stringify(cb) "- .\n" /* callback */ \
+ " .endif\n" \
" .hword " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
@@ -45,15 +54,18 @@ void apply_alternatives(void *start, size_t length);
* but most assemblers die if insn1 or insn2 have a .inst. This should
* be fixed in a binutils release posterior to 2.25.51.0.2 (anything
* containing commit 4e4d08cf7399b606 or c1baaddf8861).
+ *
+ * Alternatives with callbacks do not generate replacement instructions.
*/
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(feature) \
+ ALTINSTR_ENTRY(feature,cb) \
".popsection\n" \
+ " .if " __stringify(cb) " == 0\n" \
".pushsection .altinstr_replacement, \"a\"\n" \
"663:\n\t" \
newinstr "\n" \
@@ -61,11 +73,17 @@ void apply_alternatives(void *start, size_t length);
".popsection\n\t" \
".org . - (664b-663b) + (662b-661b)\n\t" \
".org . - (662b-661b) + (664b-663b)\n" \
+ ".else\n\t" \
+ "663:\n\t" \
+ "664:\n\t" \
+ ".endif\n" \
".endif\n"
#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
- __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
+ __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0)
+#define ALTERNATIVE_CB(oldinstr, cb) \
+ __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb)
#else
#include <asm/assembler.h>
@@ -132,6 +150,14 @@ void apply_alternatives(void *start, size_t length);
661:
.endm
+.macro alternative_cb cb
+ .set .Lasm_alt_mode, 0
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
+ .popsection
+661:
+.endm
+
/*
* Provide the other half of the alternative code sequence.
*/
@@ -158,6 +184,13 @@ void apply_alternatives(void *start, size_t length);
.endm
/*
+ * Callback-based alternative epilogue
+ */
+.macro alternative_cb_end
+662:
+.endm
+
+/*
* Provides a trivial alternative or default sequence consisting solely
* of NOPs. The number of NOPs is chosen automatically to match the
* previous case.
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 053d83e8db6f..0bcc98dbba56 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -565,4 +565,140 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
#endif
.endm
+ /*
+ * frame_push - Push @regcount callee saved registers to the stack,
+ * starting at x19, as well as x29/x30, and set x29 to
+ * the new value of sp. Add @extra bytes of stack space
+ * for locals.
+ */
+ .macro frame_push, regcount:req, extra
+ __frame st, \regcount, \extra
+ .endm
+
+ /*
+ * frame_pop - Pop the callee saved registers from the stack that were
+ * pushed in the most recent call to frame_push, as well
+ * as x29/x30 and any extra stack space that may have been
+ * allocated.
+ */
+ .macro frame_pop
+ __frame ld
+ .endm
+
+ .macro __frame_regs, reg1, reg2, op, num
+ .if .Lframe_regcount == \num
+ \op\()r \reg1, [sp, #(\num + 1) * 8]
+ .elseif .Lframe_regcount > \num
+ \op\()p \reg1, \reg2, [sp, #(\num + 1) * 8]
+ .endif
+ .endm
+
+ .macro __frame, op, regcount, extra=0
+ .ifc \op, st
+ .if (\regcount) < 0 || (\regcount) > 10
+ .error "regcount should be in the range [0 ... 10]"
+ .endif
+ .if ((\extra) % 16) != 0
+ .error "extra should be a multiple of 16 bytes"
+ .endif
+ .ifdef .Lframe_regcount
+ .if .Lframe_regcount != -1
+ .error "frame_push/frame_pop may not be nested"
+ .endif
+ .endif
+ .set .Lframe_regcount, \regcount
+ .set .Lframe_extra, \extra
+ .set .Lframe_local_offset, ((\regcount + 3) / 2) * 16
+ stp x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]!
+ mov x29, sp
+ .endif
+
+ __frame_regs x19, x20, \op, 1
+ __frame_regs x21, x22, \op, 3
+ __frame_regs x23, x24, \op, 5
+ __frame_regs x25, x26, \op, 7
+ __frame_regs x27, x28, \op, 9
+
+ .ifc \op, ld
+ .if .Lframe_regcount == -1
+ .error "frame_push/frame_pop may not be nested"
+ .endif
+ ldp x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra
+ .set .Lframe_regcount, -1
+ .endif
+ .endm
+
+/*
+ * Check whether to yield to another runnable task from kernel mode NEON code
+ * (which runs with preemption disabled).
+ *
+ * if_will_cond_yield_neon
+ * // pre-yield patchup code
+ * do_cond_yield_neon
+ * // post-yield patchup code
+ * endif_yield_neon <label>
+ *
+ * where <label> is optional, and marks the point where execution will resume
+ * after a yield has been performed. If omitted, execution resumes right after
+ * the endif_yield_neon invocation. Note that the entire sequence, including
+ * the provided patchup code, will be omitted from the image if CONFIG_PREEMPT
+ * is not defined.
+ *
+ * As a convenience, in the case where no patchup code is required, the above
+ * sequence may be abbreviated to
+ *
+ * cond_yield_neon <label>
+ *
+ * Note that the patchup code does not support assembler directives that change
+ * the output section, any use of such directives is undefined.
+ *
+ * The yield itself consists of the following:
+ * - Check whether the preempt count is exactly 1, in which case disabling
+ * preemption once will make the task preemptible. If this is not the case,
+ * yielding is pointless.
+ * - Check whether TIF_NEED_RESCHED is set, and if so, disable and re-enable
+ * kernel mode NEON (which will trigger a reschedule), and branch to the
+ * yield fixup code.
+ *
+ * This macro sequence may clobber all CPU state that is not guaranteed by the
+ * AAPCS to be preserved across an ordinary function call.
+ */
+
+ .macro cond_yield_neon, lbl
+ if_will_cond_yield_neon
+ do_cond_yield_neon
+ endif_yield_neon \lbl
+ .endm
+
+ .macro if_will_cond_yield_neon
+#ifdef CONFIG_PREEMPT
+ get_thread_info x0
+ ldr w1, [x0, #TSK_TI_PREEMPT]
+ ldr x0, [x0, #TSK_TI_FLAGS]
+ cmp w1, #PREEMPT_DISABLE_OFFSET
+ csel x0, x0, xzr, eq
+ tbnz x0, #TIF_NEED_RESCHED, .Lyield_\@ // needs rescheduling?
+ /* fall through to endif_yield_neon */
+ .subsection 1
+.Lyield_\@ :
+#else
+ .section ".discard.cond_yield_neon", "ax"
+#endif
+ .endm
+
+ .macro do_cond_yield_neon
+ bl kernel_neon_end
+ bl kernel_neon_begin
+ .endm
+
+ .macro endif_yield_neon, lbl
+ .ifnb \lbl
+ b \lbl
+ .else
+ b .Lyield_out_\@
+ .endif
+ .previous
+.Lyield_out_\@ :
+ .endm
+
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 9ef0797380cb..f9b0b09153e0 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -117,7 +117,7 @@ static inline void atomic_and(int i, atomic_t *v)
/* LSE atomics */
" mvn %w[i], %w[i]\n"
" stclr %w[i], %[v]")
- : [i] "+r" (w0), [v] "+Q" (v->counter)
+ : [i] "+&r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
: __LL_SC_CLOBBERS);
}
@@ -135,7 +135,7 @@ static inline int atomic_fetch_and##name(int i, atomic_t *v) \
/* LSE atomics */ \
" mvn %w[i], %w[i]\n" \
" ldclr" #mb " %w[i], %w[i], %[v]") \
- : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : [i] "+&r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS, ##cl); \
\
@@ -161,7 +161,7 @@ static inline void atomic_sub(int i, atomic_t *v)
/* LSE atomics */
" neg %w[i], %w[i]\n"
" stadd %w[i], %[v]")
- : [i] "+r" (w0), [v] "+Q" (v->counter)
+ : [i] "+&r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
: __LL_SC_CLOBBERS);
}
@@ -180,7 +180,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \
" neg %w[i], %w[i]\n" \
" ldadd" #mb " %w[i], w30, %[v]\n" \
" add %w[i], %w[i], w30") \
- : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : [i] "+&r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS , ##cl); \
\
@@ -207,7 +207,7 @@ static inline int atomic_fetch_sub##name(int i, atomic_t *v) \
/* LSE atomics */ \
" neg %w[i], %w[i]\n" \
" ldadd" #mb " %w[i], %w[i], %[v]") \
- : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : [i] "+&r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS, ##cl); \
\
@@ -314,7 +314,7 @@ static inline void atomic64_and(long i, atomic64_t *v)
/* LSE atomics */
" mvn %[i], %[i]\n"
" stclr %[i], %[v]")
- : [i] "+r" (x0), [v] "+Q" (v->counter)
+ : [i] "+&r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
: __LL_SC_CLOBBERS);
}
@@ -332,7 +332,7 @@ static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \
/* LSE atomics */ \
" mvn %[i], %[i]\n" \
" ldclr" #mb " %[i], %[i], %[v]") \
- : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : [i] "+&r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS, ##cl); \
\
@@ -358,7 +358,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
/* LSE atomics */
" neg %[i], %[i]\n"
" stadd %[i], %[v]")
- : [i] "+r" (x0), [v] "+Q" (v->counter)
+ : [i] "+&r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
: __LL_SC_CLOBBERS);
}
@@ -377,7 +377,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \
" neg %[i], %[i]\n" \
" ldadd" #mb " %[i], x30, %[v]\n" \
" add %[i], %[i], x30") \
- : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : [i] "+&r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS, ##cl); \
\
@@ -404,7 +404,7 @@ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \
/* LSE atomics */ \
" neg %[i], %[i]\n" \
" ldadd" #mb " %[i], %[i], %[v]") \
- : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : [i] "+&r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS, ##cl); \
\
@@ -435,7 +435,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
" sub x30, x30, %[ret]\n"
" cbnz x30, 1b\n"
"2:")
- : [ret] "+r" (x0), [v] "+Q" (v->counter)
+ : [ret] "+&r" (x0), [v] "+Q" (v->counter)
:
: __LL_SC_CLOBBERS, "cc", "memory");
@@ -516,7 +516,7 @@ static inline long __cmpxchg_double##name(unsigned long old1, \
" eor %[old1], %[old1], %[oldval1]\n" \
" eor %[old2], %[old2], %[oldval2]\n" \
" orr %[old1], %[old1], %[old2]") \
- : [old1] "+r" (x0), [old2] "+r" (x1), \
+ : [old1] "+&r" (x0), [old2] "+&r" (x1), \
[v] "+Q" (*(unsigned long *)ptr) \
: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
[oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 7dfcec4700fe..0094c6653b06 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -140,10 +140,8 @@ static inline void __flush_icache_all(void)
dsb(ish);
}
-#define flush_dcache_mmap_lock(mapping) \
- spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
- spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping) do { } while (0)
+#define flush_dcache_mmap_unlock(mapping) do { } while (0)
/*
* We don't appear to need to do anything here. In fact, if we did, we'd
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index c00c62e1a4a3..1a037b94eba1 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -34,7 +34,6 @@
typedef u32 compat_size_t;
typedef s32 compat_ssize_t;
-typedef s32 compat_time_t;
typedef s32 compat_clock_t;
typedef s32 compat_pid_t;
typedef u16 __compat_uid_t;
@@ -66,16 +65,6 @@ typedef u32 compat_ulong_t;
typedef u64 compat_u64;
typedef u32 compat_uptr_t;
-struct compat_timespec {
- compat_time_t tv_sec;
- s32 tv_nsec;
-};
-
-struct compat_timeval {
- compat_time_t tv_sec;
- s32 tv_usec;
-};
-
struct compat_stat {
#ifdef __AARCH64EB__
short st_dev;
@@ -192,10 +181,10 @@ struct compat_ipc64_perm {
struct compat_semid64_ds {
struct compat_ipc64_perm sem_perm;
- compat_time_t sem_otime;
- compat_ulong_t __unused1;
- compat_time_t sem_ctime;
- compat_ulong_t __unused2;
+ compat_ulong_t sem_otime;
+ compat_ulong_t sem_otime_high;
+ compat_ulong_t sem_ctime;
+ compat_ulong_t sem_ctime_high;
compat_ulong_t sem_nsems;
compat_ulong_t __unused3;
compat_ulong_t __unused4;
@@ -203,12 +192,12 @@ struct compat_semid64_ds {
struct compat_msqid64_ds {
struct compat_ipc64_perm msg_perm;
- compat_time_t msg_stime;
- compat_ulong_t __unused1;
- compat_time_t msg_rtime;
- compat_ulong_t __unused2;
- compat_time_t msg_ctime;
- compat_ulong_t __unused3;
+ compat_ulong_t msg_stime;
+ compat_ulong_t msg_stime_high;
+ compat_ulong_t msg_rtime;
+ compat_ulong_t msg_rtime_high;
+ compat_ulong_t msg_ctime;
+ compat_ulong_t msg_ctime_high;
compat_ulong_t msg_cbytes;
compat_ulong_t msg_qnum;
compat_ulong_t msg_qbytes;
@@ -221,12 +210,12 @@ struct compat_msqid64_ds {
struct compat_shmid64_ds {
struct compat_ipc64_perm shm_perm;
compat_size_t shm_segsz;
- compat_time_t shm_atime;
- compat_ulong_t __unused1;
- compat_time_t shm_dtime;
- compat_ulong_t __unused2;
- compat_time_t shm_ctime;
- compat_ulong_t __unused3;
+ compat_ulong_t shm_atime;
+ compat_ulong_t shm_atime_high;
+ compat_ulong_t shm_dtime;
+ compat_ulong_t shm_dtime_high;
+ compat_ulong_t shm_ctime;
+ compat_ulong_t shm_ctime_high;
compat_pid_t shm_cpid;
compat_pid_t shm_lpid;
compat_ulong_t shm_nattch;
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 21bb624e0a7a..bc51b72fafd4 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -32,7 +32,7 @@
#define ARM64_HAS_VIRT_HOST_EXTN 11
#define ARM64_WORKAROUND_CAVIUM_27456 12
#define ARM64_HAS_32BIT_EL0 13
-#define ARM64_HYP_OFFSET_LOW 14
+#define ARM64_HARDEN_EL2_VECTORS 14
#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
#define ARM64_HAS_NO_FPSIMD 16
#define ARM64_WORKAROUND_REPEAT_TLBI 17
@@ -43,13 +43,12 @@
#define ARM64_SVE 22
#define ARM64_UNMAP_KERNEL_AT_EL0 23
#define ARM64_HARDEN_BRANCH_PREDICTOR 24
-#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25
-#define ARM64_HAS_RAS_EXTN 26
-#define ARM64_WORKAROUND_843419 27
-#define ARM64_HAS_CACHE_IDC 28
-#define ARM64_HAS_CACHE_DIC 29
-#define ARM64_HW_DBM 30
+#define ARM64_HAS_RAS_EXTN 25
+#define ARM64_WORKAROUND_843419 26
+#define ARM64_HAS_CACHE_IDC 27
+#define ARM64_HAS_CACHE_DIC 28
+#define ARM64_HW_DBM 29
-#define ARM64_NCAPS 31
+#define ARM64_NCAPS 30
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 30014a9f8f2b..ea690b3562af 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -75,6 +75,7 @@
#define ARM_CPU_IMP_CAVIUM 0x43
#define ARM_CPU_IMP_BRCM 0x42
#define ARM_CPU_IMP_QCOM 0x51
+#define ARM_CPU_IMP_NVIDIA 0x4E
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
@@ -99,6 +100,9 @@
#define QCOM_CPU_PART_FALKOR 0xC00
#define QCOM_CPU_PART_KRYO 0x200
+#define NVIDIA_CPU_PART_DENVER 0x003
+#define NVIDIA_CPU_PART_CARMEL 0x004
+
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
@@ -114,6 +118,8 @@
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
+#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
+#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 4214c38d016b..f62c56b1793f 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -70,6 +70,7 @@ enum aarch64_insn_imm_type {
AARCH64_INSN_IMM_6,
AARCH64_INSN_IMM_S,
AARCH64_INSN_IMM_R,
+ AARCH64_INSN_IMM_N,
AARCH64_INSN_IMM_MAX
};
@@ -314,6 +315,11 @@ __AARCH64_INSN_FUNCS(eor, 0x7F200000, 0x4A000000)
__AARCH64_INSN_FUNCS(eon, 0x7F200000, 0x4A200000)
__AARCH64_INSN_FUNCS(ands, 0x7F200000, 0x6A000000)
__AARCH64_INSN_FUNCS(bics, 0x7F200000, 0x6A200000)
+__AARCH64_INSN_FUNCS(and_imm, 0x7F800000, 0x12000000)
+__AARCH64_INSN_FUNCS(orr_imm, 0x7F800000, 0x32000000)
+__AARCH64_INSN_FUNCS(eor_imm, 0x7F800000, 0x52000000)
+__AARCH64_INSN_FUNCS(ands_imm, 0x7F800000, 0x72000000)
+__AARCH64_INSN_FUNCS(extr, 0x7FA00000, 0x13800000)
__AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000)
__AARCH64_INSN_FUNCS(bl, 0xFC000000, 0x94000000)
__AARCH64_INSN_FUNCS(cbz, 0x7F000000, 0x34000000)
@@ -423,6 +429,16 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
int shift,
enum aarch64_insn_variant variant,
enum aarch64_insn_logic_type type);
+u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_register Rn,
+ enum aarch64_insn_register Rd,
+ u64 imm);
+u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
+ enum aarch64_insn_register Rm,
+ enum aarch64_insn_register Rn,
+ enum aarch64_insn_register Rd,
+ u8 lsb);
u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
enum aarch64_insn_prfm_type type,
enum aarch64_insn_prfm_target target,
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index b0c84171e6a3..6dd285e979c9 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -25,6 +25,7 @@
/* Hyp Configuration Register (HCR) bits */
#define HCR_TEA (UL(1) << 37)
#define HCR_TERR (UL(1) << 36)
+#define HCR_TLOR (UL(1) << 35)
#define HCR_E2H (UL(1) << 34)
#define HCR_ID (UL(1) << 33)
#define HCR_CD (UL(1) << 32)
@@ -64,6 +65,7 @@
/*
* The bits we set in HCR:
+ * TLOR: Trap LORegion register accesses
* RW: 64bit by default, can be overridden for 32bit VMs
* TAC: Trap ACTLR
* TSC: Trap SMC
@@ -81,9 +83,9 @@
*/
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
- HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
+ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
+ HCR_FMO | HCR_IMO)
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
-#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
/* TCR_EL2 Registers bits */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 24961b732e65..f6648a3e4152 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -33,6 +33,7 @@
#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0
#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+/* Translate a kernel address of @sym into its equivalent linear mapping */
#define kvm_ksym_ref(sym) \
({ \
void *val = &sym; \
@@ -57,7 +58,9 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
-extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu);
+
+extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_ich_vtr_el2(void);
extern u64 __vgic_v3_read_vmcr(void);
@@ -68,7 +71,19 @@ extern u32 __kvm_get_mdcr_el2(void);
extern u32 __init_stage2_translation(void);
-extern void __qcom_hyp_sanitize_btac_predictors(void);
+#else /* __ASSEMBLY__ */
+
+.macro get_host_ctxt reg, tmp
+ adr_l \reg, kvm_host_cpu_state
+ mrs \tmp, tpidr_el2
+ add \reg, \reg, \tmp
+.endm
+
+.macro get_vcpu_ptr vcpu, ctxt
+ get_host_ctxt \ctxt, \vcpu
+ ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
+ kern_hyp_va \vcpu
+.endm
#endif
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 413dc82b1e89..1dab3a984608 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -26,13 +26,15 @@
#include <asm/esr.h>
#include <asm/kvm_arm.h>
+#include <asm/kvm_hyp.h>
#include <asm/kvm_mmio.h>
#include <asm/ptrace.h>
#include <asm/cputype.h>
#include <asm/virt.h>
unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
-unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
+unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu);
+void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v);
bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
@@ -45,6 +47,11 @@ void kvm_inject_undef32(struct kvm_vcpu *vcpu);
void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr);
+static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu->arch.hcr_el2 & HCR_RW);
+}
+
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
@@ -59,16 +66,19 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
vcpu->arch.hcr_el2 &= ~HCR_RW;
-}
-static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.hcr_el2;
+ /*
+ * TID3: trap feature register accesses that we virtualise.
+ * For now this is conditional, since no AArch32 feature regs
+ * are currently virtualised.
+ */
+ if (!vcpu_el1_is_32bit(vcpu))
+ vcpu->arch.hcr_el2 |= HCR_TID3;
}
-static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
+static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
{
- vcpu->arch.hcr_el2 = hcr;
+ return (unsigned long *)&vcpu->arch.hcr_el2;
}
static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
@@ -81,11 +91,27 @@ static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
}
-static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu)
+static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu)
{
return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1;
}
+static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sysregs_loaded_on_cpu)
+ return read_sysreg_el1(elr);
+ else
+ return *__vcpu_elr_el1(vcpu);
+}
+
+static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v)
+{
+ if (vcpu->arch.sysregs_loaded_on_cpu)
+ write_sysreg_el1(v, elr);
+ else
+ *__vcpu_elr_el1(vcpu) = v;
+}
+
static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
{
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
@@ -135,13 +161,28 @@ static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val;
}
-/* Get vcpu SPSR for current mode */
-static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
+static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu))
- return vcpu_spsr32(vcpu);
+ return vcpu_read_spsr32(vcpu);
- return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+ if (vcpu->arch.sysregs_loaded_on_cpu)
+ return read_sysreg_el1(spsr);
+ else
+ return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+}
+
+static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
+{
+ if (vcpu_mode_is_32bit(vcpu)) {
+ vcpu_write_spsr32(vcpu, v);
+ return;
+ }
+
+ if (vcpu->arch.sysregs_loaded_on_cpu)
+ write_sysreg_el1(v, spsr);
+ else
+ vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v;
}
static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
@@ -282,15 +323,18 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
{
- return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
+ return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
}
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
{
- if (vcpu_mode_is_32bit(vcpu))
+ if (vcpu_mode_is_32bit(vcpu)) {
*vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT;
- else
- vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25);
+ } else {
+ u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ sctlr |= (1 << 25);
+ vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1);
+ }
}
static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
@@ -298,7 +342,7 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
if (vcpu_mode_is_32bit(vcpu))
return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT);
- return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
+ return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
}
static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 596f8e414a4c..469de8acd06f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -75,6 +75,9 @@ struct kvm_arch {
/* Interrupt controller */
struct vgic_dist vgic;
+
+ /* Mandated version of PSCI */
+ u32 psci_version;
};
#define KVM_NR_MEM_OBJS 40
@@ -272,9 +275,6 @@ struct kvm_vcpu_arch {
/* IO related fields */
struct kvm_decode mmio_decode;
- /* Interrupt related fields */
- u64 irq_lines; /* IRQ and FIQ levels */
-
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
@@ -287,10 +287,25 @@ struct kvm_vcpu_arch {
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
u64 vsesr_el2;
+
+ /* True when deferrable sysregs are loaded on the physical CPU,
+ * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
+ bool sysregs_loaded_on_cpu;
};
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
-#define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
+
+/*
+ * Only use __vcpu_sys_reg if you know you want the memory backed version of a
+ * register, and not the one most recently accessed by a running VCPU. For
+ * example, for userspace access or for system registers that are never context
+ * switched, but only emulated.
+ */
+#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
+
+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
+
/*
* CP14 and CP15 live in the same array, as they are backed by the
* same system registers.
@@ -298,14 +313,6 @@ struct kvm_vcpu_arch {
#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)])
#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)])
-#ifdef CONFIG_CPU_BIG_ENDIAN
-#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r))
-#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r) + 1)
-#else
-#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1)
-#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r))
-#endif
-
struct kvm_vm_stat {
ulong remote_tlb_flush;
};
@@ -358,10 +365,15 @@ int kvm_perf_teardown(void);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
+void __kvm_set_tpidr_el2(u64 tpidr_el2);
+DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
+
static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
unsigned long hyp_stack_ptr,
unsigned long vector_ptr)
{
+ u64 tpidr_el2;
+
/*
* Call initialization code, and switch to the full blown HYP code.
* If the cpucaps haven't been finalized yet, something has gone very
@@ -370,6 +382,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
*/
BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
+
+ /*
+ * Calculate the raw per-cpu offset without a translation from the
+ * kernel's mapping to the linear mapping, and store it in tpidr_el2
+ * so that we can use adr_l to access per-cpu variables in EL2.
+ */
+ tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
+ - (u64)kvm_ksym_ref(kvm_host_cpu_state);
+
+ kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
}
static inline void kvm_arch_hardware_unsetup(void) {}
@@ -416,6 +438,13 @@ static inline void kvm_arm_vhe_guest_enter(void)
static inline void kvm_arm_vhe_guest_exit(void)
{
local_daif_restore(DAIF_PROCCTX_NOIRQ);
+
+ /*
+ * When we exit from the guest we change a number of CPU configuration
+ * parameters, such as traps. Make sure these changes take effect
+ * before running the host or additional guests.
+ */
+ isb();
}
static inline bool kvm_arm_harden_branch_predictor(void)
@@ -423,4 +452,7 @@ static inline bool kvm_arm_harden_branch_predictor(void)
return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
}
+void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index f26f9cd70c72..384c34397619 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -120,37 +120,38 @@ typeof(orig) * __hyp_text fname(void) \
return val; \
}
-void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
-void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
+void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __timer_enable_traps(struct kvm_vcpu *vcpu);
void __timer_disable_traps(struct kvm_vcpu *vcpu);
-void __sysreg_save_host_state(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt);
-void __sysreg_save_guest_state(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt);
+void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt);
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt);
void __sysreg32_save_state(struct kvm_vcpu *vcpu);
void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
-void __debug_save_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt);
-void __debug_restore_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt);
-void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
-void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
+void __debug_switch_to_host(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
bool __fpsimd_enabled(void);
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
+void deactivate_traps_vhe_put(void);
+
u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
void __noreturn __hyp_do_panic(unsigned long, ...);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7faed6e48b46..6128992c2ded 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -69,9 +69,6 @@
* mappings, and none of this applies in that case.
*/
-#define HYP_PAGE_OFFSET_HIGH_MASK ((UL(1) << VA_BITS) - 1)
-#define HYP_PAGE_OFFSET_LOW_MASK ((UL(1) << (VA_BITS - 1)) - 1)
-
#ifdef __ASSEMBLY__
#include <asm/alternative.h>
@@ -81,28 +78,19 @@
* Convert a kernel VA into a HYP VA.
* reg: VA to be converted.
*
- * This generates the following sequences:
- * - High mask:
- * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK
- * nop
- * - Low mask:
- * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK
- * and x0, x0, #HYP_PAGE_OFFSET_LOW_MASK
- * - VHE:
- * nop
- * nop
- *
- * The "low mask" version works because the mask is a strict subset of
- * the "high mask", hence performing the first mask for nothing.
- * Should be completely invisible on any viable CPU.
+ * The actual code generation takes place in kvm_update_va_mask, and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_update_va_mask uses the
+ * specific registers encoded in the instructions).
*/
.macro kern_hyp_va reg
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
- and \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK
-alternative_else_nop_endif
-alternative_if ARM64_HYP_OFFSET_LOW
- and \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK
-alternative_else_nop_endif
+alternative_cb kvm_update_va_mask
+ and \reg, \reg, #1 /* mask with va_mask */
+ ror \reg, \reg, #1 /* rotate to the first tag bit */
+ add \reg, \reg, #0 /* insert the low 12 bits of the tag */
+ add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */
+ ror \reg, \reg, #63 /* rotate back */
+alternative_cb_end
.endm
#else
@@ -113,24 +101,44 @@ alternative_else_nop_endif
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
+void kvm_update_va_mask(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+
static inline unsigned long __kern_hyp_va(unsigned long v)
{
- asm volatile(ALTERNATIVE("and %0, %0, %1",
- "nop",
- ARM64_HAS_VIRT_HOST_EXTN)
- : "+r" (v)
- : "i" (HYP_PAGE_OFFSET_HIGH_MASK));
- asm volatile(ALTERNATIVE("nop",
- "and %0, %0, %1",
- ARM64_HYP_OFFSET_LOW)
- : "+r" (v)
- : "i" (HYP_PAGE_OFFSET_LOW_MASK));
+ asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
+ "ror %0, %0, #1\n"
+ "add %0, %0, #0\n"
+ "add %0, %0, #0, lsl 12\n"
+ "ror %0, %0, #63\n",
+ kvm_update_va_mask)
+ : "+r" (v));
return v;
}
#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
/*
+ * Obtain the PC-relative address of a kernel symbol
+ * s: symbol
+ *
+ * The goal of this macro is to return a symbol's address based on a
+ * PC-relative computation, as opposed to a loading the VA from a
+ * constant pool or something similar. This works well for HYP, as an
+ * absolute VA is guaranteed to be wrong. Only use this if trying to
+ * obtain the address of a symbol (i.e. not something you obtained by
+ * following a pointer).
+ */
+#define hyp_symbol_addr(s) \
+ ({ \
+ typeof(s) *addr; \
+ asm("adrp %0, %1\n" \
+ "add %0, %0, :lo12:%1\n" \
+ : "=r" (addr) : "S" (&s)); \
+ addr; \
+ })
+
+/*
* We currently only support a 40bit IPA.
*/
#define KVM_PHYS_SHIFT (40)
@@ -140,7 +148,11 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
#include <asm/stage2_pgtable.h>
int create_hyp_mappings(void *from, void *to, pgprot_t prot);
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
+int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
+ void __iomem **kaddr,
+ void __iomem **haddr);
+int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
+ void **haddr);
void free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
@@ -249,7 +261,7 @@ struct kvm;
static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
- return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+ return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
}
static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
@@ -348,36 +360,111 @@ static inline unsigned int kvm_get_vmid_bits(void)
return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
}
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+/*
+ * We are not in the kvm->srcu critical section most of the time, so we take
+ * the SRCU read lock here. Since we copy the data from the user page, we
+ * can immediately drop the lock again.
+ */
+static inline int kvm_read_guest_lock(struct kvm *kvm,
+ gpa_t gpa, void *data, unsigned long len)
+{
+ int srcu_idx = srcu_read_lock(&kvm->srcu);
+ int ret = kvm_read_guest(kvm, gpa, data, len);
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ return ret;
+}
+
+#ifdef CONFIG_KVM_INDIRECT_VECTORS
+/*
+ * EL2 vectors can be mapped and rerouted in a number of ways,
+ * depending on the kernel configuration and CPU present:
+ *
+ * - If the CPU has the ARM64_HARDEN_BRANCH_PREDICTOR cap, the
+ * hardening sequence is placed in one of the vector slots, which is
+ * executed before jumping to the real vectors.
+ *
+ * - If the CPU has both the ARM64_HARDEN_EL2_VECTORS cap and the
+ * ARM64_HARDEN_BRANCH_PREDICTOR cap, the slot containing the
+ * hardening sequence is mapped next to the idmap page, and executed
+ * before jumping to the real vectors.
+ *
+ * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an
+ * empty slot is selected, mapped next to the idmap page, and
+ * executed before jumping to the real vectors.
+ *
+ * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with
+ * VHE, as we don't have hypervisor-specific mappings. If the system
+ * is VHE and yet selects this capability, it will be ignored.
+ */
#include <asm/mmu.h>
+extern void *__kvm_bp_vect_base;
+extern int __kvm_harden_el2_vector_slot;
+
static inline void *kvm_get_hyp_vector(void)
{
struct bp_hardening_data *data = arm64_get_bp_hardening_data();
- void *vect = kvm_ksym_ref(__kvm_hyp_vector);
+ void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
+ int slot = -1;
- if (data->fn) {
- vect = __bp_harden_hyp_vecs_start +
- data->hyp_vectors_slot * SZ_2K;
+ if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) {
+ vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs_start));
+ slot = data->hyp_vectors_slot;
+ }
- if (!has_vhe())
- vect = lm_alias(vect);
+ if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) {
+ vect = __kvm_bp_vect_base;
+ if (slot == -1)
+ slot = __kvm_harden_el2_vector_slot;
}
+ if (slot != -1)
+ vect += slot * SZ_2K;
+
return vect;
}
+/* This is only called on a !VHE system */
static inline int kvm_map_vectors(void)
{
- return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start),
- kvm_ksym_ref(__bp_harden_hyp_vecs_end),
- PAGE_HYP_EXEC);
-}
+ /*
+ * HBP = ARM64_HARDEN_BRANCH_PREDICTOR
+ * HEL2 = ARM64_HARDEN_EL2_VECTORS
+ *
+ * !HBP + !HEL2 -> use direct vectors
+ * HBP + !HEL2 -> use hardened vectors in place
+ * !HBP + HEL2 -> allocate one vector slot and use exec mapping
+ * HBP + HEL2 -> use hardened vertors and use exec mapping
+ */
+ if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) {
+ __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs_start);
+ __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base);
+ }
+ if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
+ phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs_start);
+ unsigned long size = (__bp_harden_hyp_vecs_end -
+ __bp_harden_hyp_vecs_start);
+
+ /*
+ * Always allocate a spare vector slot, as we don't
+ * know yet which CPUs have a BP hardening slot that
+ * we can reuse.
+ */
+ __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot);
+ BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS);
+ return create_hyp_exec_mappings(vect_pa, size,
+ &__kvm_bp_vect_base);
+ }
+
+ return 0;
+}
#else
static inline void *kvm_get_hyp_vector(void)
{
- return kvm_ksym_ref(__kvm_hyp_vector);
+ return kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
}
static inline int kvm_map_vectors(void)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 50fa96a49792..49d99214f43c 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -29,12 +29,6 @@
#include <asm/sizes.h>
/*
- * Allow for constants defined here to be used from assembly code
- * by prepending the UL suffix only with actual C code compilation.
- */
-#define UL(x) _AC(x, UL)
-
-/*
* Size of the PCI I/O space. This must remain a power of two so that
* IO_SPACE_LIMIT acts as a mask for the low bits of I/O addresses.
*/
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index a050d4f3615d..dd320df0d026 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -21,6 +21,8 @@
#define USER_ASID_FLAG (UL(1) << USER_ASID_BIT)
#define TTBR_ASID_MASK (UL(0xffff) << 48)
+#define BP_HARDEN_EL2_SLOTS 4
+
#ifndef __ASSEMBLY__
typedef struct {
@@ -49,9 +51,13 @@ struct bp_hardening_data {
bp_hardening_cb_t fn;
};
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+#if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \
+ defined(CONFIG_HARDEN_EL2_VECTORS))
extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[];
+extern atomic_t arm64_el2_vector_last_slot;
+#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index b6dbbe3123a9..97d0ef12e2ff 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -39,7 +39,7 @@ struct mod_arch_specific {
u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
Elf64_Sym *sym);
-u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val);
+u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val);
#ifdef CONFIG_RANDOMIZE_BASE
extern u64 module_alloc_base;
diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
index 8747f7c5e0e7..9e690686e8aa 100644
--- a/arch/arm64/include/asm/pci.h
+++ b/arch/arm64/include/asm/pci.h
@@ -18,11 +18,6 @@
#define pcibios_assign_all_busses() \
(pci_has_flag(PCI_REASSIGN_ALL_BUS))
-/*
- * PCI address space differs from physical memory address space
- */
-#define PCI_DMA_BUS_IS_PHYS (0)
-
#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1
extern int isa_dma_bridge_buggy;
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 7e2c27e63cd8..7c4c8f318ba9 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -230,7 +230,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
}
}
-extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
+extern void __sync_icache_dcache(pte_t pteval);
/*
* PTE bits configuration in the presence of hardware Dirty Bit Management
@@ -253,7 +253,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t old_pte;
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
- __sync_icache_dcache(pte, addr);
+ __sync_icache_dcache(pte);
/*
* If the existing pte is valid, check for potential race with
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index ebdae15d665d..26c5bd7d88d8 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -122,11 +122,6 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
- /*
- * Ensure prior spin_lock operations to other locks have completed
- * on this CPU before we test whether "lock" is locked.
- */
- smp_mb(); /* ^^^ */
return !arch_spin_value_unlocked(READ_ONCE(*lock));
}
diff --git a/arch/arm64/include/asm/stat.h b/arch/arm64/include/asm/stat.h
index 15e35598ac40..eab738019707 100644
--- a/arch/arm64/include/asm/stat.h
+++ b/arch/arm64/include/asm/stat.h
@@ -20,6 +20,7 @@
#ifdef CONFIG_COMPAT
+#include <linux/compat_time.h>
#include <asm/compat.h>
/*
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index e7b9f154e476..6171178075dc 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -288,6 +288,12 @@
#define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0)
#define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0)
+#define SYS_LORSA_EL1 sys_reg(3, 0, 10, 4, 0)
+#define SYS_LOREA_EL1 sys_reg(3, 0, 10, 4, 1)
+#define SYS_LORN_EL1 sys_reg(3, 0, 10, 4, 2)
+#define SYS_LORC_EL1 sys_reg(3, 0, 10, 4, 3)
+#define SYS_LORID_EL1 sys_reg(3, 0, 10, 4, 7)
+
#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0)
#define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1)
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 9abbf3044654..04b3256f8e6d 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -206,6 +206,12 @@ struct kvm_arch_memory_slot {
#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2)
+/* KVM-as-firmware specific pseudo-registers */
+#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+ KVM_REG_ARM_FW | ((r) & 0xffff))
+#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
+
/* Device Control API: ARM VGIC */
#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 6a4bd80c75bd..bf825f38d206 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -55,10 +55,6 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
-ifeq ($(CONFIG_KVM),y)
-arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o
-endif
-
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
head-y := head.o
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 414288a558c8..5c4bce4ac381 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -107,32 +107,53 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp
return insn;
}
+static void patch_alternative(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ __le32 *replptr;
+ int i;
+
+ replptr = ALT_REPL_PTR(alt);
+ for (i = 0; i < nr_inst; i++) {
+ u32 insn;
+
+ insn = get_alt_insn(alt, origptr + i, replptr + i);
+ updptr[i] = cpu_to_le32(insn);
+ }
+}
+
static void __apply_alternatives(void *alt_region, bool use_linear_alias)
{
struct alt_instr *alt;
struct alt_region *region = alt_region;
- __le32 *origptr, *replptr, *updptr;
+ __le32 *origptr, *updptr;
+ alternative_cb_t alt_cb;
for (alt = region->begin; alt < region->end; alt++) {
- u32 insn;
- int i, nr_inst;
+ int nr_inst;
- if (!cpus_have_cap(alt->cpufeature))
+ /* Use ARM64_CB_PATCH as an unconditional patch */
+ if (alt->cpufeature < ARM64_CB_PATCH &&
+ !cpus_have_cap(alt->cpufeature))
continue;
- BUG_ON(alt->alt_len != alt->orig_len);
+ if (alt->cpufeature == ARM64_CB_PATCH)
+ BUG_ON(alt->alt_len != 0);
+ else
+ BUG_ON(alt->alt_len != alt->orig_len);
pr_info_once("patching kernel code\n");
origptr = ALT_ORIG_PTR(alt);
- replptr = ALT_REPL_PTR(alt);
updptr = use_linear_alias ? lm_alias(origptr) : origptr;
- nr_inst = alt->alt_len / sizeof(insn);
+ nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
- for (i = 0; i < nr_inst; i++) {
- insn = get_alt_insn(alt, origptr + i, replptr + i);
- updptr[i] = cpu_to_le32(insn);
- }
+ if (alt->cpufeature < ARM64_CB_PATCH)
+ alt_cb = patch_alternative;
+ else
+ alt_cb = ALT_REPL_PTR(alt);
+
+ alt_cb(alt, origptr, updptr, nr_inst);
flush_icache_range((uintptr_t)origptr,
(uintptr_t)(origptr + nr_inst));
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 66be504edb6c..d894a20b70b2 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -75,3 +75,11 @@ NOKPROBE_SYMBOL(_mcount);
/* arm-smccc */
EXPORT_SYMBOL(__arm_smccc_smc);
EXPORT_SYMBOL(__arm_smccc_hvc);
+
+ /* tishift.S */
+extern long long __ashlti3(long long a, int b);
+EXPORT_SYMBOL(__ashlti3);
+extern long long __ashrti3(long long a, int b);
+EXPORT_SYMBOL(__ashrti3);
+extern long long __lshrti3(long long a, int b);
+EXPORT_SYMBOL(__lshrti3);
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 1303e04110cd..5bdda651bd05 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -23,6 +23,7 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
+#include <linux/preempt.h>
#include <linux/suspend.h>
#include <asm/cpufeature.h>
#include <asm/fixmap.h>
@@ -93,6 +94,8 @@ int main(void)
DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
BLANK();
+ DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
+ BLANK();
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
@@ -138,6 +141,7 @@ int main(void)
DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
+ DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx));
diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
deleted file mode 100644
index e5de33513b5d..000000000000
--- a/arch/arm64/kernel/bpi.S
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Contains CPU specific branch predictor invalidation sequences
- *
- * Copyright (C) 2018 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/linkage.h>
-#include <linux/arm-smccc.h>
-
-.macro ventry target
- .rept 31
- nop
- .endr
- b \target
-.endm
-
-.macro vectors target
- ventry \target + 0x000
- ventry \target + 0x080
- ventry \target + 0x100
- ventry \target + 0x180
-
- ventry \target + 0x200
- ventry \target + 0x280
- ventry \target + 0x300
- ventry \target + 0x380
-
- ventry \target + 0x400
- ventry \target + 0x480
- ventry \target + 0x500
- ventry \target + 0x580
-
- ventry \target + 0x600
- ventry \target + 0x680
- ventry \target + 0x700
- ventry \target + 0x780
-.endm
-
- .align 11
-ENTRY(__bp_harden_hyp_vecs_start)
- .rept 4
- vectors __kvm_hyp_vector
- .endr
-ENTRY(__bp_harden_hyp_vecs_end)
-
-ENTRY(__qcom_hyp_sanitize_link_stack_start)
- stp x29, x30, [sp, #-16]!
- .rept 16
- bl . + 4
- .endr
- ldp x29, x30, [sp], #16
-ENTRY(__qcom_hyp_sanitize_link_stack_end)
-
-.macro smccc_workaround_1 inst
- sub sp, sp, #(8 * 4)
- stp x2, x3, [sp, #(8 * 0)]
- stp x0, x1, [sp, #(8 * 2)]
- mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
- \inst #0
- ldp x2, x3, [sp, #(8 * 0)]
- ldp x0, x1, [sp, #(8 * 2)]
- add sp, sp, #(8 * 4)
-.endm
-
-ENTRY(__smccc_workaround_1_smc_start)
- smccc_workaround_1 smc
-ENTRY(__smccc_workaround_1_smc_end)
-
-ENTRY(__smccc_workaround_1_hvc_start)
- smccc_workaround_1 hvc
-ENTRY(__smccc_workaround_1_hvc_end)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 2df792771053..e4a1182deff7 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -78,19 +78,17 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused)
config_sctlr_el1(SCTLR_EL1_UCT, 0);
}
+atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
+
#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
-#ifdef CONFIG_KVM
-extern char __qcom_hyp_sanitize_link_stack_start[];
-extern char __qcom_hyp_sanitize_link_stack_end[];
+#ifdef CONFIG_KVM_INDIRECT_VECTORS
extern char __smccc_workaround_1_smc_start[];
extern char __smccc_workaround_1_smc_end[];
-extern char __smccc_workaround_1_hvc_start[];
-extern char __smccc_workaround_1_hvc_end[];
static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
const char *hyp_vecs_end)
@@ -108,7 +106,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
const char *hyp_vecs_start,
const char *hyp_vecs_end)
{
- static int last_slot = -1;
static DEFINE_SPINLOCK(bp_lock);
int cpu, slot = -1;
@@ -121,10 +118,8 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
}
if (slot == -1) {
- last_slot++;
- BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start)
- / SZ_2K) <= last_slot);
- slot = last_slot;
+ slot = atomic_inc_return(&arm64_el2_vector_last_slot);
+ BUG_ON(slot >= BP_HARDEN_EL2_SLOTS);
__copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
}
@@ -133,12 +128,8 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
spin_unlock(&bp_lock);
}
#else
-#define __qcom_hyp_sanitize_link_stack_start NULL
-#define __qcom_hyp_sanitize_link_stack_end NULL
#define __smccc_workaround_1_smc_start NULL
#define __smccc_workaround_1_smc_end NULL
-#define __smccc_workaround_1_hvc_start NULL
-#define __smccc_workaround_1_hvc_end NULL
static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
const char *hyp_vecs_start,
@@ -146,7 +137,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
{
__this_cpu_write(bp_hardening_data.fn, fn);
}
-#endif /* CONFIG_KVM */
+#endif /* CONFIG_KVM_INDIRECT_VECTORS */
static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry,
bp_hardening_cb_t fn,
@@ -179,12 +170,25 @@ static void call_hvc_arch_workaround_1(void)
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
}
+static void qcom_link_stack_sanitization(void)
+{
+ u64 tmp;
+
+ asm volatile("mov %0, x30 \n"
+ ".rept 16 \n"
+ "bl . + 4 \n"
+ ".endr \n"
+ "mov x30, %0 \n"
+ : "=&r" (tmp));
+}
+
static void
enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry)
{
bp_hardening_cb_t cb;
void *smccc_start, *smccc_end;
struct arm_smccc_res res;
+ u32 midr = read_cpuid_id();
if (!entry->matches(entry, SCOPE_LOCAL_CPU))
return;
@@ -199,8 +203,9 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry)
if ((int)res.a0 < 0)
return;
cb = call_hvc_arch_workaround_1;
- smccc_start = __smccc_workaround_1_hvc_start;
- smccc_end = __smccc_workaround_1_hvc_end;
+ /* This is a guest, no need to patch KVM vectors */
+ smccc_start = NULL;
+ smccc_end = NULL;
break;
case PSCI_CONDUIT_SMC:
@@ -217,30 +222,14 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry)
return;
}
+ if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
+ ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
+ cb = qcom_link_stack_sanitization;
+
install_bp_hardening_cb(entry, cb, smccc_start, smccc_end);
return;
}
-
-static void qcom_link_stack_sanitization(void)
-{
- u64 tmp;
-
- asm volatile("mov %0, x30 \n"
- ".rept 16 \n"
- "bl . + 4 \n"
- ".endr \n"
- "mov x30, %0 \n"
- : "=&r" (tmp));
-}
-
-static void
-qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry)
-{
- install_bp_hardening_cb(entry, qcom_link_stack_sanitization,
- __qcom_hyp_sanitize_link_stack_start,
- __qcom_hyp_sanitize_link_stack_end);
-}
#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
#define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \
@@ -325,24 +314,19 @@ static const struct midr_range arm64_bp_harden_smccc_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
- {},
-};
-
-static const struct midr_range qcom_bp_harden_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+ MIDR_ALL_VERSIONS(MIDR_NVIDIA_DENVER),
{},
};
-static const struct arm64_cpu_capabilities arm64_bp_harden_list[] = {
- {
- CAP_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus),
- .cpu_enable = enable_smccc_arch_workaround_1,
- },
- {
- CAP_MIDR_RANGE_LIST(qcom_bp_harden_cpus),
- .cpu_enable = qcom_enable_link_stack_sanitization,
- },
+#endif
+
+#ifdef CONFIG_HARDEN_EL2_VECTORS
+
+static const struct midr_range arm64_harden_el2_vectors[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
{},
};
@@ -492,13 +476,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
- .matches = multi_entry_cap_matches,
- .cpu_enable = multi_entry_cap_cpu_enable,
- .match_list = arm64_bp_harden_list,
+ .cpu_enable = enable_smccc_arch_workaround_1,
+ ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus),
},
+#endif
+#ifdef CONFIG_HARDEN_EL2_VECTORS
{
- .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
- ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus),
+ .desc = "EL2 vector hardening",
+ .capability = ARM64_HARDEN_EL2_VECTORS,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors),
},
#endif
{
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 96b15d7b10a8..9d1b06d67c53 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -838,19 +838,6 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _
MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
}
-static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry,
- int __unused)
-{
- phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
-
- /*
- * Activate the lower HYP offset only if:
- * - the idmap doesn't clash with it,
- * - the kernel is not running at EL2.
- */
- return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode();
-}
-
static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused)
{
u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
@@ -881,6 +868,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
static const struct midr_range kpti_safe_list[] = {
MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
+ { /* sentinel */ }
};
char const *str = "command line option";
@@ -1121,12 +1109,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.field_pos = ID_AA64PFR0_EL0_SHIFT,
.min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
},
- {
- .desc = "Reduced HYP mapping offset",
- .capability = ARM64_HYP_OFFSET_LOW,
- .type = ARM64_CPUCAP_SYSTEM_FEATURE,
- .matches = hyp_offset_low,
- },
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
{
.desc = "Kernel page table isolation (KPTI)",
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 87a35364e750..4bcdd0318729 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -882,7 +882,7 @@ asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
si_code = FPE_FLTRES;
}
- memset(&info, 0, sizeof(info));
+ clear_siginfo(&info);
info.si_signo = SIGFPE;
info.si_code = si_code;
info.si_addr = (void __user *)instruction_pointer(regs);
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2b6b8b24e5ab..b0853069702f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -577,6 +577,13 @@ set_hcr:
7:
msr mdcr_el2, x3 // Configure debug traps
+ /* LORegions */
+ mrs x1, id_aa64mmfr1_el1
+ ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
+ cbz x0, 1f
+ msr_s SYS_LORC_EL1, xzr
+1:
+
/* Stage-2 translation */
msr vttbr_el2, xzr
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 74bb56f656ef..413dbe530da8 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -30,7 +30,6 @@
#include <linux/smp.h>
#include <linux/uaccess.h>
-#include <asm/compat.h>
#include <asm/current.h>
#include <asm/debug-monitors.h>
#include <asm/hw_breakpoint.h>
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 2718a77da165..816d03c4c913 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -35,6 +35,7 @@
#define AARCH64_INSN_SF_BIT BIT(31)
#define AARCH64_INSN_N_BIT BIT(22)
+#define AARCH64_INSN_LSL_12 BIT(22)
static int aarch64_insn_encoding_class[] = {
AARCH64_INSN_CLS_UNKNOWN,
@@ -343,6 +344,10 @@ static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
mask = BIT(6) - 1;
shift = 16;
break;
+ case AARCH64_INSN_IMM_N:
+ mask = 1;
+ shift = 22;
+ break;
default:
return -EINVAL;
}
@@ -899,9 +904,18 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
return AARCH64_BREAK_FAULT;
}
+ /* We can't encode more than a 24bit value (12bit + 12bit shift) */
+ if (imm & ~(BIT(24) - 1))
+ goto out;
+
+ /* If we have something in the top 12 bits... */
if (imm & ~(SZ_4K - 1)) {
- pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
- return AARCH64_BREAK_FAULT;
+ /* ... and in the low 12 bits -> error */
+ if (imm & (SZ_4K - 1))
+ goto out;
+
+ imm >>= 12;
+ insn |= AARCH64_INSN_LSL_12;
}
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
@@ -909,6 +923,10 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm);
+
+out:
+ pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
+ return AARCH64_BREAK_FAULT;
}
u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
@@ -1481,3 +1499,171 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = {
__check_hi, __check_ls, __check_ge, __check_lt,
__check_gt, __check_le, __check_al, __check_al
};
+
+static bool range_of_ones(u64 val)
+{
+ /* Doesn't handle full ones or full zeroes */
+ u64 sval = val >> __ffs64(val);
+
+ /* One of Sean Eron Anderson's bithack tricks */
+ return ((sval + 1) & (sval)) == 0;
+}
+
+static u32 aarch64_encode_immediate(u64 imm,
+ enum aarch64_insn_variant variant,
+ u32 insn)
+{
+ unsigned int immr, imms, n, ones, ror, esz, tmp;
+ u64 mask = ~0UL;
+
+ /* Can't encode full zeroes or full ones */
+ if (!imm || !~imm)
+ return AARCH64_BREAK_FAULT;
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ if (upper_32_bits(imm))
+ return AARCH64_BREAK_FAULT;
+ esz = 32;
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ insn |= AARCH64_INSN_SF_BIT;
+ esz = 64;
+ break;
+ default:
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ /*
+ * Inverse of Replicate(). Try to spot a repeating pattern
+ * with a pow2 stride.
+ */
+ for (tmp = esz / 2; tmp >= 2; tmp /= 2) {
+ u64 emask = BIT(tmp) - 1;
+
+ if ((imm & emask) != ((imm >> tmp) & emask))
+ break;
+
+ esz = tmp;
+ mask = emask;
+ }
+
+ /* N is only set if we're encoding a 64bit value */
+ n = esz == 64;
+
+ /* Trim imm to the element size */
+ imm &= mask;
+
+ /* That's how many ones we need to encode */
+ ones = hweight64(imm);
+
+ /*
+ * imms is set to (ones - 1), prefixed with a string of ones
+ * and a zero if they fit. Cap it to 6 bits.
+ */
+ imms = ones - 1;
+ imms |= 0xf << ffs(esz);
+ imms &= BIT(6) - 1;
+
+ /* Compute the rotation */
+ if (range_of_ones(imm)) {
+ /*
+ * Pattern: 0..01..10..0
+ *
+ * Compute how many rotate we need to align it right
+ */
+ ror = __ffs64(imm);
+ } else {
+ /*
+ * Pattern: 0..01..10..01..1
+ *
+ * Fill the unused top bits with ones, and check if
+ * the result is a valid immediate (all ones with a
+ * contiguous ranges of zeroes).
+ */
+ imm |= ~mask;
+ if (!range_of_ones(~imm))
+ return AARCH64_BREAK_FAULT;
+
+ /*
+ * Compute the rotation to get a continuous set of
+ * ones, with the first bit set at position 0
+ */
+ ror = fls(~imm);
+ }
+
+ /*
+ * immr is the number of bits we need to rotate back to the
+ * original set of ones. Note that this is relative to the
+ * element size...
+ */
+ immr = (esz - ror) % esz;
+
+ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, n);
+ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr);
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms);
+}
+
+u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_register Rn,
+ enum aarch64_insn_register Rd,
+ u64 imm)
+{
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_LOGIC_AND:
+ insn = aarch64_insn_get_and_imm_value();
+ break;
+ case AARCH64_INSN_LOGIC_ORR:
+ insn = aarch64_insn_get_orr_imm_value();
+ break;
+ case AARCH64_INSN_LOGIC_EOR:
+ insn = aarch64_insn_get_eor_imm_value();
+ break;
+ case AARCH64_INSN_LOGIC_AND_SETFLAGS:
+ insn = aarch64_insn_get_ands_imm_value();
+ break;
+ default:
+ pr_err("%s: unknown logical encoding %d\n", __func__, type);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd);
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
+ return aarch64_encode_immediate(imm, variant, insn);
+}
+
+u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
+ enum aarch64_insn_register Rm,
+ enum aarch64_insn_register Rn,
+ enum aarch64_insn_register Rd,
+ u8 lsb)
+{
+ u32 insn;
+
+ insn = aarch64_insn_get_extr_value();
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ if (lsb > 31)
+ return AARCH64_BREAK_FAULT;
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ if (lsb > 63)
+ return AARCH64_BREAK_FAULT;
+ insn |= AARCH64_INSN_SF_BIT;
+ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, 1);
+ break;
+ default:
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, lsb);
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd);
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
+ return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm);
+}
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index fa3637284a3d..f0690c2ca3e0 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -43,7 +43,7 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
}
#ifdef CONFIG_ARM64_ERRATUM_843419
-u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val)
+u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val)
{
struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
&mod->arch.init;
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 719fde8dcc19..155fd91e78f4 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -215,7 +215,7 @@ static int reloc_insn_adrp(struct module *mod, __le32 *place, u64 val)
insn &= ~BIT(31);
} else {
/* out of range for ADR -> emit a veneer */
- val = module_emit_adrp_veneer(mod, place, val & ~0xfff);
+ val = module_emit_veneer_for_adrp(mod, place, val & ~0xfff);
if (!val)
return -ENOEXEC;
insn = aarch64_insn_gen_branch_imm((u64)place, val,
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
index 1d091d048d04..0bbac612146e 100644
--- a/arch/arm64/kernel/perf_regs.c
+++ b/arch/arm64/kernel/perf_regs.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/bug.h>
#include <linux/sched/task_stack.h>
-#include <asm/compat.h>
#include <asm/perf_regs.h>
#include <asm/ptrace.h>
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 71d99af24ef2..7ff81fed46e1 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -25,6 +25,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
#include <linux/mm.h>
+#include <linux/nospec.h>
#include <linux/smp.h>
#include <linux/ptrace.h>
#include <linux/user.h>
@@ -249,15 +250,20 @@ static struct perf_event *ptrace_hbp_get_event(unsigned int note_type,
switch (note_type) {
case NT_ARM_HW_BREAK:
- if (idx < ARM_MAX_BRP)
- bp = tsk->thread.debug.hbp_break[idx];
+ if (idx >= ARM_MAX_BRP)
+ goto out;
+ idx = array_index_nospec(idx, ARM_MAX_BRP);
+ bp = tsk->thread.debug.hbp_break[idx];
break;
case NT_ARM_HW_WATCH:
- if (idx < ARM_MAX_WRP)
- bp = tsk->thread.debug.hbp_watch[idx];
+ if (idx >= ARM_MAX_WRP)
+ goto out;
+ idx = array_index_nospec(idx, ARM_MAX_WRP);
+ bp = tsk->thread.debug.hbp_watch[idx];
break;
}
+out:
return bp;
}
@@ -1458,9 +1464,7 @@ static int compat_ptrace_gethbpregs(struct task_struct *tsk, compat_long_t num,
{
int ret;
u32 kdata;
- mm_segment_t old_fs = get_fs();
- set_fs(KERNEL_DS);
/* Watchpoint */
if (num < 0) {
ret = compat_ptrace_hbp_get(NT_ARM_HW_WATCH, tsk, num, &kdata);
@@ -1471,7 +1475,6 @@ static int compat_ptrace_gethbpregs(struct task_struct *tsk, compat_long_t num,
} else {
ret = compat_ptrace_hbp_get(NT_ARM_HW_BREAK, tsk, num, &kdata);
}
- set_fs(old_fs);
if (!ret)
ret = put_user(kdata, data);
@@ -1484,7 +1487,6 @@ static int compat_ptrace_sethbpregs(struct task_struct *tsk, compat_long_t num,
{
int ret;
u32 kdata = 0;
- mm_segment_t old_fs = get_fs();
if (num == 0)
return 0;
@@ -1493,12 +1495,10 @@ static int compat_ptrace_sethbpregs(struct task_struct *tsk, compat_long_t num,
if (ret)
return ret;
- set_fs(KERNEL_DS);
if (num < 0)
ret = compat_ptrace_hbp_set(NT_ARM_HW_WATCH, tsk, num, &kdata);
else
ret = compat_ptrace_hbp_set(NT_ARM_HW_BREAK, tsk, num, &kdata);
- set_fs(old_fs);
return ret;
}
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 93ab57dcfc14..a6109825eeb9 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -112,6 +112,7 @@ long compat_arm_syscall(struct pt_regs *regs)
break;
}
+ clear_siginfo(&info);
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_ILLTRP;
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index ba964da31a25..d399d459397b 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -277,7 +277,8 @@ void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size)
* If we were single stepping, we want to get the step exception after
* we return from the trap.
*/
- user_fastforward_single_step(current);
+ if (user_mode(regs))
+ user_fastforward_single_step(current);
}
static LIST_HEAD(undef_hook);
@@ -366,7 +367,7 @@ void force_signal_inject(int signal, int code, unsigned long address)
}
/* Force signals we don't understand to SIGKILL */
- if (WARN_ON(signal != SIGKILL ||
+ if (WARN_ON(signal != SIGKILL &&
siginfo_layout(signal, code) != SIL_FAULT)) {
signal = SIGKILL;
}
@@ -634,6 +635,7 @@ asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
siginfo_t info;
void __user *pc = (void __user *)instruction_pointer(regs);
+ clear_siginfo(&info);
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_ILLOPC;
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 2257dfcc44cc..a2e3a5af1113 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -57,6 +57,9 @@ config KVM_ARM_PMU
Adds support for a virtual Performance Monitoring Unit (PMU) in
virtual machines.
+config KVM_INDIRECT_VECTORS
+ def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS)
+
source drivers/vhost/Kconfig
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 87c4f7ae24de..93afff91cb7c 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -16,7 +16,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/e
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
-kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
+kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index fa63b28c65e0..a1f4ebdfe6d3 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -46,7 +46,9 @@ static DEFINE_PER_CPU(u32, mdcr_el2);
*/
static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
{
- vcpu->arch.guest_debug_preserved.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1);
+ u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+
+ vcpu->arch.guest_debug_preserved.mdscr_el1 = val;
trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
vcpu->arch.guest_debug_preserved.mdscr_el1);
@@ -54,10 +56,12 @@ static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
{
- vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_preserved.mdscr_el1;
+ u64 val = vcpu->arch.guest_debug_preserved.mdscr_el1;
+
+ vcpu_write_sys_reg(vcpu, val, MDSCR_EL1);
trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
- vcpu_sys_reg(vcpu, MDSCR_EL1));
+ vcpu_read_sys_reg(vcpu, MDSCR_EL1));
}
/**
@@ -108,6 +112,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
{
bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY);
+ unsigned long mdscr;
trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
@@ -152,9 +157,13 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
*/
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
*vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
- vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_SS;
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+ mdscr |= DBG_MDSCR_SS;
+ vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
} else {
- vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS;
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+ mdscr &= ~DBG_MDSCR_SS;
+ vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
}
trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu));
@@ -170,7 +179,9 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
*/
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
/* Enable breakpoints/watchpoints */
- vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_MDE;
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+ mdscr |= DBG_MDSCR_MDE;
+ vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
@@ -193,8 +204,12 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
if (trap_debug)
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
+ /* If KDE or MDE are set, perform a full save/restore cycle. */
+ if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
+ vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+
trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
- trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1));
+ trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1));
}
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 959e50d2588c..56a0260ceb11 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -25,6 +25,7 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
+#include <kvm/arm_psci.h>
#include <asm/cputype.h>
#include <linux/uaccess.h>
#include <asm/kvm.h>
@@ -205,7 +206,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
{
return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu)
- + NUM_TIMER_REGS;
+ + kvm_arm_get_fw_num_regs(vcpu) + NUM_TIMER_REGS;
}
/**
@@ -225,6 +226,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
uindices++;
}
+ ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices);
+ if (ret)
+ return ret;
+ uindices += kvm_arm_get_fw_num_regs(vcpu);
+
ret = copy_timer_indices(vcpu, uindices);
if (ret)
return ret;
@@ -243,6 +249,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
return get_core_reg(vcpu, reg);
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW)
+ return kvm_arm_get_fw_reg(vcpu, reg);
+
if (is_timer_reg(reg->id))
return get_timer_reg(vcpu, reg);
@@ -259,6 +268,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
return set_core_reg(vcpu, reg);
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW)
+ return kvm_arm_set_fw_reg(vcpu, reg);
+
if (is_timer_reg(reg->id))
return set_timer_reg(vcpu, reg);
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 5aa9ccf6db99..6fd91b31a131 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -117,7 +117,6 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
/* Set the stack and new vectors */
kern_hyp_va x1
mov sp, x1
- kern_hyp_va x2
msr vbar_el2, x2
/* copy tpidr_el1 into tpidr_el2 for use by HYP */
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index f04400d494b7..4313f7475333 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -7,10 +7,10 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
KVM=../../../../virt/kvm
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o
obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += entry.o
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index dabb5cc7b087..3e717f66f011 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -66,11 +66,6 @@
default: write_debug(ptr[0], reg, 0); \
}
-static void __hyp_text __debug_save_spe_vhe(u64 *pmscr_el1)
-{
- /* The vcpu can run. but it can't hide. */
-}
-
static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
{
u64 reg;
@@ -103,11 +98,7 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
dsb(nsh);
}
-static hyp_alternate_select(__debug_save_spe,
- __debug_save_spe_nvhe, __debug_save_spe_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
-static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
+static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1)
{
if (!pmscr_el1)
return;
@@ -119,16 +110,13 @@ static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
}
-void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt)
+static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
{
u64 aa64dfr0;
int brps, wrps;
- if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
- return;
-
aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
brps = (aa64dfr0 >> 12) & 0xf;
wrps = (aa64dfr0 >> 20) & 0xf;
@@ -141,16 +129,13 @@ void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1);
}
-void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt)
+static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
{
u64 aa64dfr0;
int brps, wrps;
- if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
- return;
-
aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
brps = (aa64dfr0 >> 12) & 0xf;
@@ -164,27 +149,54 @@ void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1);
}
-void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu)
+void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
{
- /* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform
- * a full save/restore cycle. */
- if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) ||
- (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE))
- vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
-
- __debug_save_state(vcpu, &vcpu->arch.host_debug_state.regs,
- kern_hyp_va(vcpu->arch.host_cpu_context));
- __debug_save_spe()(&vcpu->arch.host_debug_state.pmscr_el1);
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ struct kvm_guest_debug_arch *host_dbg;
+ struct kvm_guest_debug_arch *guest_dbg;
+
+ /*
+ * Non-VHE: Disable and flush SPE data generation
+ * VHE: The vcpu can run, but it can't hide.
+ */
+ if (!has_vhe())
+ __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);
+
+ if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+ return;
+
+ host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ guest_ctxt = &vcpu->arch.ctxt;
+ host_dbg = &vcpu->arch.host_debug_state.regs;
+ guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+
+ __debug_save_state(vcpu, host_dbg, host_ctxt);
+ __debug_restore_state(vcpu, guest_dbg, guest_ctxt);
}
-void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu)
+void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
{
- __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
- __debug_restore_state(vcpu, &vcpu->arch.host_debug_state.regs,
- kern_hyp_va(vcpu->arch.host_cpu_context));
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ struct kvm_guest_debug_arch *host_dbg;
+ struct kvm_guest_debug_arch *guest_dbg;
+
+ if (!has_vhe())
+ __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1);
+
+ if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+ return;
+
+ host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ guest_ctxt = &vcpu->arch.ctxt;
+ host_dbg = &vcpu->arch.host_debug_state.regs;
+ guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+
+ __debug_save_state(vcpu, guest_dbg, guest_ctxt);
+ __debug_restore_state(vcpu, host_dbg, host_ctxt);
- if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
- vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
+ vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
}
u32 __hyp_text __kvm_get_mdcr_el2(void)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index fdd1068ee3a5..e41a161d313a 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -62,9 +62,6 @@ ENTRY(__guest_enter)
// Store the host regs
save_callee_saved_regs x1
- // Store host_ctxt and vcpu for use at exit time
- stp x1, x0, [sp, #-16]!
-
add x18, x0, #VCPU_CONTEXT
// Restore guest regs x0-x17
@@ -118,8 +115,7 @@ ENTRY(__guest_exit)
// Store the guest regs x19-x29, lr
save_callee_saved_regs x1
- // Restore the host_ctxt from the stack
- ldr x2, [sp], #16
+ get_host_ctxt x2, x3
// Now restore the host regs
restore_callee_saved_regs x2
@@ -213,15 +209,3 @@ alternative_endif
eret
ENDPROC(__fpsimd_guest_restore)
-
-ENTRY(__qcom_hyp_sanitize_btac_predictors)
- /**
- * Call SMC64 with Silicon provider serviceID 23<<8 (0xc2001700)
- * 0xC2000000-0xC200FFFF: assigned to SiP Service Calls
- * b15-b0: contains SiP functionID
- */
- movz x0, #0x1700
- movk x0, #0xc200, lsl #16
- smc #0
- ret
-ENDPROC(__qcom_hyp_sanitize_btac_predictors)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index f36464bd57c5..bffece27b5c1 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2015 - ARM Ltd
+ * Copyright (C) 2015-2018 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -24,6 +24,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
+#include <asm/mmu.h>
.text
.pushsection .hyp.text, "ax"
@@ -55,15 +56,9 @@ ENTRY(__vhe_hyp_call)
ENDPROC(__vhe_hyp_call)
el1_sync: // Guest trapped into EL2
- stp x0, x1, [sp, #-16]!
-
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
- mrs x1, esr_el2
-alternative_else
- mrs x1, esr_el1
-alternative_endif
- lsr x0, x1, #ESR_ELx_EC_SHIFT
+ mrs x0, esr_el2
+ lsr x0, x0, #ESR_ELx_EC_SHIFT
cmp x0, #ESR_ELx_EC_HVC64
ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
b.ne el1_trap
@@ -117,10 +112,14 @@ el1_hvc_guest:
eret
el1_trap:
+ get_vcpu_ptr x1, x0
+
+ mrs x0, esr_el2
+ lsr x0, x0, #ESR_ELx_EC_SHIFT
/*
* x0: ESR_EC
+ * x1: vcpu pointer
*/
- ldr x1, [sp, #16 + 8] // vcpu stored by __guest_enter
/*
* We trap the first access to the FP/SIMD to save the host context
@@ -137,18 +136,18 @@ alternative_else_nop_endif
b __guest_exit
el1_irq:
- stp x0, x1, [sp, #-16]!
- ldr x1, [sp, #16 + 8]
+ get_vcpu_ptr x1, x0
mov x0, #ARM_EXCEPTION_IRQ
b __guest_exit
el1_error:
- stp x0, x1, [sp, #-16]!
- ldr x1, [sp, #16 + 8]
+ get_vcpu_ptr x1, x0
mov x0, #ARM_EXCEPTION_EL1_SERROR
b __guest_exit
el2_error:
+ ldp x0, x1, [sp], #16
+
/*
* Only two possibilities:
* 1) Either we come from the exit path, having just unmasked
@@ -180,14 +179,7 @@ ENTRY(__hyp_do_panic)
ENDPROC(__hyp_do_panic)
ENTRY(__hyp_panic)
- /*
- * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
- * not be accessible by this address from EL2, hyp_panic() converts
- * it with kern_hyp_va() before use.
- */
- ldr x0, =kvm_host_cpu_state
- mrs x1, tpidr_el2
- add x0, x0, x1
+ get_host_ctxt x0, x1
b hyp_panic
ENDPROC(__hyp_panic)
@@ -206,32 +198,104 @@ ENDPROC(\label)
invalid_vector el2h_sync_invalid
invalid_vector el2h_irq_invalid
invalid_vector el2h_fiq_invalid
- invalid_vector el1_sync_invalid
- invalid_vector el1_irq_invalid
invalid_vector el1_fiq_invalid
.ltorg
.align 11
+.macro valid_vect target
+ .align 7
+ stp x0, x1, [sp, #-16]!
+ b \target
+.endm
+
+.macro invalid_vect target
+ .align 7
+ b \target
+ ldp x0, x1, [sp], #16
+ b \target
+.endm
+
ENTRY(__kvm_hyp_vector)
- ventry el2t_sync_invalid // Synchronous EL2t
- ventry el2t_irq_invalid // IRQ EL2t
- ventry el2t_fiq_invalid // FIQ EL2t
- ventry el2t_error_invalid // Error EL2t
-
- ventry el2h_sync_invalid // Synchronous EL2h
- ventry el2h_irq_invalid // IRQ EL2h
- ventry el2h_fiq_invalid // FIQ EL2h
- ventry el2_error // Error EL2h
-
- ventry el1_sync // Synchronous 64-bit EL1
- ventry el1_irq // IRQ 64-bit EL1
- ventry el1_fiq_invalid // FIQ 64-bit EL1
- ventry el1_error // Error 64-bit EL1
-
- ventry el1_sync // Synchronous 32-bit EL1
- ventry el1_irq // IRQ 32-bit EL1
- ventry el1_fiq_invalid // FIQ 32-bit EL1
- ventry el1_error // Error 32-bit EL1
+ invalid_vect el2t_sync_invalid // Synchronous EL2t
+ invalid_vect el2t_irq_invalid // IRQ EL2t
+ invalid_vect el2t_fiq_invalid // FIQ EL2t
+ invalid_vect el2t_error_invalid // Error EL2t
+
+ invalid_vect el2h_sync_invalid // Synchronous EL2h
+ invalid_vect el2h_irq_invalid // IRQ EL2h
+ invalid_vect el2h_fiq_invalid // FIQ EL2h
+ valid_vect el2_error // Error EL2h
+
+ valid_vect el1_sync // Synchronous 64-bit EL1
+ valid_vect el1_irq // IRQ 64-bit EL1
+ invalid_vect el1_fiq_invalid // FIQ 64-bit EL1
+ valid_vect el1_error // Error 64-bit EL1
+
+ valid_vect el1_sync // Synchronous 32-bit EL1
+ valid_vect el1_irq // IRQ 32-bit EL1
+ invalid_vect el1_fiq_invalid // FIQ 32-bit EL1
+ valid_vect el1_error // Error 32-bit EL1
ENDPROC(__kvm_hyp_vector)
+
+#ifdef CONFIG_KVM_INDIRECT_VECTORS
+.macro hyp_ventry
+ .align 7
+1: .rept 27
+ nop
+ .endr
+/*
+ * The default sequence is to directly branch to the KVM vectors,
+ * using the computed offset. This applies for VHE as well as
+ * !ARM64_HARDEN_EL2_VECTORS.
+ *
+ * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced
+ * with:
+ *
+ * stp x0, x1, [sp, #-16]!
+ * movz x0, #(addr & 0xffff)
+ * movk x0, #((addr >> 16) & 0xffff), lsl #16
+ * movk x0, #((addr >> 32) & 0xffff), lsl #32
+ * br x0
+ *
+ * Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4.
+ * See kvm_patch_vector_branch for details.
+ */
+alternative_cb kvm_patch_vector_branch
+ b __kvm_hyp_vector + (1b - 0b)
+ nop
+ nop
+ nop
+ nop
+alternative_cb_end
+.endm
+
+.macro generate_vectors
+0:
+ .rept 16
+ hyp_ventry
+ .endr
+ .org 0b + SZ_2K // Safety measure
+.endm
+
+ .align 11
+ENTRY(__bp_harden_hyp_vecs_start)
+ .rept BP_HARDEN_EL2_SLOTS
+ generate_vectors
+ .endr
+ENTRY(__bp_harden_hyp_vecs_end)
+
+ .popsection
+
+ENTRY(__smccc_workaround_1_smc_start)
+ sub sp, sp, #(8 * 4)
+ stp x2, x3, [sp, #(8 * 0)]
+ stp x0, x1, [sp, #(8 * 2)]
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
+ smc #0
+ ldp x2, x3, [sp, #(8 * 0)]
+ ldp x0, x1, [sp, #(8 * 2)]
+ add sp, sp, #(8 * 4)
+ENTRY(__smccc_workaround_1_smc_end)
+#endif
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 870f4b1587f9..d9645236e474 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -33,49 +33,22 @@ static bool __hyp_text __fpsimd_enabled_nvhe(void)
return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
}
-static bool __hyp_text __fpsimd_enabled_vhe(void)
+static bool fpsimd_enabled_vhe(void)
{
return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
}
-static hyp_alternate_select(__fpsimd_is_enabled,
- __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
-bool __hyp_text __fpsimd_enabled(void)
-{
- return __fpsimd_is_enabled()();
-}
-
-static void __hyp_text __activate_traps_vhe(void)
-{
- u64 val;
-
- val = read_sysreg(cpacr_el1);
- val |= CPACR_EL1_TTA;
- val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
- write_sysreg(val, cpacr_el1);
-
- write_sysreg(kvm_get_hyp_vector(), vbar_el1);
-}
-
-static void __hyp_text __activate_traps_nvhe(void)
+/* Save the 32-bit only FPSIMD system register state */
+static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
{
- u64 val;
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
- val = CPTR_EL2_DEFAULT;
- val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
- write_sysreg(val, cptr_el2);
+ vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
}
-static hyp_alternate_select(__activate_traps_arch,
- __activate_traps_nvhe, __activate_traps_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
-static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
+static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
{
- u64 val;
-
/*
* We are about to set CPTR_EL2.TFP to trap all floating point
* register accesses to EL2, however, the ARM ARM clearly states that
@@ -85,23 +58,17 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
* If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
* it will cause an exception.
*/
- val = vcpu->arch.hcr_el2;
-
- if (!(val & HCR_RW) && system_supports_fpsimd()) {
+ if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
write_sysreg(1 << 30, fpexc32_el2);
isb();
}
+}
- if (val & HCR_RW) /* for AArch64 only: */
- val |= HCR_TID3; /* TID3: trap feature register accesses */
-
- write_sysreg(val, hcr_el2);
-
- if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (val & HCR_VSE))
- write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
-
- /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
+static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu)
+{
+ /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
write_sysreg(1 << 15, hstr_el2);
+
/*
* Make sure we trap PMU access from EL0 to EL2. Also sanitize
* PMSELR_EL0 to make sure it never contains the cycle
@@ -111,19 +78,56 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
write_sysreg(0, pmselr_el0);
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
- __activate_traps_arch()();
}
-static void __hyp_text __deactivate_traps_vhe(void)
+static void __hyp_text __deactivate_traps_common(void)
{
- extern char vectors[]; /* kernel exception vectors */
- u64 mdcr_el2 = read_sysreg(mdcr_el2);
+ write_sysreg(0, hstr_el2);
+ write_sysreg(0, pmuserenr_el0);
+}
- mdcr_el2 &= MDCR_EL2_HPMN_MASK |
- MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
- MDCR_EL2_TPMS;
+static void activate_traps_vhe(struct kvm_vcpu *vcpu)
+{
+ u64 val;
- write_sysreg(mdcr_el2, mdcr_el2);
+ val = read_sysreg(cpacr_el1);
+ val |= CPACR_EL1_TTA;
+ val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
+ write_sysreg(val, cpacr_el1);
+
+ write_sysreg(kvm_get_hyp_vector(), vbar_el1);
+}
+
+static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ __activate_traps_common(vcpu);
+
+ val = CPTR_EL2_DEFAULT;
+ val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
+ write_sysreg(val, cptr_el2);
+}
+
+static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 hcr = vcpu->arch.hcr_el2;
+
+ write_sysreg(hcr, hcr_el2);
+
+ if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
+ write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+
+ __activate_traps_fpsimd32(vcpu);
+ if (has_vhe())
+ activate_traps_vhe(vcpu);
+ else
+ __activate_traps_nvhe(vcpu);
+}
+
+static void deactivate_traps_vhe(void)
+{
+ extern char vectors[]; /* kernel exception vectors */
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
write_sysreg(vectors, vbar_el1);
@@ -133,6 +137,8 @@ static void __hyp_text __deactivate_traps_nvhe(void)
{
u64 mdcr_el2 = read_sysreg(mdcr_el2);
+ __deactivate_traps_common();
+
mdcr_el2 &= MDCR_EL2_HPMN_MASK;
mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
@@ -141,10 +147,6 @@ static void __hyp_text __deactivate_traps_nvhe(void)
write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
}
-static hyp_alternate_select(__deactivate_traps_arch,
- __deactivate_traps_nvhe, __deactivate_traps_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
{
/*
@@ -156,14 +158,32 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
if (vcpu->arch.hcr_el2 & HCR_VSE)
vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
- __deactivate_traps_arch()();
- write_sysreg(0, hstr_el2);
- write_sysreg(0, pmuserenr_el0);
+ if (has_vhe())
+ deactivate_traps_vhe();
+ else
+ __deactivate_traps_nvhe();
+}
+
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
+{
+ __activate_traps_common(vcpu);
+}
+
+void deactivate_traps_vhe_put(void)
+{
+ u64 mdcr_el2 = read_sysreg(mdcr_el2);
+
+ mdcr_el2 &= MDCR_EL2_HPMN_MASK |
+ MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
+ MDCR_EL2_TPMS;
+
+ write_sysreg(mdcr_el2, mdcr_el2);
+
+ __deactivate_traps_common();
}
-static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
+static void __hyp_text __activate_vm(struct kvm *kvm)
{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
write_sysreg(kvm->arch.vttbr, vttbr_el2);
}
@@ -172,29 +192,22 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
write_sysreg(0, vttbr_el2);
}
-static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
+/* Save VGICv3 state on non-VHE systems */
+static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
__vgic_v3_save_state(vcpu);
- else
- __vgic_v2_save_state(vcpu);
-
- write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2);
+ __vgic_v3_deactivate_traps(vcpu);
+ }
}
-static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
+/* Restore VGICv3 state on non_VEH systems */
+static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
{
- u64 val;
-
- val = read_sysreg(hcr_el2);
- val |= HCR_INT_OVERRIDE;
- val |= vcpu->arch.irq_lines;
- write_sysreg(val, hcr_el2);
-
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+ __vgic_v3_activate_traps(vcpu);
__vgic_v3_restore_state(vcpu);
- else
- __vgic_v2_restore_state(vcpu);
+ }
}
static bool __hyp_text __true_value(void)
@@ -305,54 +318,27 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
}
}
-int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+/*
+ * Return true when we were able to fixup the guest exit and should return to
+ * the guest, false when we should restore the host state and return to the
+ * main run loop.
+ */
+static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- struct kvm_cpu_context *host_ctxt;
- struct kvm_cpu_context *guest_ctxt;
- bool fp_enabled;
- u64 exit_code;
-
- vcpu = kern_hyp_va(vcpu);
-
- host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
- host_ctxt->__hyp_running_vcpu = vcpu;
- guest_ctxt = &vcpu->arch.ctxt;
-
- __sysreg_save_host_state(host_ctxt);
- __debug_cond_save_host_state(vcpu);
-
- __activate_traps(vcpu);
- __activate_vm(vcpu);
-
- __vgic_restore_state(vcpu);
- __timer_enable_traps(vcpu);
-
- /*
- * We must restore the 32-bit state before the sysregs, thanks
- * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
- */
- __sysreg32_restore_state(vcpu);
- __sysreg_restore_guest_state(guest_ctxt);
- __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
-
- /* Jump in the fire! */
-again:
- exit_code = __guest_enter(vcpu, host_ctxt);
- /* And we're baaack! */
-
- if (ARM_EXCEPTION_CODE(exit_code) != ARM_EXCEPTION_IRQ)
+ if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
+
/*
* We're using the raw exception code in order to only process
* the trap if no SError is pending. We will come back to the
* same PC once the SError has been injected, and replay the
* trapping instruction.
*/
- if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
- goto again;
+ if (*exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
+ return true;
if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
- exit_code == ARM_EXCEPTION_TRAP) {
+ *exit_code == ARM_EXCEPTION_TRAP) {
bool valid;
valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
@@ -366,9 +352,9 @@ again:
if (ret == 1) {
if (__skip_instr(vcpu))
- goto again;
+ return true;
else
- exit_code = ARM_EXCEPTION_TRAP;
+ *exit_code = ARM_EXCEPTION_TRAP;
}
if (ret == -1) {
@@ -380,62 +366,135 @@ again:
*/
if (!__skip_instr(vcpu))
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
- exit_code = ARM_EXCEPTION_EL1_SERROR;
+ *exit_code = ARM_EXCEPTION_EL1_SERROR;
}
-
- /* 0 falls through to be handler out of EL2 */
}
}
if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
- exit_code == ARM_EXCEPTION_TRAP &&
+ *exit_code == ARM_EXCEPTION_TRAP &&
(kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
int ret = __vgic_v3_perform_cpuif_access(vcpu);
if (ret == 1) {
if (__skip_instr(vcpu))
- goto again;
+ return true;
else
- exit_code = ARM_EXCEPTION_TRAP;
+ *exit_code = ARM_EXCEPTION_TRAP;
}
-
- /* 0 falls through to be handled out of EL2 */
}
- if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) {
- u32 midr = read_cpuid_id();
+ /* Return to the host kernel and handle the exit */
+ return false;
+}
- /* Apply BTAC predictors mitigation to all Falkor chips */
- if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
- ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) {
- __qcom_hyp_sanitize_btac_predictors();
- }
+/* Switch to the guest for VHE systems running in EL2 */
+int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ bool fp_enabled;
+ u64 exit_code;
+
+ host_ctxt = vcpu->arch.host_cpu_context;
+ host_ctxt->__hyp_running_vcpu = vcpu;
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ sysreg_save_host_state_vhe(host_ctxt);
+
+ __activate_traps(vcpu);
+ __activate_vm(vcpu->kvm);
+
+ sysreg_restore_guest_state_vhe(guest_ctxt);
+ __debug_switch_to_guest(vcpu);
+
+ do {
+ /* Jump in the fire! */
+ exit_code = __guest_enter(vcpu, host_ctxt);
+
+ /* And we're baaack! */
+ } while (fixup_guest_exit(vcpu, &exit_code));
+
+ fp_enabled = fpsimd_enabled_vhe();
+
+ sysreg_save_guest_state_vhe(guest_ctxt);
+
+ __deactivate_traps(vcpu);
+
+ sysreg_restore_host_state_vhe(host_ctxt);
+
+ if (fp_enabled) {
+ __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
+ __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+ __fpsimd_save_fpexc32(vcpu);
}
- fp_enabled = __fpsimd_enabled();
+ __debug_switch_to_host(vcpu);
+
+ return exit_code;
+}
+
+/* Switch to the guest for legacy non-VHE systems */
+int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ bool fp_enabled;
+ u64 exit_code;
+
+ vcpu = kern_hyp_va(vcpu);
+
+ host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ host_ctxt->__hyp_running_vcpu = vcpu;
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ __sysreg_save_state_nvhe(host_ctxt);
+
+ __activate_traps(vcpu);
+ __activate_vm(kern_hyp_va(vcpu->kvm));
+
+ __hyp_vgic_restore_state(vcpu);
+ __timer_enable_traps(vcpu);
+
+ /*
+ * We must restore the 32-bit state before the sysregs, thanks
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ */
+ __sysreg32_restore_state(vcpu);
+ __sysreg_restore_state_nvhe(guest_ctxt);
+ __debug_switch_to_guest(vcpu);
+
+ do {
+ /* Jump in the fire! */
+ exit_code = __guest_enter(vcpu, host_ctxt);
+
+ /* And we're baaack! */
+ } while (fixup_guest_exit(vcpu, &exit_code));
- __sysreg_save_guest_state(guest_ctxt);
+ fp_enabled = __fpsimd_enabled_nvhe();
+
+ __sysreg_save_state_nvhe(guest_ctxt);
__sysreg32_save_state(vcpu);
__timer_disable_traps(vcpu);
- __vgic_save_state(vcpu);
+ __hyp_vgic_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
- __sysreg_restore_host_state(host_ctxt);
+ __sysreg_restore_state_nvhe(host_ctxt);
if (fp_enabled) {
__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+ __fpsimd_save_fpexc32(vcpu);
}
- __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
/*
* This must come after restoring the host sysregs, since a non-VHE
* system may enable SPE here and make use of the TTBRs.
*/
- __debug_cond_restore_host_state(vcpu);
+ __debug_switch_to_host(vcpu);
return exit_code;
}
@@ -443,10 +502,20 @@ again:
static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
- struct kvm_vcpu *vcpu)
+ struct kvm_cpu_context *__host_ctxt)
{
+ struct kvm_vcpu *vcpu;
unsigned long str_va;
+ vcpu = __host_ctxt->__hyp_running_vcpu;
+
+ if (read_sysreg(vttbr_el2)) {
+ __timer_disable_traps(vcpu);
+ __deactivate_traps(vcpu);
+ __deactivate_vm(vcpu);
+ __sysreg_restore_state_nvhe(__host_ctxt);
+ }
+
/*
* Force the panic string to be loaded from the literal pool,
* making sure it is a kernel address and not a PC-relative
@@ -460,40 +529,31 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
read_sysreg(hpfar_el2), par, vcpu);
}
-static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
- struct kvm_vcpu *vcpu)
+static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
+ struct kvm_cpu_context *host_ctxt)
{
+ struct kvm_vcpu *vcpu;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+
+ __deactivate_traps(vcpu);
+ sysreg_restore_host_state_vhe(host_ctxt);
+
panic(__hyp_panic_string,
spsr, elr,
read_sysreg_el2(esr), read_sysreg_el2(far),
read_sysreg(hpfar_el2), par, vcpu);
}
-static hyp_alternate_select(__hyp_call_panic,
- __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
-void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
+void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
{
- struct kvm_vcpu *vcpu = NULL;
-
u64 spsr = read_sysreg_el2(spsr);
u64 elr = read_sysreg_el2(elr);
u64 par = read_sysreg(par_el1);
- if (read_sysreg(vttbr_el2)) {
- struct kvm_cpu_context *host_ctxt;
-
- host_ctxt = kern_hyp_va(__host_ctxt);
- vcpu = host_ctxt->__hyp_running_vcpu;
- __timer_disable_traps(vcpu);
- __deactivate_traps(vcpu);
- __deactivate_vm(vcpu);
- __sysreg_restore_host_state(host_ctxt);
- }
-
- /* Call panic for real */
- __hyp_call_panic()(spsr, elr, par, vcpu);
+ if (!has_vhe())
+ __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt);
+ else
+ __hyp_call_panic_vhe(spsr, elr, par, host_ctxt);
unreachable();
}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 2c17afd2be96..b3894df6bf1a 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -19,32 +19,43 @@
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
-/* Yes, this does nothing, on purpose */
-static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
-
/*
* Non-VHE: Both host and guest must save everything.
*
- * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0,
- * and guest must save everything.
+ * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and pstate,
+ * which are handled as part of the el2 return state) on every switch.
+ * tpidr_el0 and tpidrro_el0 only need to be switched when going
+ * to host userspace or a different VCPU. EL1 registers only need to be
+ * switched when potentially going to run a different VCPU. The latter two
+ * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put.
*/
static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
- ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
- ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
- ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
+
+ /*
+ * The host arm64 Linux uses sp_el0 to point to 'current' and it must
+ * therefore be saved/restored on every entry/exit to/from the guest.
+ */
ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
}
-static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
+static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
+ ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
+}
+
+static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2);
ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr);
+ ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr);
ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0);
ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1);
@@ -64,6 +75,10 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
+}
+
+static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
+{
ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
@@ -71,36 +86,48 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
}
-static hyp_alternate_select(__sysreg_call_save_host_state,
- __sysreg_save_state, __sysreg_do_nothing,
- ARM64_HAS_VIRT_HOST_EXTN);
+void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_el1_state(ctxt);
+ __sysreg_save_common_state(ctxt);
+ __sysreg_save_user_state(ctxt);
+ __sysreg_save_el2_return_state(ctxt);
+}
-void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt)
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_call_save_host_state()(ctxt);
__sysreg_save_common_state(ctxt);
}
-void __hyp_text __sysreg_save_guest_state(struct kvm_cpu_context *ctxt)
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_save_state(ctxt);
__sysreg_save_common_state(ctxt);
+ __sysreg_save_el2_return_state(ctxt);
}
static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
- write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
- write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
- write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
+
+ /*
+ * The host arm64 Linux uses sp_el0 to point to 'current' and it must
+ * therefore be saved/restored on every entry/exit to/from the guest.
+ */
write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
}
-static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
+static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
+ write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
+}
+
+static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr);
+ write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr);
write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0);
write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1);
@@ -120,6 +147,11 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
+}
+
+static void __hyp_text
+__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
+{
write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
@@ -127,27 +159,30 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
}
-static hyp_alternate_select(__sysreg_call_restore_host_state,
- __sysreg_restore_state, __sysreg_do_nothing,
- ARM64_HAS_VIRT_HOST_EXTN);
+void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_el1_state(ctxt);
+ __sysreg_restore_common_state(ctxt);
+ __sysreg_restore_user_state(ctxt);
+ __sysreg_restore_el2_return_state(ctxt);
+}
-void __hyp_text __sysreg_restore_host_state(struct kvm_cpu_context *ctxt)
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_call_restore_host_state()(ctxt);
__sysreg_restore_common_state(ctxt);
}
-void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt)
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_restore_state(ctxt);
__sysreg_restore_common_state(ctxt);
+ __sysreg_restore_el2_return_state(ctxt);
}
void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
{
u64 *spsr, *sysreg;
- if (read_sysreg(hcr_el2) & HCR_RW)
+ if (!vcpu_el1_is_32bit(vcpu))
return;
spsr = vcpu->arch.ctxt.gp_regs.spsr;
@@ -161,10 +196,7 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
- if (__fpsimd_enabled())
- sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
-
- if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+ if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2);
}
@@ -172,7 +204,7 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
{
u64 *spsr, *sysreg;
- if (read_sysreg(hcr_el2) & HCR_RW)
+ if (!vcpu_el1_is_32bit(vcpu))
return;
spsr = vcpu->arch.ctxt.gp_regs.spsr;
@@ -186,6 +218,78 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
write_sysreg(sysreg[DACR32_EL2], dacr32_el2);
write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2);
- if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+ if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
}
+
+/**
+ * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Load system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_load() function
+ * and loading system register state early avoids having to load them on
+ * every entry to the VM.
+ */
+void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
+ struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+
+ if (!has_vhe())
+ return;
+
+ __sysreg_save_user_state(host_ctxt);
+
+ /*
+ * Load guest EL1 and user state
+ *
+ * We must restore the 32-bit state before the sysregs, thanks
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ */
+ __sysreg32_restore_state(vcpu);
+ __sysreg_restore_user_state(guest_ctxt);
+ __sysreg_restore_el1_state(guest_ctxt);
+
+ vcpu->arch.sysregs_loaded_on_cpu = true;
+
+ activate_traps_vhe_load(vcpu);
+}
+
+/**
+ * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Save guest system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_put() function
+ * and deferring saving system register state until we're no longer running the
+ * VCPU avoids having to save them on every exit from the VM.
+ */
+void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
+ struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+
+ if (!has_vhe())
+ return;
+
+ deactivate_traps_vhe_put();
+
+ __sysreg_save_el1_state(guest_ctxt);
+ __sysreg_save_user_state(guest_ctxt);
+ __sysreg32_save_state(vcpu);
+
+ /* Restore host user state */
+ __sysreg_restore_user_state(host_ctxt);
+
+ vcpu->arch.sysregs_loaded_on_cpu = false;
+}
+
+void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
+{
+ asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
+}
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
new file mode 100644
index 000000000000..39be799d0417
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/kvm_host.h>
+#include <linux/swab.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ return !!(read_sysreg_el2(spsr) & COMPAT_PSR_E_BIT);
+
+ return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE);
+}
+
+/*
+ * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the
+ * guest.
+ *
+ * @vcpu: the offending vcpu
+ *
+ * Returns:
+ * 1: GICV access successfully performed
+ * 0: Not a GICV access
+ * -1: Illegal GICV access
+ */
+int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+ struct vgic_dist *vgic = &kvm->arch.vgic;
+ phys_addr_t fault_ipa;
+ void __iomem *addr;
+ int rd;
+
+ /* Build the full address */
+ fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+ fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+
+ /* If not for GICV, move on */
+ if (fault_ipa < vgic->vgic_cpu_base ||
+ fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE))
+ return 0;
+
+ /* Reject anything but a 32bit access */
+ if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32))
+ return -1;
+
+ /* Not aligned? Don't bother */
+ if (fault_ipa & 3)
+ return -1;
+
+ rd = kvm_vcpu_dabt_get_rd(vcpu);
+ addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va;
+ addr += fault_ipa - vgic->vgic_cpu_base;
+
+ if (kvm_vcpu_dabt_iswrite(vcpu)) {
+ u32 data = vcpu_get_reg(vcpu, rd);
+ if (__is_be(vcpu)) {
+ /* guest pre-swabbed data, undo this for writel() */
+ data = swab32(data);
+ }
+ writel_relaxed(data, addr);
+ } else {
+ u32 data = readl_relaxed(addr);
+ if (__is_be(vcpu)) {
+ /* guest expects swabbed data */
+ data = swab32(data);
+ }
+ vcpu_set_reg(vcpu, rd, data);
+ }
+
+ return 1;
+}
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 60666a056944..d8e71659ba7e 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -58,7 +58,7 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
exc_offset = LOWER_EL_AArch32_VECTOR;
}
- return vcpu_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
+ return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
}
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
@@ -67,13 +67,13 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u32 esr = 0;
- *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+ vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
- *vcpu_spsr(vcpu) = cpsr;
+ vcpu_write_spsr(vcpu, cpsr);
- vcpu_sys_reg(vcpu, FAR_EL1) = addr;
+ vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
/*
* Build an {i,d}abort, depending on the level and the
@@ -94,7 +94,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
if (!is_iabt)
esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
- vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT;
+ vcpu_write_sys_reg(vcpu, esr | ESR_ELx_FSC_EXTABT, ESR_EL1);
}
static void inject_undef64(struct kvm_vcpu *vcpu)
@@ -102,11 +102,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
unsigned long cpsr = *vcpu_cpsr(vcpu);
u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
- *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+ vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
- *vcpu_spsr(vcpu) = cpsr;
+ vcpu_write_spsr(vcpu, cpsr);
/*
* Build an unknown exception, depending on the instruction
@@ -115,7 +115,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
if (kvm_vcpu_trap_il_is32bit(vcpu))
esr |= ESR_ELx_IL;
- vcpu_sys_reg(vcpu, ESR_EL1) = esr;
+ vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
}
/**
@@ -128,7 +128,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
*/
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
{
- if (!(vcpu->arch.hcr_el2 & HCR_RW))
+ if (vcpu_el1_is_32bit(vcpu))
kvm_inject_dabt32(vcpu, addr);
else
inject_abt64(vcpu, false, addr);
@@ -144,7 +144,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
*/
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
{
- if (!(vcpu->arch.hcr_el2 & HCR_RW))
+ if (vcpu_el1_is_32bit(vcpu))
kvm_inject_pabt32(vcpu, addr);
else
inject_abt64(vcpu, true, addr);
@@ -158,7 +158,7 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
*/
void kvm_inject_undefined(struct kvm_vcpu *vcpu)
{
- if (!(vcpu->arch.hcr_el2 & HCR_RW))
+ if (vcpu_el1_is_32bit(vcpu))
kvm_inject_undef32(vcpu);
else
inject_undef64(vcpu);
@@ -167,7 +167,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
{
vcpu_set_vsesr(vcpu, esr);
- vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE);
+ *vcpu_hcr(vcpu) |= HCR_VSE;
}
/**
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
index bbc6ae32e4af..eefe403a2e63 100644
--- a/arch/arm64/kvm/regmap.c
+++ b/arch/arm64/kvm/regmap.c
@@ -141,28 +141,61 @@ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
/*
* Return the SPSR for the current mode of the virtual CPU.
*/
-unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu)
+static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu)
{
unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
switch (mode) {
- case COMPAT_PSR_MODE_SVC:
- mode = KVM_SPSR_SVC;
- break;
- case COMPAT_PSR_MODE_ABT:
- mode = KVM_SPSR_ABT;
- break;
- case COMPAT_PSR_MODE_UND:
- mode = KVM_SPSR_UND;
- break;
- case COMPAT_PSR_MODE_IRQ:
- mode = KVM_SPSR_IRQ;
- break;
- case COMPAT_PSR_MODE_FIQ:
- mode = KVM_SPSR_FIQ;
- break;
+ case COMPAT_PSR_MODE_SVC: return KVM_SPSR_SVC;
+ case COMPAT_PSR_MODE_ABT: return KVM_SPSR_ABT;
+ case COMPAT_PSR_MODE_UND: return KVM_SPSR_UND;
+ case COMPAT_PSR_MODE_IRQ: return KVM_SPSR_IRQ;
+ case COMPAT_PSR_MODE_FIQ: return KVM_SPSR_FIQ;
+ default: BUG();
+ }
+}
+
+unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu)
+{
+ int spsr_idx = vcpu_spsr32_mode(vcpu);
+
+ if (!vcpu->arch.sysregs_loaded_on_cpu)
+ return vcpu_gp_regs(vcpu)->spsr[spsr_idx];
+
+ switch (spsr_idx) {
+ case KVM_SPSR_SVC:
+ return read_sysreg_el1(spsr);
+ case KVM_SPSR_ABT:
+ return read_sysreg(spsr_abt);
+ case KVM_SPSR_UND:
+ return read_sysreg(spsr_und);
+ case KVM_SPSR_IRQ:
+ return read_sysreg(spsr_irq);
+ case KVM_SPSR_FIQ:
+ return read_sysreg(spsr_fiq);
default:
BUG();
}
+}
+
+void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v)
+{
+ int spsr_idx = vcpu_spsr32_mode(vcpu);
+
+ if (!vcpu->arch.sysregs_loaded_on_cpu) {
+ vcpu_gp_regs(vcpu)->spsr[spsr_idx] = v;
+ return;
+ }
- return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode];
+ switch (spsr_idx) {
+ case KVM_SPSR_SVC:
+ write_sysreg_el1(v, spsr);
+ case KVM_SPSR_ABT:
+ write_sysreg(v, spsr_abt);
+ case KVM_SPSR_UND:
+ write_sysreg(v, spsr_und);
+ case KVM_SPSR_IRQ:
+ write_sysreg(v, spsr_irq);
+ case KVM_SPSR_FIQ:
+ write_sysreg(v, spsr_fiq);
+ }
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 50a43c7b97ca..6e3b969391fd 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -35,6 +35,7 @@
#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
+#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/perf_event.h>
#include <asm/sysreg.h>
@@ -76,6 +77,93 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
return false;
}
+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
+{
+ if (!vcpu->arch.sysregs_loaded_on_cpu)
+ goto immediate_read;
+
+ /*
+ * System registers listed in the switch are not saved on every
+ * exit from the guest but are only saved on vcpu_put.
+ *
+ * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+ * should never be listed below, because the guest cannot modify its
+ * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
+ * thread when emulating cross-VCPU communication.
+ */
+ switch (reg) {
+ case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1);
+ case SCTLR_EL1: return read_sysreg_s(sctlr_EL12);
+ case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1);
+ case CPACR_EL1: return read_sysreg_s(cpacr_EL12);
+ case TTBR0_EL1: return read_sysreg_s(ttbr0_EL12);
+ case TTBR1_EL1: return read_sysreg_s(ttbr1_EL12);
+ case TCR_EL1: return read_sysreg_s(tcr_EL12);
+ case ESR_EL1: return read_sysreg_s(esr_EL12);
+ case AFSR0_EL1: return read_sysreg_s(afsr0_EL12);
+ case AFSR1_EL1: return read_sysreg_s(afsr1_EL12);
+ case FAR_EL1: return read_sysreg_s(far_EL12);
+ case MAIR_EL1: return read_sysreg_s(mair_EL12);
+ case VBAR_EL1: return read_sysreg_s(vbar_EL12);
+ case CONTEXTIDR_EL1: return read_sysreg_s(contextidr_EL12);
+ case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0);
+ case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0);
+ case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1);
+ case AMAIR_EL1: return read_sysreg_s(amair_EL12);
+ case CNTKCTL_EL1: return read_sysreg_s(cntkctl_EL12);
+ case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1);
+ case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2);
+ case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2);
+ case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2);
+ }
+
+immediate_read:
+ return __vcpu_sys_reg(vcpu, reg);
+}
+
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
+{
+ if (!vcpu->arch.sysregs_loaded_on_cpu)
+ goto immediate_write;
+
+ /*
+ * System registers listed in the switch are not restored on every
+ * entry to the guest but are only restored on vcpu_load.
+ *
+ * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+ * should never be listed below, because the the MPIDR should only be
+ * set once, before running the VCPU, and never changed later.
+ */
+ switch (reg) {
+ case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return;
+ case SCTLR_EL1: write_sysreg_s(val, sctlr_EL12); return;
+ case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return;
+ case CPACR_EL1: write_sysreg_s(val, cpacr_EL12); return;
+ case TTBR0_EL1: write_sysreg_s(val, ttbr0_EL12); return;
+ case TTBR1_EL1: write_sysreg_s(val, ttbr1_EL12); return;
+ case TCR_EL1: write_sysreg_s(val, tcr_EL12); return;
+ case ESR_EL1: write_sysreg_s(val, esr_EL12); return;
+ case AFSR0_EL1: write_sysreg_s(val, afsr0_EL12); return;
+ case AFSR1_EL1: write_sysreg_s(val, afsr1_EL12); return;
+ case FAR_EL1: write_sysreg_s(val, far_EL12); return;
+ case MAIR_EL1: write_sysreg_s(val, mair_EL12); return;
+ case VBAR_EL1: write_sysreg_s(val, vbar_EL12); return;
+ case CONTEXTIDR_EL1: write_sysreg_s(val, contextidr_EL12); return;
+ case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return;
+ case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return;
+ case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return;
+ case AMAIR_EL1: write_sysreg_s(val, amair_EL12); return;
+ case CNTKCTL_EL1: write_sysreg_s(val, cntkctl_EL12); return;
+ case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return;
+ case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return;
+ case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return;
+ case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return;
+ }
+
+immediate_write:
+ __vcpu_sys_reg(vcpu, reg) = val;
+}
+
/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
static u32 cache_levels;
@@ -121,16 +209,26 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
bool was_enabled = vcpu_has_cache_enabled(vcpu);
+ u64 val;
+ int reg = r->reg;
BUG_ON(!p->is_write);
- if (!p->is_aarch32) {
- vcpu_sys_reg(vcpu, r->reg) = p->regval;
+ /* See the 32bit mapping in kvm_host.h */
+ if (p->is_aarch32)
+ reg = r->reg / 2;
+
+ if (!p->is_aarch32 || !p->is_32bit) {
+ val = p->regval;
} else {
- if (!p->is_32bit)
- vcpu_cp15_64_high(vcpu, r->reg) = upper_32_bits(p->regval);
- vcpu_cp15_64_low(vcpu, r->reg) = lower_32_bits(p->regval);
+ val = vcpu_read_sys_reg(vcpu, reg);
+ if (r->reg % 2)
+ val = (p->regval << 32) | (u64)lower_32_bits(val);
+ else
+ val = ((u64)upper_32_bits(val) << 32) |
+ lower_32_bits(p->regval);
}
+ vcpu_write_sys_reg(vcpu, val, reg);
kvm_toggle_cache(vcpu, was_enabled);
return true;
@@ -175,6 +273,14 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
return read_zero(vcpu, p);
}
+static bool trap_undef(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ kvm_inject_undefined(vcpu);
+ return false;
+}
+
static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -231,10 +337,10 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (p->is_write) {
- vcpu_sys_reg(vcpu, r->reg) = p->regval;
+ vcpu_write_sys_reg(vcpu, p->regval, r->reg);
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
} else {
- p->regval = vcpu_sys_reg(vcpu, r->reg);
+ p->regval = vcpu_read_sys_reg(vcpu, r->reg);
}
trace_trap_reg(__func__, r->reg, p->is_write, p->regval);
@@ -447,7 +553,8 @@ static void reset_wcr(struct kvm_vcpu *vcpu,
static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
- vcpu_sys_reg(vcpu, AMAIR_EL1) = read_sysreg(amair_el1);
+ u64 amair = read_sysreg(amair_el1);
+ vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1);
}
static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
@@ -464,7 +571,7 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0);
mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1);
mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2);
- vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
+ vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1);
}
static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
@@ -478,12 +585,12 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
*/
val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
| (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
- vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+ __vcpu_sys_reg(vcpu, PMCR_EL0) = val;
}
static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags)
{
- u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+ u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0);
bool enabled = (reg & flags) || vcpu_mode_priv(vcpu);
if (!enabled)
@@ -525,14 +632,14 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
/* Only update writeable bits of PMCR */
- val = vcpu_sys_reg(vcpu, PMCR_EL0);
+ val = __vcpu_sys_reg(vcpu, PMCR_EL0);
val &= ~ARMV8_PMU_PMCR_MASK;
val |= p->regval & ARMV8_PMU_PMCR_MASK;
- vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+ __vcpu_sys_reg(vcpu, PMCR_EL0) = val;
kvm_pmu_handle_pmcr(vcpu, val);
} else {
/* PMCR.P & PMCR.C are RAZ */
- val = vcpu_sys_reg(vcpu, PMCR_EL0)
+ val = __vcpu_sys_reg(vcpu, PMCR_EL0)
& ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C);
p->regval = val;
}
@@ -550,10 +657,10 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return false;
if (p->is_write)
- vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
+ __vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
else
/* return PMSELR.SEL field */
- p->regval = vcpu_sys_reg(vcpu, PMSELR_EL0)
+ p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0)
& ARMV8_PMU_COUNTER_MASK;
return true;
@@ -586,7 +693,7 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
{
u64 pmcr, val;
- pmcr = vcpu_sys_reg(vcpu, PMCR_EL0);
+ pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0);
val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) {
kvm_inject_undefined(vcpu);
@@ -611,7 +718,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
if (pmu_access_event_counter_el0_disabled(vcpu))
return false;
- idx = vcpu_sys_reg(vcpu, PMSELR_EL0)
+ idx = __vcpu_sys_reg(vcpu, PMSELR_EL0)
& ARMV8_PMU_COUNTER_MASK;
} else if (r->Op2 == 0) {
/* PMCCNTR_EL0 */
@@ -666,7 +773,7 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) {
/* PMXEVTYPER_EL0 */
- idx = vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
+ idx = __vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
reg = PMEVTYPER0_EL0 + idx;
} else if (r->CRn == 14 && (r->CRm & 12) == 12) {
idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
@@ -684,9 +791,9 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
- vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
+ __vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
} else {
- p->regval = vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
+ p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
}
return true;
@@ -708,15 +815,15 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
val = p->regval & mask;
if (r->Op2 & 0x1) {
/* accessing PMCNTENSET_EL0 */
- vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
+ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
kvm_pmu_enable_counter(vcpu, val);
} else {
/* accessing PMCNTENCLR_EL0 */
- vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
+ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
kvm_pmu_disable_counter(vcpu, val);
}
} else {
- p->regval = vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
}
return true;
@@ -740,12 +847,12 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (r->Op2 & 0x1)
/* accessing PMINTENSET_EL1 */
- vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
+ __vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
else
/* accessing PMINTENCLR_EL1 */
- vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
+ __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
} else {
- p->regval = vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
}
return true;
@@ -765,12 +872,12 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
if (r->CRm & 0x2)
/* accessing PMOVSSET_EL0 */
- vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask);
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask);
else
/* accessing PMOVSCLR_EL0 */
- vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
} else {
- p->regval = vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
}
return true;
@@ -807,10 +914,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return false;
}
- vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval
- & ARMV8_PMU_USERENR_MASK;
+ __vcpu_sys_reg(vcpu, PMUSERENR_EL0) =
+ p->regval & ARMV8_PMU_USERENR_MASK;
} else {
- p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0)
+ p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0)
& ARMV8_PMU_USERENR_MASK;
}
@@ -889,10 +996,14 @@ static u64 read_id_reg(struct sys_reg_desc const *r, bool raz)
if (id == SYS_ID_AA64PFR0_EL1) {
if (val & (0xfUL << ID_AA64PFR0_SVE_SHIFT))
- pr_err_once("kvm [%i]: SVE unsupported for guests, suppressing\n",
- task_pid_nr(current));
+ kvm_debug("SVE unsupported for guests, suppressing\n");
val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
+ } else if (id == SYS_ID_AA64MMFR1_EL1) {
+ if (val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))
+ kvm_debug("LORegions unsupported for guests, suppressing\n");
+
+ val &= ~(0xfUL << ID_AA64MMFR1_LOR_SHIFT);
}
return val;
@@ -1178,6 +1289,12 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
+ { SYS_DESC(SYS_LORSA_EL1), trap_undef },
+ { SYS_DESC(SYS_LOREA_EL1), trap_undef },
+ { SYS_DESC(SYS_LORN_EL1), trap_undef },
+ { SYS_DESC(SYS_LORC_EL1), trap_undef },
+ { SYS_DESC(SYS_LORID_EL1), trap_undef },
+
{ SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
{ SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
@@ -1545,6 +1662,11 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+ /* CNTP_TVAL */
+ { Op1( 0), CRn(14), CRm( 2), Op2( 0), access_cntp_tval },
+ /* CNTP_CTL */
+ { Op1( 0), CRn(14), CRm( 2), Op2( 1), access_cntp_ctl },
+
/* PMEVCNTRn */
PMU_PMEVCNTR(0),
PMU_PMEVCNTR(1),
@@ -1618,6 +1740,7 @@ static const struct sys_reg_desc cp15_64_regs[] = {
{ Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
+ { Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval },
};
/* Target specific emulation tables */
@@ -2194,7 +2317,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
if (r->get_user)
return (r->get_user)(vcpu, r, reg, uaddr);
- return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id);
+ return reg_to_user(uaddr, &__vcpu_sys_reg(vcpu, r->reg), reg->id);
}
int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
@@ -2215,7 +2338,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
if (r->set_user)
return (r->set_user)(vcpu, r, reg, uaddr);
- return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
+ return reg_from_user(&__vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
}
static unsigned int num_demux_regs(void)
@@ -2421,6 +2544,6 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
reset_sys_reg_descs(vcpu, table, num);
for (num = 1; num < NR_SYS_REGS; num++)
- if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
- panic("Didn't reset vcpu_sys_reg(%zi)", num);
+ if (__vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
+ panic("Didn't reset __vcpu_sys_reg(%zi)", num);
}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 060f5348ef25..cd710f8b63e0 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -89,14 +89,14 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu,
{
BUG_ON(!r->reg);
BUG_ON(r->reg >= NR_SYS_REGS);
- vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
+ __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
}
static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
BUG_ON(!r->reg);
BUG_ON(r->reg >= NR_SYS_REGS);
- vcpu_sys_reg(vcpu, r->reg) = r->val;
+ __vcpu_sys_reg(vcpu, r->reg) = r->val;
}
static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 969ade1d333d..ddb8497d18d6 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -38,13 +38,13 @@ static bool access_actlr(struct kvm_vcpu *vcpu,
if (p->is_write)
return ignore_write(vcpu, p);
- p->regval = vcpu_sys_reg(vcpu, ACTLR_EL1);
+ p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1);
return true;
}
static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
- vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1);
+ __vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1);
}
/*
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
new file mode 100644
index 000000000000..c712a7376bc1
--- /dev/null
+++ b/arch/arm64/kvm/va_layout.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/random.h>
+#include <linux/memblock.h>
+#include <asm/alternative.h>
+#include <asm/debug-monitors.h>
+#include <asm/insn.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * The LSB of the random hyp VA tag or 0 if no randomization is used.
+ */
+static u8 tag_lsb;
+/*
+ * The random hyp VA tag value with the region bit if hyp randomization is used
+ */
+static u64 tag_val;
+static u64 va_mask;
+
+static void compute_layout(void)
+{
+ phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
+ u64 hyp_va_msb;
+ int kva_msb;
+
+ /* Where is my RAM region? */
+ hyp_va_msb = idmap_addr & BIT(VA_BITS - 1);
+ hyp_va_msb ^= BIT(VA_BITS - 1);
+
+ kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^
+ (u64)(high_memory - 1));
+
+ if (kva_msb == (VA_BITS - 1)) {
+ /*
+ * No space in the address, let's compute the mask so
+ * that it covers (VA_BITS - 1) bits, and the region
+ * bit. The tag stays set to zero.
+ */
+ va_mask = BIT(VA_BITS - 1) - 1;
+ va_mask |= hyp_va_msb;
+ } else {
+ /*
+ * We do have some free bits to insert a random tag.
+ * Hyp VAs are now created from kernel linear map VAs
+ * using the following formula (with V == VA_BITS):
+ *
+ * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0
+ * ---------------------------------------------------------
+ * | 0000000 | hyp_va_msb | random tag | kern linear VA |
+ */
+ tag_lsb = kva_msb;
+ va_mask = GENMASK_ULL(tag_lsb - 1, 0);
+ tag_val = get_random_long() & GENMASK_ULL(VA_BITS - 2, tag_lsb);
+ tag_val |= hyp_va_msb;
+ tag_val >>= tag_lsb;
+ }
+}
+
+static u32 compute_instruction(int n, u32 rd, u32 rn)
+{
+ u32 insn = AARCH64_BREAK_FAULT;
+
+ switch (n) {
+ case 0:
+ insn = aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_AND,
+ AARCH64_INSN_VARIANT_64BIT,
+ rn, rd, va_mask);
+ break;
+
+ case 1:
+ /* ROR is a variant of EXTR with Rm = Rn */
+ insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT,
+ rn, rn, rd,
+ tag_lsb);
+ break;
+
+ case 2:
+ insn = aarch64_insn_gen_add_sub_imm(rd, rn,
+ tag_val & GENMASK(11, 0),
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_ADSB_ADD);
+ break;
+
+ case 3:
+ insn = aarch64_insn_gen_add_sub_imm(rd, rn,
+ tag_val & GENMASK(23, 12),
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_ADSB_ADD);
+ break;
+
+ case 4:
+ /* ROR is a variant of EXTR with Rm = Rn */
+ insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT,
+ rn, rn, rd, 64 - tag_lsb);
+ break;
+ }
+
+ return insn;
+}
+
+void __init kvm_update_va_mask(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ int i;
+
+ BUG_ON(nr_inst != 5);
+
+ if (!has_vhe() && !va_mask)
+ compute_layout();
+
+ for (i = 0; i < nr_inst; i++) {
+ u32 rd, rn, insn, oinsn;
+
+ /*
+ * VHE doesn't need any address translation, let's NOP
+ * everything.
+ *
+ * Alternatively, if we don't have any spare bits in
+ * the address, NOP everything after masking that
+ * kernel VA.
+ */
+ if (has_vhe() || (!tag_lsb && i > 0)) {
+ updptr[i] = cpu_to_le32(aarch64_insn_gen_nop());
+ continue;
+ }
+
+ oinsn = le32_to_cpu(origptr[i]);
+ rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn);
+ rn = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, oinsn);
+
+ insn = compute_instruction(i, rd, rn);
+ BUG_ON(insn == AARCH64_BREAK_FAULT);
+
+ updptr[i] = cpu_to_le32(insn);
+ }
+}
+
+void *__kvm_bp_vect_base;
+int __kvm_harden_el2_vector_slot;
+
+void kvm_patch_vector_branch(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ u64 addr;
+ u32 insn;
+
+ BUG_ON(nr_inst != 5);
+
+ if (has_vhe() || !cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
+ WARN_ON_ONCE(cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS));
+ return;
+ }
+
+ if (!va_mask)
+ compute_layout();
+
+ /*
+ * Compute HYP VA by using the same computation as kern_hyp_va()
+ */
+ addr = (uintptr_t)kvm_ksym_ref(__kvm_hyp_vector);
+ addr &= va_mask;
+ addr |= tag_val << tag_lsb;
+
+ /* Use PC[10:7] to branch to the same vector in KVM */
+ addr |= ((u64)origptr & GENMASK_ULL(10, 7));
+
+ /*
+ * Branch to the second instruction in the vectors in order to
+ * avoid the initial store on the stack (which we already
+ * perform in the hardening vectors).
+ */
+ addr += AARCH64_INSN_SIZE;
+
+ /* stp x0, x1, [sp, #-16]! */
+ insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0,
+ AARCH64_INSN_REG_1,
+ AARCH64_INSN_REG_SP,
+ -16,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movz x0, #(addr & 0xffff) */
+ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+ (u16)addr,
+ 0,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_ZERO);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movk x0, #((addr >> 16) & 0xffff), lsl #16 */
+ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+ (u16)(addr >> 16),
+ 16,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_KEEP);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movk x0, #((addr >> 32) & 0xffff), lsl #32 */
+ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+ (u16)(addr >> 32),
+ 32,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_KEEP);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* br x0 */
+ insn = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_0,
+ AARCH64_INSN_BRANCH_NOLINK);
+ *updptr++ = cpu_to_le32(insn);
+}
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 0ead8a1d1679..137710f4dac3 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -19,5 +19,9 @@ CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \
-fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \
-fcall-saved-x18 -fomit-frame-pointer
CFLAGS_REMOVE_atomic_ll_sc.o := -pg
+GCOV_PROFILE_atomic_ll_sc.o := n
+KASAN_SANITIZE_atomic_ll_sc.o := n
+KCOV_INSTRUMENT_atomic_ll_sc.o := n
+UBSAN_SANITIZE_atomic_ll_sc.o := n
lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S
index d3db9b2cd479..0fdff97794de 100644
--- a/arch/arm64/lib/tishift.S
+++ b/arch/arm64/lib/tishift.S
@@ -1,17 +1,6 @@
-/*
- * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2017-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <linux/linkage.h>
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index a96ec0181818..db01f2709842 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -508,16 +508,6 @@ static int __init arm64_dma_init(void)
}
arch_initcall(arm64_dma_init);
-#define PREALLOC_DMA_DEBUG_ENTRIES 4096
-
-static int __init dma_debug_do_init(void)
-{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
- return 0;
-}
-fs_initcall(dma_debug_do_init);
-
-
#ifdef CONFIG_IOMMU_DMA
#include <linux/dma-iommu.h>
#include <linux/platform_device.h>
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 4165485e8b6e..576f15153080 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -293,6 +293,57 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
static void __do_user_fault(struct siginfo *info, unsigned int esr)
{
current->thread.fault_address = (unsigned long)info->si_addr;
+
+ /*
+ * If the faulting address is in the kernel, we must sanitize the ESR.
+ * From userspace's point of view, kernel-only mappings don't exist
+ * at all, so we report them as level 0 translation faults.
+ * (This is not quite the way that "no mapping there at all" behaves:
+ * an alignment fault not caused by the memory type would take
+ * precedence over translation fault for a real access to empty
+ * space. Unfortunately we can't easily distinguish "alignment fault
+ * not caused by memory type" from "alignment fault caused by memory
+ * type", so we ignore this wrinkle and just return the translation
+ * fault.)
+ */
+ if (current->thread.fault_address >= TASK_SIZE) {
+ switch (ESR_ELx_EC(esr)) {
+ case ESR_ELx_EC_DABT_LOW:
+ /*
+ * These bits provide only information about the
+ * faulting instruction, which userspace knows already.
+ * We explicitly clear bits which are architecturally
+ * RES0 in case they are given meanings in future.
+ * We always report the ESR as if the fault was taken
+ * to EL1 and so ISV and the bits in ISS[23:14] are
+ * clear. (In fact it always will be a fault to EL1.)
+ */
+ esr &= ESR_ELx_EC_MASK | ESR_ELx_IL |
+ ESR_ELx_CM | ESR_ELx_WNR;
+ esr |= ESR_ELx_FSC_FAULT;
+ break;
+ case ESR_ELx_EC_IABT_LOW:
+ /*
+ * Claim a level 0 translation fault.
+ * All other bits are architecturally RES0 for faults
+ * reported with that DFSC value, so we clear them.
+ */
+ esr &= ESR_ELx_EC_MASK | ESR_ELx_IL;
+ esr |= ESR_ELx_FSC_FAULT;
+ break;
+ default:
+ /*
+ * This should never happen (entry.S only brings us
+ * into this code for insn and data aborts from a lower
+ * exception level). Fail safe by not providing an ESR
+ * context record at all.
+ */
+ WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr);
+ esr = 0;
+ break;
+ }
+ }
+
current->thread.fault_code = esr;
arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current);
}
@@ -305,11 +356,12 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
*/
if (user_mode(regs)) {
const struct fault_info *inf = esr_to_fault_info(esr);
- struct siginfo si = {
- .si_signo = inf->sig,
- .si_code = inf->code,
- .si_addr = (void __user *)addr,
- };
+ struct siginfo si;
+
+ clear_siginfo(&si);
+ si.si_signo = inf->sig;
+ si.si_code = inf->code;
+ si.si_addr = (void __user *)addr;
__do_user_fault(&si, esr);
} else {
@@ -583,6 +635,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
nmi_exit();
}
+ clear_siginfo(&info);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
@@ -687,6 +740,7 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
show_pte(addr);
}
+ clear_siginfo(&info);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
@@ -729,6 +783,7 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
local_irq_enable();
}
+ clear_siginfo(&info);
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_ADRALN;
@@ -772,7 +827,6 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
struct pt_regs *regs)
{
const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
- struct siginfo info;
int rv;
/*
@@ -788,6 +842,9 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
if (!inf->fn(addr, esr, regs)) {
rv = 1;
} else {
+ struct siginfo info;
+
+ clear_siginfo(&info);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index e36ed5087b5c..1059884f9a6f 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -58,7 +58,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
flush_ptrace_access(vma, page, uaddr, dst, len);
}
-void __sync_icache_dcache(pte_t pte, unsigned long addr)
+void __sync_icache_dcache(pte_t pte)
{
struct page *page = pte_page(pte);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 9f3c47acf8ff..1b18b4722420 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -646,8 +646,10 @@ static int keep_initrd __initdata;
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
- if (!keep_initrd)
+ if (!keep_initrd) {
free_reserved_area((void *)start, (void *)end, 0, "initrd");
+ memblock_free(__virt_to_phys(start), end - start);
+ }
}
static int __init keepinitrd_setup(char *__unused)
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index dabfc1ecda3d..12145874c02b 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -204,7 +204,7 @@ void __init kasan_init(void)
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
kasan_map_populate(kimg_shadow_start, kimg_shadow_end,
- pfn_to_nid(virt_to_pfn(lm_alias(_text))));
+ early_pfn_to_nid(virt_to_pfn(lm_alias(_text))));
kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
(void *)mod_shadow_start);
@@ -224,7 +224,7 @@ void __init kasan_init(void)
kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
(unsigned long)kasan_mem_to_shadow(end),
- pfn_to_nid(virt_to_pfn(start)));
+ early_pfn_to_nid(virt_to_pfn(start)));
}
/*
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index decccffb03ca..842c8a5fcd53 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -38,12 +38,12 @@
#define MIN_GAP (SZ_128M)
#define MAX_GAP (STACK_TOP/6*5)
-static int mmap_is_legacy(void)
+static int mmap_is_legacy(struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
- if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+ if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
@@ -62,9 +62,9 @@ unsigned long arch_mmap_rnd(void)
return rnd << PAGE_SHIFT;
}
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{
- unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
/* Values close to RLIM_INFINITY can overflow. */
@@ -83,7 +83,7 @@ static unsigned long mmap_base(unsigned long rnd)
* This function, called very early during the creation of a new process VM
* image, sets up which VM layout function to use:
*/
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
@@ -94,11 +94,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* Fall back to the standard layout if the personality bit is set, or
* if the expected stack growth is unlimited:
*/
- if (mmap_is_legacy()) {
+ if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
- mm->mmap_base = mmap_base(random_factor);
+ mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 2dbb2c9f1ec1..493ff75670ff 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -933,13 +933,15 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
+ pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot);
- /* ioremap_page_range doesn't honour BBM */
- if (pud_present(READ_ONCE(*pudp)))
+ /* Only allow permission changes for now */
+ if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)),
+ pud_val(new_pud)))
return 0;
BUG_ON(phys & ~PUD_MASK);
- set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
+ set_pud(pudp, new_pud);
return 1;
}
@@ -947,13 +949,15 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
pgprot_val(mk_sect_prot(prot)));
+ pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot);
- /* ioremap_page_range doesn't honour BBM */
- if (pmd_present(READ_ONCE(*pmdp)))
+ /* Only allow permission changes for now */
+ if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)),
+ pmd_val(new_pmd)))
return 0;
BUG_ON(phys & ~PMD_MASK);
- set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
+ set_pmd(pmdp, new_pmd);
return 1;
}
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index a93350451e8e..a6fdaea07c63 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -21,7 +21,6 @@
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/printk.h>
-#include <linux/skbuff.h>
#include <linux/slab.h>
#include <asm/byteorder.h>
@@ -80,23 +79,66 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx)
ctx->idx++;
}
+static inline void emit_a64_mov_i(const int is64, const int reg,
+ const s32 val, struct jit_ctx *ctx)
+{
+ u16 hi = val >> 16;
+ u16 lo = val & 0xffff;
+
+ if (hi & 0x8000) {
+ if (hi == 0xffff) {
+ emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
+ } else {
+ emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
+ if (lo != 0xffff)
+ emit(A64_MOVK(is64, reg, lo, 0), ctx);
+ }
+ } else {
+ emit(A64_MOVZ(is64, reg, lo, 0), ctx);
+ if (hi)
+ emit(A64_MOVK(is64, reg, hi, 16), ctx);
+ }
+}
+
+static int i64_i16_blocks(const u64 val, bool inverse)
+{
+ return (((val >> 0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+ (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+ (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+ (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
+}
+
static inline void emit_a64_mov_i64(const int reg, const u64 val,
struct jit_ctx *ctx)
{
- u64 tmp = val;
- int shift = 0;
-
- emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
- tmp >>= 16;
- shift += 16;
- while (tmp) {
- if (tmp & 0xffff)
- emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
- tmp >>= 16;
- shift += 16;
+ u64 nrm_tmp = val, rev_tmp = ~val;
+ bool inverse;
+ int shift;
+
+ if (!(nrm_tmp >> 32))
+ return emit_a64_mov_i(0, reg, (u32)val, ctx);
+
+ inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
+ shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
+ (fls64(nrm_tmp) - 1)), 16), 0);
+ if (inverse)
+ emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
+ else
+ emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
+ shift -= 16;
+ while (shift >= 0) {
+ if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
+ emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
+ shift -= 16;
}
}
+/*
+ * This is an unoptimized 64 immediate emission used for BPF to BPF call
+ * addresses. It will always do a full 64 bit decomposition as otherwise
+ * more complexity in the last extra pass is required since we previously
+ * reserved 4 instructions for the address.
+ */
static inline void emit_addr_mov_i64(const int reg, const u64 val,
struct jit_ctx *ctx)
{
@@ -111,26 +153,6 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val,
}
}
-static inline void emit_a64_mov_i(const int is64, const int reg,
- const s32 val, struct jit_ctx *ctx)
-{
- u16 hi = val >> 16;
- u16 lo = val & 0xffff;
-
- if (hi & 0x8000) {
- if (hi == 0xffff) {
- emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
- } else {
- emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
- emit(A64_MOVK(is64, reg, lo, 0), ctx);
- }
- } else {
- emit(A64_MOVZ(is64, reg, lo, 0), ctx);
- if (hi)
- emit(A64_MOVK(is64, reg, hi, 16), ctx);
- }
-}
-
static inline int bpf2a64_offset(int bpf_to, int bpf_from,
const struct jit_ctx *ctx)
{
@@ -163,7 +185,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
/* Tail call offset to jump into */
#define PROLOGUE_OFFSET 7
-static int build_prologue(struct jit_ctx *ctx)
+static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
const struct bpf_prog *prog = ctx->prog;
const u8 r6 = bpf2a64[BPF_REG_6];
@@ -188,7 +210,7 @@ static int build_prologue(struct jit_ctx *ctx)
* | ... | BPF prog stack
* | |
* +-----+ <= (BPF_FP - prog->aux->stack_depth)
- * |RSVD | JIT scratchpad
+ * |RSVD | padding
* current A64_SP => +-----+ <= (BPF_FP - ctx->stack_size)
* | |
* | ... | Function call stack
@@ -210,19 +232,19 @@ static int build_prologue(struct jit_ctx *ctx)
/* Set up BPF prog stack base register */
emit(A64_MOV(1, fp, A64_SP), ctx);
- /* Initialize tail_call_cnt */
- emit(A64_MOVZ(1, tcc, 0, 0), ctx);
+ if (!ebpf_from_cbpf) {
+ /* Initialize tail_call_cnt */
+ emit(A64_MOVZ(1, tcc, 0, 0), ctx);
- cur_offset = ctx->idx - idx0;
- if (cur_offset != PROLOGUE_OFFSET) {
- pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
- cur_offset, PROLOGUE_OFFSET);
- return -1;
+ cur_offset = ctx->idx - idx0;
+ if (cur_offset != PROLOGUE_OFFSET) {
+ pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
+ cur_offset, PROLOGUE_OFFSET);
+ return -1;
+ }
}
- /* 4 byte extra for skb_copy_bits buffer */
- ctx->stack_size = prog->aux->stack_depth + 4;
- ctx->stack_size = STACK_ALIGN(ctx->stack_size);
+ ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth);
/* Set up function call stack */
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
@@ -723,71 +745,6 @@ emit_cond_jmp:
emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
break;
- /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
- case BPF_LD | BPF_ABS | BPF_W:
- case BPF_LD | BPF_ABS | BPF_H:
- case BPF_LD | BPF_ABS | BPF_B:
- /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
- case BPF_LD | BPF_IND | BPF_W:
- case BPF_LD | BPF_IND | BPF_H:
- case BPF_LD | BPF_IND | BPF_B:
- {
- const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */
- const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */
- const u8 fp = bpf2a64[BPF_REG_FP];
- const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */
- const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */
- const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */
- const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */
- const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */
- int size;
-
- emit(A64_MOV(1, r1, r6), ctx);
- emit_a64_mov_i(0, r2, imm, ctx);
- if (BPF_MODE(code) == BPF_IND)
- emit(A64_ADD(0, r2, r2, src), ctx);
- switch (BPF_SIZE(code)) {
- case BPF_W:
- size = 4;
- break;
- case BPF_H:
- size = 2;
- break;
- case BPF_B:
- size = 1;
- break;
- default:
- return -EINVAL;
- }
- emit_a64_mov_i64(r3, size, ctx);
- emit(A64_SUB_I(1, r4, fp, ctx->stack_size), ctx);
- emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
- emit(A64_BLR(r5), ctx);
- emit(A64_MOV(1, r0, A64_R(0)), ctx);
-
- jmp_offset = epilogue_offset(ctx);
- check_imm19(jmp_offset);
- emit(A64_CBZ(1, r0, jmp_offset), ctx);
- emit(A64_MOV(1, r5, r0), ctx);
- switch (BPF_SIZE(code)) {
- case BPF_W:
- emit(A64_LDR32(r0, r5, A64_ZR), ctx);
-#ifndef CONFIG_CPU_BIG_ENDIAN
- emit(A64_REV32(0, r0, r0), ctx);
-#endif
- break;
- case BPF_H:
- emit(A64_LDRH(r0, r5, A64_ZR), ctx);
-#ifndef CONFIG_CPU_BIG_ENDIAN
- emit(A64_REV16(0, r0, r0), ctx);
-#endif
- break;
- case BPF_B:
- emit(A64_LDRB(r0, r5, A64_ZR), ctx);
- break;
- }
- break;
- }
default:
pr_err_once("unknown opcode %02x\n", code);
return -EINVAL;
@@ -851,6 +808,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header;
struct arm64_jit_data *jit_data;
+ bool was_classic = bpf_prog_was_classic(prog);
bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx;
@@ -905,7 +863,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_off;
}
- if (build_prologue(&ctx)) {
+ if (build_prologue(&ctx, was_classic)) {
prog = orig_prog;
goto out_off;
}
@@ -928,7 +886,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
skip_init_ctx:
ctx.idx = 0;
- build_prologue(&ctx);
+ build_prologue(&ctx, was_classic);
if (build_body(&ctx)) {
bpf_jit_binary_free(header);
OpenPOWER on IntegriCloud