summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/crypto/Makefile3
-rw-r--r--arch/x86/crypto/blowfish_glue.c3
-rw-r--r--arch/x86/crypto/cast5_avx_glue.c3
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S29
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c14
-rw-r--r--arch/x86/crypto/sha1_avx2_x86_64_asm.S708
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c53
-rw-r--r--arch/x86/include/asm/archrandom.h42
-rw-r--r--arch/x86/include/asm/kvm_host.h18
-rw-r--r--arch/x86/include/asm/vmx.h4
-rw-r--r--arch/x86/include/asm/xen/page.h11
-rw-r--r--arch/x86/include/asm/xsave.h2
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h1
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c13
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c18
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_uncore.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c9
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c6
-rw-r--r--arch/x86/kernel/cpuid.c15
-rw-r--r--arch/x86/kernel/ftrace.c55
-rw-r--r--arch/x86/kernel/hpet.c4
-rw-r--r--arch/x86/kernel/kvm.c1
-rw-r--r--arch/x86/kernel/kvmclock.c2
-rw-r--r--arch/x86/kernel/msr.c16
-rw-r--r--arch/x86/kernel/vsyscall_64.c6
-rw-r--r--arch/x86/kvm/cpuid.c37
-rw-r--r--arch/x86/kvm/emulate.c8
-rw-r--r--arch/x86/kvm/mmu.c2
-rw-r--r--arch/x86/kvm/paging_tmpl.h7
-rw-r--r--arch/x86/kvm/svm.c84
-rw-r--r--arch/x86/kvm/vmx.c334
-rw-r--r--arch/x86/kvm/x86.c152
-rw-r--r--arch/x86/kvm/x86.h5
-rw-r--r--arch/x86/net/bpf_jit_comp.c11
-rw-r--r--arch/x86/oprofile/nmi_int.c15
-rw-r--r--arch/x86/pci/amd_bus.c5
-rw-r--r--arch/x86/pci/xen.c29
-rw-r--r--arch/x86/syscalls/syscall_64.tbl1
-rw-r--r--arch/x86/xen/Kconfig5
-rw-r--r--arch/x86/xen/p2m.c121
42 files changed, 1525 insertions, 348 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 6ba54d640383..61d6e281898b 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -79,6 +79,9 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
+ifeq ($(avx2_supported),yes)
+sha1-ssse3-y += sha1_avx2_x86_64_asm.o
+endif
crc32c-intel-y := crc32c-intel_glue.o
crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 50ec333b70e6..8af519ed73d1 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -223,9 +223,6 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
src -= 1;
dst -= 1;
} while (nbytes >= bsize * 4);
-
- if (nbytes < bsize)
- goto done;
}
/* Handle leftovers */
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index e6a3700489b9..e57e20ab5e0b 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -203,9 +203,6 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
src -= 1;
dst -= 1;
} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
}
/* Handle leftovers */
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 586f41aac361..185fad49d86f 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -24,10 +24,6 @@
.align 16
.Lbswap_mask:
.octa 0x000102030405060708090a0b0c0d0e0f
-.Lpoly:
- .octa 0xc2000000000000000000000000000001
-.Ltwo_one:
- .octa 0x00000001000000000000000000000001
#define DATA %xmm0
#define SHASH %xmm1
@@ -134,28 +130,3 @@ ENTRY(clmul_ghash_update)
.Lupdate_just_ret:
ret
ENDPROC(clmul_ghash_update)
-
-/*
- * void clmul_ghash_setkey(be128 *shash, const u8 *key);
- *
- * Calculate hash_key << 1 mod poly
- */
-ENTRY(clmul_ghash_setkey)
- movaps .Lbswap_mask, BSWAP
- movups (%rsi), %xmm0
- PSHUFB_XMM BSWAP %xmm0
- movaps %xmm0, %xmm1
- psllq $1, %xmm0
- psrlq $63, %xmm1
- movaps %xmm1, %xmm2
- pslldq $8, %xmm1
- psrldq $8, %xmm2
- por %xmm1, %xmm0
- # reduction
- pshufd $0b00100100, %xmm2, %xmm1
- pcmpeqd .Ltwo_one, %xmm1
- pand .Lpoly, %xmm1
- pxor %xmm1, %xmm0
- movups %xmm0, (%rdi)
- ret
-ENDPROC(clmul_ghash_setkey)
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 6759dd1135be..d785cf2c529c 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -30,8 +30,6 @@ void clmul_ghash_mul(char *dst, const be128 *shash);
void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
const be128 *shash);
-void clmul_ghash_setkey(be128 *shash, const u8 *key);
-
struct ghash_async_ctx {
struct cryptd_ahash *cryptd_tfm;
};
@@ -58,13 +56,23 @@ static int ghash_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+ be128 *x = (be128 *)key;
+ u64 a, b;
if (keylen != GHASH_BLOCK_SIZE) {
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
- clmul_ghash_setkey(&ctx->shash, key);
+ /* perform multiplication by 'x' in GF(2^128) */
+ a = be64_to_cpu(x->a);
+ b = be64_to_cpu(x->b);
+
+ ctx->shash.a = (__be64)((b << 1) | (a >> 63));
+ ctx->shash.b = (__be64)((a << 1) | (b >> 63));
+
+ if (a >> 63)
+ ctx->shash.b ^= cpu_to_be64(0xc2);
return 0;
}
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
new file mode 100644
index 000000000000..1cd792db15ef
--- /dev/null
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -0,0 +1,708 @@
+/*
+ * Implement fast SHA-1 with AVX2 instructions. (x86_64)
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Ilya Albrekht <ilya.albrekht@intel.com>
+ * Maxim Locktyukhin <maxim.locktyukhin@intel.com>
+ * Ronen Zohar <ronen.zohar@intel.com>
+ * Chandramouli Narayanan <mouli@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * SHA-1 implementation with Intel(R) AVX2 instruction set extensions.
+ *
+ *This implementation is based on the previous SSSE3 release:
+ *Visit http://software.intel.com/en-us/articles/
+ *and refer to improving-the-performance-of-the-secure-hash-algorithm-1/
+ *
+ *Updates 20-byte SHA-1 record in 'hash' for even number of
+ *'num_blocks' consecutive 64-byte blocks
+ *
+ *extern "C" void sha1_transform_avx2(
+ * int *hash, const char* input, size_t num_blocks );
+ */
+
+#include <linux/linkage.h>
+
+#define CTX %rdi /* arg1 */
+#define BUF %rsi /* arg2 */
+#define CNT %rdx /* arg3 */
+
+#define REG_A %ecx
+#define REG_B %esi
+#define REG_C %edi
+#define REG_D %eax
+#define REG_E %edx
+#define REG_TB %ebx
+#define REG_TA %r12d
+#define REG_RA %rcx
+#define REG_RB %rsi
+#define REG_RC %rdi
+#define REG_RD %rax
+#define REG_RE %rdx
+#define REG_RTA %r12
+#define REG_RTB %rbx
+#define REG_T1 %ebp
+#define xmm_mov vmovups
+#define avx2_zeroupper vzeroupper
+#define RND_F1 1
+#define RND_F2 2
+#define RND_F3 3
+
+.macro REGALLOC
+ .set A, REG_A
+ .set B, REG_B
+ .set C, REG_C
+ .set D, REG_D
+ .set E, REG_E
+ .set TB, REG_TB
+ .set TA, REG_TA
+
+ .set RA, REG_RA
+ .set RB, REG_RB
+ .set RC, REG_RC
+ .set RD, REG_RD
+ .set RE, REG_RE
+
+ .set RTA, REG_RTA
+ .set RTB, REG_RTB
+
+ .set T1, REG_T1
+.endm
+
+#define K_BASE %r8
+#define HASH_PTR %r9
+#define BUFFER_PTR %r10
+#define BUFFER_PTR2 %r13
+#define BUFFER_END %r11
+
+#define PRECALC_BUF %r14
+#define WK_BUF %r15
+
+#define W_TMP %xmm0
+#define WY_TMP %ymm0
+#define WY_TMP2 %ymm9
+
+# AVX2 variables
+#define WY0 %ymm3
+#define WY4 %ymm5
+#define WY08 %ymm7
+#define WY12 %ymm8
+#define WY16 %ymm12
+#define WY20 %ymm13
+#define WY24 %ymm14
+#define WY28 %ymm15
+
+#define YMM_SHUFB_BSWAP %ymm10
+
+/*
+ * Keep 2 iterations precalculated at a time:
+ * - 80 DWORDs per iteration * 2
+ */
+#define W_SIZE (80*2*2 +16)
+
+#define WK(t) ((((t) % 80) / 4)*32 + ( (t) % 4)*4 + ((t)/80)*16 )(WK_BUF)
+#define PRECALC_WK(t) ((t)*2*2)(PRECALC_BUF)
+
+
+.macro UPDATE_HASH hash, val
+ add \hash, \val
+ mov \val, \hash
+.endm
+
+.macro PRECALC_RESET_WY
+ .set WY_00, WY0
+ .set WY_04, WY4
+ .set WY_08, WY08
+ .set WY_12, WY12
+ .set WY_16, WY16
+ .set WY_20, WY20
+ .set WY_24, WY24
+ .set WY_28, WY28
+ .set WY_32, WY_00
+.endm
+
+.macro PRECALC_ROTATE_WY
+ /* Rotate macros */
+ .set WY_32, WY_28
+ .set WY_28, WY_24
+ .set WY_24, WY_20
+ .set WY_20, WY_16
+ .set WY_16, WY_12
+ .set WY_12, WY_08
+ .set WY_08, WY_04
+ .set WY_04, WY_00
+ .set WY_00, WY_32
+
+ /* Define register aliases */
+ .set WY, WY_00
+ .set WY_minus_04, WY_04
+ .set WY_minus_08, WY_08
+ .set WY_minus_12, WY_12
+ .set WY_minus_16, WY_16
+ .set WY_minus_20, WY_20
+ .set WY_minus_24, WY_24
+ .set WY_minus_28, WY_28
+ .set WY_minus_32, WY
+.endm
+
+.macro PRECALC_00_15
+ .if (i == 0) # Initialize and rotate registers
+ PRECALC_RESET_WY
+ PRECALC_ROTATE_WY
+ .endif
+
+ /* message scheduling pre-compute for rounds 0-15 */
+ .if ((i & 7) == 0)
+ /*
+ * blended AVX2 and ALU instruction scheduling
+ * 1 vector iteration per 8 rounds
+ */
+ vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP
+ .elseif ((i & 7) == 1)
+ vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\
+ WY_TMP, WY_TMP
+ .elseif ((i & 7) == 2)
+ vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
+ .elseif ((i & 7) == 4)
+ vpaddd K_XMM(K_BASE), WY, WY_TMP
+ .elseif ((i & 7) == 7)
+ vmovdqu WY_TMP, PRECALC_WK(i&~7)
+
+ PRECALC_ROTATE_WY
+ .endif
+.endm
+
+.macro PRECALC_16_31
+ /*
+ * message scheduling pre-compute for rounds 16-31
+ * calculating last 32 w[i] values in 8 XMM registers
+ * pre-calculate K+w[i] values and store to mem
+ * for later load by ALU add instruction
+ *
+ * "brute force" vectorization for rounds 16-31 only
+ * due to w[i]->w[i-3] dependency
+ */
+ .if ((i & 7) == 0)
+ /*
+ * blended AVX2 and ALU instruction scheduling
+ * 1 vector iteration per 8 rounds
+ */
+ /* w[i-14] */
+ vpalignr $8, WY_minus_16, WY_minus_12, WY
+ vpsrldq $4, WY_minus_04, WY_TMP /* w[i-3] */
+ .elseif ((i & 7) == 1)
+ vpxor WY_minus_08, WY, WY
+ vpxor WY_minus_16, WY_TMP, WY_TMP
+ .elseif ((i & 7) == 2)
+ vpxor WY_TMP, WY, WY
+ vpslldq $12, WY, WY_TMP2
+ .elseif ((i & 7) == 3)
+ vpslld $1, WY, WY_TMP
+ vpsrld $31, WY, WY
+ .elseif ((i & 7) == 4)
+ vpor WY, WY_TMP, WY_TMP
+ vpslld $2, WY_TMP2, WY
+ .elseif ((i & 7) == 5)
+ vpsrld $30, WY_TMP2, WY_TMP2
+ vpxor WY, WY_TMP, WY_TMP
+ .elseif ((i & 7) == 7)
+ vpxor WY_TMP2, WY_TMP, WY
+ vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vmovdqu WY_TMP, PRECALC_WK(i&~7)
+
+ PRECALC_ROTATE_WY
+ .endif
+.endm
+
+.macro PRECALC_32_79
+ /*
+ * in SHA-1 specification:
+ * w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1
+ * instead we do equal:
+ * w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2
+ * allows more efficient vectorization
+ * since w[i]=>w[i-3] dependency is broken
+ */
+
+ .if ((i & 7) == 0)
+ /*
+ * blended AVX2 and ALU instruction scheduling
+ * 1 vector iteration per 8 rounds
+ */
+ vpalignr $8, WY_minus_08, WY_minus_04, WY_TMP
+ .elseif ((i & 7) == 1)
+ /* W is W_minus_32 before xor */
+ vpxor WY_minus_28, WY, WY
+ .elseif ((i & 7) == 2)
+ vpxor WY_minus_16, WY_TMP, WY_TMP
+ .elseif ((i & 7) == 3)
+ vpxor WY_TMP, WY, WY
+ .elseif ((i & 7) == 4)
+ vpslld $2, WY, WY_TMP
+ .elseif ((i & 7) == 5)
+ vpsrld $30, WY, WY
+ vpor WY, WY_TMP, WY
+ .elseif ((i & 7) == 7)
+ vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vmovdqu WY_TMP, PRECALC_WK(i&~7)
+
+ PRECALC_ROTATE_WY
+ .endif
+.endm
+
+.macro PRECALC r, s
+ .set i, \r
+
+ .if (i < 40)
+ .set K_XMM, 32*0
+ .elseif (i < 80)
+ .set K_XMM, 32*1
+ .elseif (i < 120)
+ .set K_XMM, 32*2
+ .else
+ .set K_XMM, 32*3
+ .endif
+
+ .if (i<32)
+ PRECALC_00_15 \s
+ .elseif (i<64)
+ PRECALC_16_31 \s
+ .elseif (i < 160)
+ PRECALC_32_79 \s
+ .endif
+.endm
+
+.macro ROTATE_STATE
+ .set T_REG, E
+ .set E, D
+ .set D, C
+ .set C, B
+ .set B, TB
+ .set TB, A
+ .set A, T_REG
+
+ .set T_REG, RE
+ .set RE, RD
+ .set RD, RC
+ .set RC, RB
+ .set RB, RTB
+ .set RTB, RA
+ .set RA, T_REG
+.endm
+
+/* Macro relies on saved ROUND_Fx */
+
+.macro RND_FUN f, r
+ .if (\f == RND_F1)
+ ROUND_F1 \r
+ .elseif (\f == RND_F2)
+ ROUND_F2 \r
+ .elseif (\f == RND_F3)
+ ROUND_F3 \r
+ .endif
+.endm
+
+.macro RR r
+ .set round_id, (\r % 80)
+
+ .if (round_id == 0) /* Precalculate F for first round */
+ .set ROUND_FUNC, RND_F1
+ mov B, TB
+
+ rorx $(32-30), B, B /* b>>>2 */
+ andn D, TB, T1
+ and C, TB
+ xor T1, TB
+ .endif
+
+ RND_FUN ROUND_FUNC, \r
+ ROTATE_STATE
+
+ .if (round_id == 18)
+ .set ROUND_FUNC, RND_F2
+ .elseif (round_id == 38)
+ .set ROUND_FUNC, RND_F3
+ .elseif (round_id == 58)
+ .set ROUND_FUNC, RND_F2
+ .endif
+
+ .set round_id, ( (\r+1) % 80)
+
+ RND_FUN ROUND_FUNC, (\r+1)
+ ROTATE_STATE
+.endm
+
+.macro ROUND_F1 r
+ add WK(\r), E
+
+ andn C, A, T1 /* ~b&d */
+ lea (RE,RTB), E /* Add F from the previous round */
+
+ rorx $(32-5), A, TA /* T2 = A >>> 5 */
+ rorx $(32-30),A, TB /* b>>>2 for next round */
+
+ PRECALC (\r) /* msg scheduling for next 2 blocks */
+
+ /*
+ * Calculate F for the next round
+ * (b & c) ^ andn[b, d]
+ */
+ and B, A /* b&c */
+ xor T1, A /* F1 = (b&c) ^ (~b&d) */
+
+ lea (RE,RTA), E /* E += A >>> 5 */
+.endm
+
+.macro ROUND_F2 r
+ add WK(\r), E
+ lea (RE,RTB), E /* Add F from the previous round */
+
+ /* Calculate F for the next round */
+ rorx $(32-5), A, TA /* T2 = A >>> 5 */
+ .if ((round_id) < 79)
+ rorx $(32-30), A, TB /* b>>>2 for next round */
+ .endif
+ PRECALC (\r) /* msg scheduling for next 2 blocks */
+
+ .if ((round_id) < 79)
+ xor B, A
+ .endif
+
+ add TA, E /* E += A >>> 5 */
+
+ .if ((round_id) < 79)
+ xor C, A
+ .endif
+.endm
+
+.macro ROUND_F3 r
+ add WK(\r), E
+ PRECALC (\r) /* msg scheduling for next 2 blocks */
+
+ lea (RE,RTB), E /* Add F from the previous round */
+
+ mov B, T1
+ or A, T1
+
+ rorx $(32-5), A, TA /* T2 = A >>> 5 */
+ rorx $(32-30), A, TB /* b>>>2 for next round */
+
+ /* Calculate F for the next round
+ * (b and c) or (d and (b or c))
+ */
+ and C, T1
+ and B, A
+ or T1, A
+
+ add TA, E /* E += A >>> 5 */
+
+.endm
+
+/*
+ * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
+ */
+.macro SHA1_PIPELINED_MAIN_BODY
+
+ REGALLOC
+
+ mov (HASH_PTR), A
+ mov 4(HASH_PTR), B
+ mov 8(HASH_PTR), C
+ mov 12(HASH_PTR), D
+ mov 16(HASH_PTR), E
+
+ mov %rsp, PRECALC_BUF
+ lea (2*4*80+32)(%rsp), WK_BUF
+
+ # Precalc WK for first 2 blocks
+ PRECALC_OFFSET = 0
+ .set i, 0
+ .rept 160
+ PRECALC i
+ .set i, i + 1
+ .endr
+ PRECALC_OFFSET = 128
+ xchg WK_BUF, PRECALC_BUF
+
+ .align 32
+_loop:
+ /*
+ * code loops through more than one block
+ * we use K_BASE value as a signal of a last block,
+ * it is set below by: cmovae BUFFER_PTR, K_BASE
+ */
+ cmp K_BASE, BUFFER_PTR
+ jne _begin
+ .align 32
+ jmp _end
+ .align 32
+_begin:
+
+ /*
+ * Do first block
+ * rounds: 0,2,4,6,8
+ */
+ .set j, 0
+ .rept 5
+ RR j
+ .set j, j+2
+ .endr
+
+ jmp _loop0
+_loop0:
+
+ /*
+ * rounds:
+ * 10,12,14,16,18
+ * 20,22,24,26,28
+ * 30,32,34,36,38
+ * 40,42,44,46,48
+ * 50,52,54,56,58
+ */
+ .rept 25
+ RR j
+ .set j, j+2
+ .endr
+
+ add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */
+ cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */
+ cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
+
+ /*
+ * rounds
+ * 60,62,64,66,68
+ * 70,72,74,76,78
+ */
+ .rept 10
+ RR j
+ .set j, j+2
+ .endr
+
+ UPDATE_HASH (HASH_PTR), A
+ UPDATE_HASH 4(HASH_PTR), TB
+ UPDATE_HASH 8(HASH_PTR), C
+ UPDATE_HASH 12(HASH_PTR), D
+ UPDATE_HASH 16(HASH_PTR), E
+
+ cmp K_BASE, BUFFER_PTR /* is current block the last one? */
+ je _loop
+
+ mov TB, B
+
+ /* Process second block */
+ /*
+ * rounds
+ * 0+80, 2+80, 4+80, 6+80, 8+80
+ * 10+80,12+80,14+80,16+80,18+80
+ */
+
+ .set j, 0
+ .rept 10
+ RR j+80
+ .set j, j+2
+ .endr
+
+ jmp _loop1
+_loop1:
+ /*
+ * rounds
+ * 20+80,22+80,24+80,26+80,28+80
+ * 30+80,32+80,34+80,36+80,38+80
+ */
+ .rept 10
+ RR j+80
+ .set j, j+2
+ .endr
+
+ jmp _loop2
+_loop2:
+
+ /*
+ * rounds
+ * 40+80,42+80,44+80,46+80,48+80
+ * 50+80,52+80,54+80,56+80,58+80
+ */
+ .rept 10
+ RR j+80
+ .set j, j+2
+ .endr
+
+ add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */
+
+ cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */
+ cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
+
+ jmp _loop3
+_loop3:
+
+ /*
+ * rounds
+ * 60+80,62+80,64+80,66+80,68+80
+ * 70+80,72+80,74+80,76+80,78+80
+ */
+ .rept 10
+ RR j+80
+ .set j, j+2
+ .endr
+
+ UPDATE_HASH (HASH_PTR), A
+ UPDATE_HASH 4(HASH_PTR), TB
+ UPDATE_HASH 8(HASH_PTR), C
+ UPDATE_HASH 12(HASH_PTR), D
+ UPDATE_HASH 16(HASH_PTR), E
+
+ /* Reset state for AVX2 reg permutation */
+ mov A, TA
+ mov TB, A
+ mov C, TB
+ mov E, C
+ mov D, B
+ mov TA, D
+
+ REGALLOC
+
+ xchg WK_BUF, PRECALC_BUF
+
+ jmp _loop
+
+ .align 32
+ _end:
+
+.endm
+/*
+ * macro implements SHA-1 function's body for several 64-byte blocks
+ * param: function's name
+ */
+.macro SHA1_VECTOR_ASM name
+ ENTRY(\name)
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ RESERVE_STACK = (W_SIZE*4 + 8+24)
+
+ /* Align stack */
+ mov %rsp, %rbx
+ and $~(0x20-1), %rsp
+ push %rbx
+ sub $RESERVE_STACK, %rsp
+
+ avx2_zeroupper
+
+ lea K_XMM_AR(%rip), K_BASE
+
+ mov CTX, HASH_PTR
+ mov BUF, BUFFER_PTR
+ lea 64(BUF), BUFFER_PTR2
+
+ shl $6, CNT /* mul by 64 */
+ add BUF, CNT
+ add $64, CNT
+ mov CNT, BUFFER_END
+
+ cmp BUFFER_END, BUFFER_PTR2
+ cmovae K_BASE, BUFFER_PTR2
+
+ xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP
+
+ SHA1_PIPELINED_MAIN_BODY
+
+ avx2_zeroupper
+
+ add $RESERVE_STACK, %rsp
+ pop %rsp
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+
+ ENDPROC(\name)
+.endm
+
+.section .rodata
+
+#define K1 0x5a827999
+#define K2 0x6ed9eba1
+#define K3 0x8f1bbcdc
+#define K4 0xca62c1d6
+
+.align 128
+K_XMM_AR:
+ .long K1, K1, K1, K1
+ .long K1, K1, K1, K1
+ .long K2, K2, K2, K2
+ .long K2, K2, K2, K2
+ .long K3, K3, K3, K3
+ .long K3, K3, K3, K3
+ .long K4, K4, K4, K4
+ .long K4, K4, K4, K4
+
+BSWAP_SHUFB_CTL:
+ .long 0x00010203
+ .long 0x04050607
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x00010203
+ .long 0x04050607
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+.text
+
+SHA1_VECTOR_ASM sha1_transform_avx2
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 4a11a9d72451..74d16ef707c7 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -10,6 +10,7 @@
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
* Copyright (c) Mathias Krause <minipli@googlemail.com>
+ * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -39,6 +40,12 @@ asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
unsigned int rounds);
#endif
+#ifdef CONFIG_AS_AVX2
+#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */
+
+asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
+ unsigned int rounds);
+#endif
static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
@@ -165,6 +172,18 @@ static int sha1_ssse3_import(struct shash_desc *desc, const void *in)
return 0;
}
+#ifdef CONFIG_AS_AVX2
+static void sha1_apply_transform_avx2(u32 *digest, const char *data,
+ unsigned int rounds)
+{
+ /* Select the optimal transform based on data block size */
+ if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE)
+ sha1_transform_avx2(digest, data, rounds);
+ else
+ sha1_transform_avx(digest, data, rounds);
+}
+#endif
+
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_ssse3_init,
@@ -201,27 +220,49 @@ static bool __init avx_usable(void)
return true;
}
+
+#ifdef CONFIG_AS_AVX2
+static bool __init avx2_usable(void)
+{
+ if (avx_usable() && cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI1) &&
+ boot_cpu_has(X86_FEATURE_BMI2))
+ return true;
+
+ return false;
+}
+#endif
#endif
static int __init sha1_ssse3_mod_init(void)
{
+ char *algo_name;
+
/* test for SSSE3 first */
- if (cpu_has_ssse3)
+ if (cpu_has_ssse3) {
sha1_transform_asm = sha1_transform_ssse3;
+ algo_name = "SSSE3";
+ }
#ifdef CONFIG_AS_AVX
/* allow AVX to override SSSE3, it's a little faster */
- if (avx_usable())
+ if (avx_usable()) {
sha1_transform_asm = sha1_transform_avx;
+ algo_name = "AVX";
+#ifdef CONFIG_AS_AVX2
+ /* allow AVX2 to override AVX, it's a little faster */
+ if (avx2_usable()) {
+ sha1_transform_asm = sha1_apply_transform_avx2;
+ algo_name = "AVX2";
+ }
+#endif
+ }
#endif
if (sha1_transform_asm) {
- pr_info("Using %s optimized SHA-1 implementation\n",
- sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3"
- : "AVX");
+ pr_info("Using %s optimized SHA-1 implementation\n", algo_name);
return crypto_register_shash(&alg);
}
- pr_info("Neither AVX nor SSSE3 is available/usable.\n");
+ pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n");
return -ENODEV;
}
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index e6a92455740e..69f1366f1aa3 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -1,7 +1,7 @@
/*
* This file is part of the Linux kernel.
*
- * Copyright (c) 2011, Intel Corporation
+ * Copyright (c) 2011-2014, Intel Corporation
* Authors: Fenghua Yu <fenghua.yu@intel.com>,
* H. Peter Anvin <hpa@linux.intel.com>
*
@@ -31,10 +31,13 @@
#define RDRAND_RETRY_LOOPS 10
#define RDRAND_INT ".byte 0x0f,0xc7,0xf0"
+#define RDSEED_INT ".byte 0x0f,0xc7,0xf8"
#ifdef CONFIG_X86_64
# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0"
+# define RDSEED_LONG ".byte 0x48,0x0f,0xc7,0xf8"
#else
# define RDRAND_LONG RDRAND_INT
+# define RDSEED_LONG RDSEED_INT
#endif
#ifdef CONFIG_ARCH_RANDOM
@@ -53,6 +56,16 @@ static inline int rdrand_long(unsigned long *v)
return ok;
}
+/* A single attempt at RDSEED */
+static inline bool rdseed_long(unsigned long *v)
+{
+ unsigned char ok;
+ asm volatile(RDSEED_LONG "\n\t"
+ "setc %0"
+ : "=qm" (ok), "=a" (*v));
+ return ok;
+}
+
#define GET_RANDOM(name, type, rdrand, nop) \
static inline int name(type *v) \
{ \
@@ -70,18 +83,40 @@ static inline int name(type *v) \
return ok; \
}
+#define GET_SEED(name, type, rdseed, nop) \
+static inline int name(type *v) \
+{ \
+ unsigned char ok; \
+ alternative_io("movb $0, %0\n\t" \
+ nop, \
+ rdseed "\n\t" \
+ "setc %0", \
+ X86_FEATURE_RDSEED, \
+ ASM_OUTPUT2("=q" (ok), "=a" (*v))); \
+ return ok; \
+}
+
#ifdef CONFIG_X86_64
GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5);
GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4);
+GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP5);
+GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4);
+
#else
GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3);
GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3);
+GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP4);
+GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4);
+
#endif /* CONFIG_X86_64 */
+#define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND)
+#define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED)
+
#else
static inline int rdrand_long(unsigned long *v)
@@ -89,6 +124,11 @@ static inline int rdrand_long(unsigned long *v)
return 0;
}
+static inline bool rdseed_long(unsigned long *v)
+{
+ return 0;
+}
+
#endif /* CONFIG_ARCH_RANDOM */
extern void x86_init_rdrand(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fdf83afbb7d9..fcaf9c961265 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -337,6 +337,11 @@ struct kvm_pmu {
u64 reprogram_pmi;
};
+enum {
+ KVM_DEBUGREG_BP_ENABLED = 1,
+ KVM_DEBUGREG_WONT_EXIT = 2,
+};
+
struct kvm_vcpu_arch {
/*
* rip and regs accesses must go through
@@ -444,7 +449,6 @@ struct kvm_vcpu_arch {
} st;
u64 last_guest_tsc;
- u64 last_kernel_ns;
u64 last_host_tsc;
u64 tsc_offset_adjustment;
u64 this_tsc_nsec;
@@ -464,7 +468,7 @@ struct kvm_vcpu_arch {
struct mtrr_state_type mtrr_state;
u32 pat;
- int switch_db_regs;
+ unsigned switch_db_regs;
unsigned long db[KVM_NR_DB_REGS];
unsigned long dr6;
unsigned long dr7;
@@ -599,6 +603,8 @@ struct kvm_arch {
bool use_master_clock;
u64 master_kernel_ns;
cycle_t master_cycle_now;
+ struct delayed_work kvmclock_update_work;
+ struct delayed_work kvmclock_sync_work;
struct kvm_xen_hvm_config xen_hvm_config;
@@ -702,6 +708,7 @@ struct kvm_x86_ops {
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
u64 (*get_dr6)(struct kvm_vcpu *vcpu);
void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
+ void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
@@ -728,8 +735,8 @@ struct kvm_x86_ops {
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
- int (*enable_nmi_window)(struct kvm_vcpu *vcpu);
- int (*enable_irq_window)(struct kvm_vcpu *vcpu);
+ void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
+ void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
int (*vm_has_apicv)(struct kvm *kvm);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
@@ -765,6 +772,9 @@ struct kvm_x86_ops {
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
void (*handle_external_intr)(struct kvm_vcpu *vcpu);
+ bool (*mpx_supported)(void);
+
+ int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
};
struct kvm_arch_async_pf {
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 2067264fb7f5..7004d21e6219 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -85,6 +85,7 @@
#define VM_EXIT_SAVE_IA32_EFER 0x00100000
#define VM_EXIT_LOAD_IA32_EFER 0x00200000
#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
+#define VM_EXIT_CLEAR_BNDCFGS 0x00800000
#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
@@ -95,6 +96,7 @@
#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000
#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
+#define VM_ENTRY_LOAD_BNDCFGS 0x00010000
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
@@ -174,6 +176,8 @@ enum vmcs_field {
GUEST_PDPTR2_HIGH = 0x0000280f,
GUEST_PDPTR3 = 0x00002810,
GUEST_PDPTR3_HIGH = 0x00002811,
+ GUEST_BNDCFGS = 0x00002812,
+ GUEST_BNDCFGS_HIGH = 0x00002813,
HOST_IA32_PAT = 0x00002c00,
HOST_IA32_PAT_HIGH = 0x00002c01,
HOST_IA32_EFER = 0x00002c02,
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 3e276eb23d1b..c949923a5668 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -49,10 +49,17 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern unsigned long set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e);
+extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
+ struct gnttab_map_grant_ref *kmap_ops,
+ struct page **pages, unsigned int count);
extern int m2p_add_override(unsigned long mfn, struct page *page,
struct gnttab_map_grant_ref *kmap_op);
+extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
+ struct gnttab_map_grant_ref *kmap_ops,
+ struct page **pages, unsigned int count);
extern int m2p_remove_override(struct page *page,
- struct gnttab_map_grant_ref *kmap_op);
+ struct gnttab_map_grant_ref *kmap_op,
+ unsigned long mfn);
extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
@@ -121,7 +128,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
pfn = m2p_find_override_pfn(mfn, ~0);
}
- /*
+ /*
* pfn is ~0 if there are no entries in the m2p for mfn or if the
* entry doesn't map back to the mfn and m2p_override doesn't have a
* valid entry for it.
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 6c1d7411eb00..d949ef28c48b 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -16,6 +16,8 @@
#define XSTATE_Hi16_ZMM 0x80
#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
+/* Bit 63 of XCR0 is reserved for future expansion */
+#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
#define FXSAVE_SIZE 512
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 4924f4be2b99..c827ace3121b 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -295,6 +295,7 @@
#define MSR_SMI_COUNT 0x00000034
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
#define MSR_IA32_TSC_ADJUST 0x0000003b
+#define MSR_IA32_BNDCFGS 0x00000d90
#define FEATURE_CONTROL_LOCKED (1<<0)
#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0641113e2965..a952e9c85b6f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1225,21 +1225,24 @@ static struct notifier_block cacheinfo_cpu_notifier = {
static int __init cache_sysfs_init(void)
{
- int i;
+ int i, err = 0;
if (num_cache_leaves == 0)
return 0;
+ cpu_notifier_register_begin();
for_each_online_cpu(i) {
- int err;
struct device *dev = get_cpu_device(i);
err = cache_add_dev(dev);
if (err)
- return err;
+ goto out;
}
- register_hotcpu_notifier(&cacheinfo_cpu_notifier);
- return 0;
+ __register_hotcpu_notifier(&cacheinfo_cpu_notifier);
+
+out:
+ cpu_notifier_register_done();
+ return err;
}
device_initcall(cache_sysfs_init);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 4d5419b249da..9b7734b1f975 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2434,14 +2434,18 @@ static __init int mcheck_init_device(void)
if (err)
return err;
+ cpu_notifier_register_begin();
for_each_online_cpu(i) {
err = mce_device_create(i);
- if (err)
+ if (err) {
+ cpu_notifier_register_done();
return err;
+ }
}
register_syscore_ops(&mce_syscore_ops);
- register_hotcpu_notifier(&mce_cpu_notifier);
+ __register_hotcpu_notifier(&mce_cpu_notifier);
+ cpu_notifier_register_done();
/* register character device /dev/mcelog */
misc_register(&mce_chrdev_device);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 3eec7de76efb..d921b7ee6595 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -271,9 +271,6 @@ static void thermal_throttle_remove_dev(struct device *dev)
sysfs_remove_group(&dev->kobj, &thermal_attr_group);
}
-/* Mutex protecting device creation against CPU hotplug: */
-static DEFINE_MUTEX(therm_cpu_lock);
-
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static int
thermal_throttle_cpu_callback(struct notifier_block *nfb,
@@ -289,18 +286,14 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- mutex_lock(&therm_cpu_lock);
err = thermal_throttle_add_dev(dev, cpu);
- mutex_unlock(&therm_cpu_lock);
WARN_ON(err);
break;
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
- mutex_lock(&therm_cpu_lock);
thermal_throttle_remove_dev(dev);
- mutex_unlock(&therm_cpu_lock);
break;
}
return notifier_from_errno(err);
@@ -319,19 +312,16 @@ static __init int thermal_throttle_init_device(void)
if (!atomic_read(&therm_throt_en))
return 0;
- register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
+ cpu_notifier_register_begin();
-#ifdef CONFIG_HOTPLUG_CPU
- mutex_lock(&therm_cpu_lock);
-#endif
/* connect live CPUs to sysfs */
for_each_online_cpu(cpu) {
err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
WARN_ON(err);
}
-#ifdef CONFIG_HOTPLUG_CPU
- mutex_unlock(&therm_cpu_lock);
-#endif
+
+ __register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
+ cpu_notifier_register_done();
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 4b8e4d3cd6ea..4c36bbe3173a 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -926,13 +926,13 @@ static __init int amd_ibs_init(void)
goto out;
perf_ibs_pm_init();
- get_online_cpus();
+ cpu_notifier_register_begin();
ibs_caps = caps;
/* make ibs_caps visible to other cpus: */
smp_mb();
- perf_cpu_notifier(perf_ibs_cpu_notifier);
smp_call_function(setup_APIC_ibs, NULL, 1);
- put_online_cpus();
+ __perf_cpu_notifier(perf_ibs_cpu_notifier);
+ cpu_notifier_register_done();
ret = perf_event_ibs_init();
out:
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
index 754291adec33..3bbdf4cd38b9 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -531,15 +531,16 @@ static int __init amd_uncore_init(void)
if (ret)
return -ENODEV;
- get_online_cpus();
+ cpu_notifier_register_begin();
+
/* init cpus already online before registering for hotplug notifier */
for_each_online_cpu(cpu) {
amd_uncore_cpu_up_prepare(cpu);
smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
}
- register_cpu_notifier(&amd_uncore_cpu_notifier_block);
- put_online_cpus();
+ __register_cpu_notifier(&amd_uncore_cpu_notifier_block);
+ cpu_notifier_register_done();
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 5ad35ad94d0f..059218ed5208 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -646,19 +646,20 @@ static int __init rapl_pmu_init(void)
/* unsupported */
return 0;
}
- get_online_cpus();
+
+ cpu_notifier_register_begin();
for_each_online_cpu(cpu) {
rapl_cpu_prepare(cpu);
rapl_cpu_init(cpu);
}
- perf_cpu_notifier(rapl_cpu_notifier);
+ __perf_cpu_notifier(rapl_cpu_notifier);
ret = perf_pmu_register(&rapl_pmu_class, "power", -1);
if (WARN_ON(ret)) {
pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret);
- put_online_cpus();
+ cpu_notifier_register_done();
return -1;
}
@@ -672,7 +673,7 @@ static int __init rapl_pmu_init(void)
hweight32(rapl_cntr_mask),
ktime_to_ms(pmu->timer_interval));
- put_online_cpus();
+ cpu_notifier_register_done();
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index bd2253d40cff..65bbbea38b9c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -4244,7 +4244,7 @@ static void __init uncore_cpumask_init(void)
if (!cpumask_empty(&uncore_cpu_mask))
return;
- get_online_cpus();
+ cpu_notifier_register_begin();
for_each_online_cpu(cpu) {
int i, phys_id = topology_physical_package_id(cpu);
@@ -4263,9 +4263,9 @@ static void __init uncore_cpumask_init(void)
}
on_each_cpu(uncore_cpu_setup, NULL, 1);
- register_cpu_notifier(&uncore_cpu_nb);
+ __register_cpu_notifier(&uncore_cpu_nb);
- put_online_cpus();
+ cpu_notifier_register_done();
}
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 7d9481c743f8..3225ae6c5180 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -198,14 +198,15 @@ static int __init cpuid_init(void)
goto out_chrdev;
}
cpuid_class->devnode = cpuid_devnode;
- get_online_cpus();
+
+ cpu_notifier_register_begin();
for_each_online_cpu(i) {
err = cpuid_device_create(i);
if (err != 0)
goto out_class;
}
- register_hotcpu_notifier(&cpuid_class_cpu_notifier);
- put_online_cpus();
+ __register_hotcpu_notifier(&cpuid_class_cpu_notifier);
+ cpu_notifier_register_done();
err = 0;
goto out;
@@ -215,7 +216,7 @@ out_class:
for_each_online_cpu(i) {
cpuid_device_destroy(i);
}
- put_online_cpus();
+ cpu_notifier_register_done();
class_destroy(cpuid_class);
out_chrdev:
__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
@@ -227,13 +228,13 @@ static void __exit cpuid_exit(void)
{
int cpu = 0;
- get_online_cpus();
+ cpu_notifier_register_begin();
for_each_online_cpu(cpu)
cpuid_device_destroy(cpu);
class_destroy(cpuid_class);
__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
- unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
- put_online_cpus();
+ __unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
+ cpu_notifier_register_done();
}
module_init(cpuid_init);
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index e6253195a301..52819e816f87 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -308,7 +308,10 @@ static int ftrace_write(unsigned long ip, const char *val, int size)
if (within(ip, (unsigned long)_text, (unsigned long)_etext))
ip = (unsigned long)__va(__pa_symbol(ip));
- return probe_kernel_write((void *)ip, val, size);
+ if (probe_kernel_write((void *)ip, val, size))
+ return -EPERM;
+
+ return 0;
}
static int add_break(unsigned long ip, const char *old)
@@ -323,10 +326,7 @@ static int add_break(unsigned long ip, const char *old)
if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
return -EINVAL;
- if (ftrace_write(ip, &brk, 1))
- return -EPERM;
-
- return 0;
+ return ftrace_write(ip, &brk, 1);
}
static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
@@ -425,7 +425,7 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
/* If this does not have a breakpoint, we are done */
if (ins[0] != brk)
- return -1;
+ return 0;
nop = ftrace_nop_replace();
@@ -455,7 +455,7 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
}
update:
- return probe_kernel_write((void *)ip, &nop[0], 1);
+ return ftrace_write(ip, nop, 1);
}
static int add_update_code(unsigned long ip, unsigned const char *new)
@@ -463,9 +463,7 @@ static int add_update_code(unsigned long ip, unsigned const char *new)
/* skip breakpoint */
ip++;
new++;
- if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
- return -EPERM;
- return 0;
+ return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1);
}
static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
@@ -520,10 +518,7 @@ static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
new = ftrace_call_replace(ip, addr);
- if (ftrace_write(ip, new, 1))
- return -EPERM;
-
- return 0;
+ return ftrace_write(ip, new, 1);
}
static int finish_update_nop(struct dyn_ftrace *rec)
@@ -533,9 +528,7 @@ static int finish_update_nop(struct dyn_ftrace *rec)
new = ftrace_nop_replace();
- if (ftrace_write(ip, new, 1))
- return -EPERM;
- return 0;
+ return ftrace_write(ip, new, 1);
}
static int finish_update(struct dyn_ftrace *rec, int enable)
@@ -632,8 +625,14 @@ void ftrace_replace_code(int enable)
printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
for_ftrace_rec_iter(iter) {
rec = ftrace_rec_iter_record(iter);
- remove_breakpoint(rec);
+ /*
+ * Breakpoints are handled only when this function is in
+ * progress. The system could not work with them.
+ */
+ if (remove_breakpoint(rec))
+ BUG();
}
+ run_sync();
}
static int
@@ -655,16 +654,19 @@ ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
run_sync();
ret = ftrace_write(ip, new_code, 1);
- if (ret) {
- ret = -EPERM;
- goto out;
- }
- run_sync();
+ /*
+ * The breakpoint is handled only when this function is in progress.
+ * The system could not work if we could not remove it.
+ */
+ BUG_ON(ret);
out:
+ run_sync();
return ret;
fail_update:
- probe_kernel_write((void *)ip, &old_code[0], 1);
+ /* Also here the system could not work with the breakpoint */
+ if (ftrace_write(ip, old_code, 1))
+ BUG();
goto out;
}
@@ -678,11 +680,8 @@ void arch_ftrace_update_code(int command)
atomic_dec(&modifying_ftrace_code);
}
-int __init ftrace_dyn_arch_init(void *data)
+int __init ftrace_dyn_arch_init(void)
{
- /* The return code is retured via data */
- *(unsigned long *)data = 0;
-
return 0;
}
#endif
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 93eed15a8fd4..8d80ae011603 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -941,12 +941,14 @@ static __init int hpet_late_init(void)
if (boot_cpu_has(X86_FEATURE_ARAT))
return 0;
+ cpu_notifier_register_begin();
for_each_online_cpu(cpu) {
hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
}
/* This notifier should be called after workqueue is ready */
- hotcpu_notifier(hpet_cpuhp_notify, -20);
+ __hotcpu_notifier(hpet_cpuhp_notify, -20);
+ cpu_notifier_register_done();
return 0;
}
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 713f1b3bad52..0331cb389d68 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -417,7 +417,6 @@ void kvm_disable_steal_time(void)
#ifdef CONFIG_SMP
static void __init kvm_smp_prepare_boot_cpu(void)
{
- WARN_ON(kvm_register_clock("primary cpu clock"));
kvm_guest_cpu_init();
native_smp_prepare_boot_cpu();
kvm_spinlock_init();
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e6041094ff26..d9156ceecdff 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -242,7 +242,7 @@ void __init kvmclock_init(void)
hv_clock = __va(mem);
memset(hv_clock, 0, size);
- if (kvm_register_clock("boot clock")) {
+ if (kvm_register_clock("primary cpu clock")) {
hv_clock = NULL;
memblock_free(mem, size);
return;
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 05266b5aae22..c9603ac80de5 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -259,14 +259,15 @@ static int __init msr_init(void)
goto out_chrdev;
}
msr_class->devnode = msr_devnode;
- get_online_cpus();
+
+ cpu_notifier_register_begin();
for_each_online_cpu(i) {
err = msr_device_create(i);
if (err != 0)
goto out_class;
}
- register_hotcpu_notifier(&msr_class_cpu_notifier);
- put_online_cpus();
+ __register_hotcpu_notifier(&msr_class_cpu_notifier);
+ cpu_notifier_register_done();
err = 0;
goto out;
@@ -275,7 +276,7 @@ out_class:
i = 0;
for_each_online_cpu(i)
msr_device_destroy(i);
- put_online_cpus();
+ cpu_notifier_register_done();
class_destroy(msr_class);
out_chrdev:
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
@@ -286,13 +287,14 @@ out:
static void __exit msr_exit(void)
{
int cpu = 0;
- get_online_cpus();
+
+ cpu_notifier_register_begin();
for_each_online_cpu(cpu)
msr_device_destroy(cpu);
class_destroy(msr_class);
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
- unregister_hotcpu_notifier(&msr_class_cpu_notifier);
- put_online_cpus();
+ __unregister_hotcpu_notifier(&msr_class_cpu_notifier);
+ cpu_notifier_register_done();
}
module_init(msr_init);
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 9ea287666c65..8b3b3eb3cead 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -348,9 +348,13 @@ static int __init vsyscall_init(void)
{
BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE));
+ cpu_notifier_register_begin();
+
on_each_cpu(cpu_vsyscall_init, NULL, 1);
/* notifier priority > KVM */
- hotcpu_notifier(cpu_vsyscall_notifier, 30);
+ __hotcpu_notifier(cpu_vsyscall_notifier, 30);
+
+ cpu_notifier_register_done();
return 0;
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e5503d8aec1d..bea60671ef8a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -28,7 +28,7 @@ static u32 xstate_required_size(u64 xstate_bv)
int feature_bit = 0;
u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
- xstate_bv &= ~XSTATE_FPSSE;
+ xstate_bv &= XSTATE_EXTEND_MASK;
while (xstate_bv) {
if (xstate_bv & 0x1) {
u32 eax, ebx, ecx, edx;
@@ -43,6 +43,16 @@ static u32 xstate_required_size(u64 xstate_bv)
return ret;
}
+u64 kvm_supported_xcr0(void)
+{
+ u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
+
+ if (!kvm_x86_ops->mpx_supported())
+ xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
+
+ return xcr0;
+}
+
void kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -73,9 +83,9 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
} else {
vcpu->arch.guest_supported_xcr0 =
(best->eax | ((u64)best->edx << 32)) &
- host_xcr0 & KVM_SUPPORTED_XCR0;
- vcpu->arch.guest_xstate_size =
- xstate_required_size(vcpu->arch.guest_supported_xcr0);
+ kvm_supported_xcr0();
+ vcpu->arch.guest_xstate_size = best->ebx =
+ xstate_required_size(vcpu->arch.xcr0);
}
kvm_pmu_cpuid_update(vcpu);
@@ -210,13 +220,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->flags = 0;
}
-static bool supported_xcr0_bit(unsigned bit)
-{
- u64 mask = ((u64)1 << bit);
-
- return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
-}
-
#define F(x) bit(X86_FEATURE_##x)
static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
@@ -256,6 +259,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
#endif
unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
+ unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
/* cpuid 1.edx */
const u32 kvm_supported_word0_x86_features =
@@ -303,7 +307,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.ebx */
const u32 kvm_supported_word9_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
- F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
+ F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
+ F(ADX);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -436,16 +441,18 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
case 0xd: {
int idx, i;
+ u64 supported = kvm_supported_xcr0();
- entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
- entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
+ entry->eax &= supported;
+ entry->edx &= supported >> 32;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
for (idx = 1, i = 1; idx < 64; ++idx) {
+ u64 mask = ((u64)1 << idx);
if (*nent >= maxnent)
goto out;
do_cpuid_1_ent(&entry[i], function, idx);
- if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
+ if (entry[i].eax == 0 || !(supported & mask))
continue;
entry[i].flags |=
KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 07ffca0a89e9..205b17eed93c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3668,6 +3668,10 @@ static const struct gprefix pfx_vmovntpx = {
I(0, em_mov), N, N, N,
};
+static const struct gprefix pfx_0f_28_0f_29 = {
+ I(Aligned, em_mov), I(Aligned, em_mov), N, N,
+};
+
static const struct escape escape_d9 = { {
N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
}, {
@@ -3870,7 +3874,9 @@ static const struct opcode twobyte_table[256] = {
IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write),
IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write),
N, N, N, N,
- N, N, N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx),
+ GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
+ GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
+ N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx),
N, N, N, N,
/* 0x30 - 0x3F */
II(ImplicitOps | Priv, em_wrmsr, wrmsr),
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9b531351a587..f5704d9e5ddc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3329,7 +3329,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
arch.direct_map = vcpu->arch.mmu.direct_map;
arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
- return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
+ return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch);
}
static bool can_do_async_pf(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index cba218a2f08d..b1e6c1bf68d3 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -913,7 +913,8 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
* and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't
* used by guest then tlbs are not flushed, so guest is allowed to access the
* freed pages.
- * And we increase kvm->tlbs_dirty to delay tlbs flush in this case.
+ * We set tlbs_dirty to let the notifier know this change and delay the flush
+ * until such a case actually happens.
*/
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
@@ -942,7 +943,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
return -EINVAL;
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
- vcpu->kvm->tlbs_dirty++;
+ vcpu->kvm->tlbs_dirty = true;
continue;
}
@@ -957,7 +958,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
if (gfn != sp->gfns[i]) {
drop_spte(vcpu->kvm, &sp->spt[i]);
- vcpu->kvm->tlbs_dirty++;
+ vcpu->kvm->tlbs_dirty = true;
continue;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2de1bc09a8d4..7f4f9c2badae 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -34,6 +34,7 @@
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
+#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/virtext.h>
@@ -303,20 +304,35 @@ static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
return vmcb->control.intercept_cr & (1U << bit);
}
-static inline void set_dr_intercept(struct vcpu_svm *svm, int bit)
+static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = get_host_vmcb(svm);
- vmcb->control.intercept_dr |= (1U << bit);
+ vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
+ | (1 << INTERCEPT_DR1_READ)
+ | (1 << INTERCEPT_DR2_READ)
+ | (1 << INTERCEPT_DR3_READ)
+ | (1 << INTERCEPT_DR4_READ)
+ | (1 << INTERCEPT_DR5_READ)
+ | (1 << INTERCEPT_DR6_READ)
+ | (1 << INTERCEPT_DR7_READ)
+ | (1 << INTERCEPT_DR0_WRITE)
+ | (1 << INTERCEPT_DR1_WRITE)
+ | (1 << INTERCEPT_DR2_WRITE)
+ | (1 << INTERCEPT_DR3_WRITE)
+ | (1 << INTERCEPT_DR4_WRITE)
+ | (1 << INTERCEPT_DR5_WRITE)
+ | (1 << INTERCEPT_DR6_WRITE)
+ | (1 << INTERCEPT_DR7_WRITE);
recalc_intercepts(svm);
}
-static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit)
+static inline void clr_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = get_host_vmcb(svm);
- vmcb->control.intercept_dr &= ~(1U << bit);
+ vmcb->control.intercept_dr = 0;
recalc_intercepts(svm);
}
@@ -1080,23 +1096,7 @@ static void init_vmcb(struct vcpu_svm *svm)
set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR0_READ);
- set_dr_intercept(svm, INTERCEPT_DR1_READ);
- set_dr_intercept(svm, INTERCEPT_DR2_READ);
- set_dr_intercept(svm, INTERCEPT_DR3_READ);
- set_dr_intercept(svm, INTERCEPT_DR4_READ);
- set_dr_intercept(svm, INTERCEPT_DR5_READ);
- set_dr_intercept(svm, INTERCEPT_DR6_READ);
- set_dr_intercept(svm, INTERCEPT_DR7_READ);
-
- set_dr_intercept(svm, INTERCEPT_DR0_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR1_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR2_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR3_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR4_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR5_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR6_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR7_WRITE);
+ set_dr_intercepts(svm);
set_exception_intercept(svm, PF_VECTOR);
set_exception_intercept(svm, UD_VECTOR);
@@ -1684,6 +1684,21 @@ static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
mark_dirty(svm->vmcb, VMCB_DR);
}
+static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ get_debugreg(vcpu->arch.db[0], 0);
+ get_debugreg(vcpu->arch.db[1], 1);
+ get_debugreg(vcpu->arch.db[2], 2);
+ get_debugreg(vcpu->arch.db[3], 3);
+ vcpu->arch.dr6 = svm_get_dr6(vcpu);
+ vcpu->arch.dr7 = svm->vmcb->save.dr7;
+
+ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
+ set_dr_intercepts(svm);
+}
+
static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -2842,6 +2857,7 @@ static int iret_interception(struct vcpu_svm *svm)
clr_intercept(svm, INTERCEPT_IRET);
svm->vcpu.arch.hflags |= HF_IRET_MASK;
svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
return 1;
}
@@ -2974,6 +2990,17 @@ static int dr_interception(struct vcpu_svm *svm)
unsigned long val;
int err;
+ if (svm->vcpu.guest_debug == 0) {
+ /*
+ * No more DR vmexits; force a reload of the debug registers
+ * and reenter on this instruction. The next vmexit will
+ * retrieve the full state of the debug registers.
+ */
+ clr_dr_intercepts(svm);
+ svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
+ return 1;
+ }
+
if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
return emulate_on_interception(svm);
@@ -3649,7 +3676,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
return ret;
}
-static int enable_irq_window(struct kvm_vcpu *vcpu)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3663,16 +3690,15 @@ static int enable_irq_window(struct kvm_vcpu *vcpu)
svm_set_vintr(svm);
svm_inject_irq(svm, 0x0);
}
- return 0;
}
-static int enable_nmi_window(struct kvm_vcpu *vcpu)
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
== HF_NMI_MASK)
- return 0; /* IRET will cause a vm exit */
+ return; /* IRET will cause a vm exit */
/*
* Something prevents NMI from been injected. Single step over possible
@@ -3681,7 +3707,6 @@ static int enable_nmi_window(struct kvm_vcpu *vcpu)
svm->nmi_singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
update_db_bp_intercept(vcpu);
- return 0;
}
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -4064,6 +4089,11 @@ static bool svm_invpcid_supported(void)
return false;
}
+static bool svm_mpx_supported(void)
+{
+ return false;
+}
+
static bool svm_has_wbinvd_exit(void)
{
return true;
@@ -4302,6 +4332,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.get_dr6 = svm_get_dr6,
.set_dr6 = svm_set_dr6,
.set_dr7 = svm_set_dr7,
+ .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
.cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
@@ -4345,6 +4376,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.rdtscp_supported = svm_rdtscp_supported,
.invpcid_supported = svm_invpcid_supported,
+ .mpx_supported = svm_mpx_supported,
.set_supported_cpuid = svm_set_supported_cpuid,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 392752834751..1320e0f8e611 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -31,6 +31,7 @@
#include <linux/ftrace_event.h>
#include <linux/slab.h>
#include <linux/tboot.h>
+#include <linux/hrtimer.h>
#include "kvm_cache_regs.h"
#include "x86.h"
@@ -42,6 +43,7 @@
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/perf_event.h>
+#include <asm/debugreg.h>
#include <asm/kexec.h>
#include "trace.h"
@@ -110,6 +112,8 @@ module_param(nested, bool, S_IRUGO);
#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
+#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
+
/*
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
* ple_gap: upper bound on the amount of time between two successive
@@ -202,6 +206,7 @@ struct __packed vmcs12 {
u64 guest_pdptr1;
u64 guest_pdptr2;
u64 guest_pdptr3;
+ u64 guest_bndcfgs;
u64 host_ia32_pat;
u64 host_ia32_efer;
u64 host_ia32_perf_global_ctrl;
@@ -374,6 +379,9 @@ struct nested_vmx {
*/
struct page *apic_access_page;
u64 msr_ia32_feature_control;
+
+ struct hrtimer preemption_timer;
+ bool preemption_timer_expired;
};
#define POSTED_INTR_ON 0
@@ -441,6 +449,7 @@ struct vcpu_vmx {
#endif
int gs_ldt_reload_needed;
int fs_reload_needed;
+ u64 msr_host_bndcfgs;
} host_state;
struct {
int vm86_active;
@@ -533,6 +542,7 @@ static const unsigned long shadow_read_write_fields[] = {
GUEST_CS_LIMIT,
GUEST_CS_BASE,
GUEST_ES_BASE,
+ GUEST_BNDCFGS,
CR0_GUEST_HOST_MASK,
CR0_READ_SHADOW,
CR4_READ_SHADOW,
@@ -588,6 +598,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
FIELD64(GUEST_PDPTR1, guest_pdptr1),
FIELD64(GUEST_PDPTR2, guest_pdptr2),
FIELD64(GUEST_PDPTR3, guest_pdptr3),
+ FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
FIELD64(HOST_IA32_PAT, host_ia32_pat),
FIELD64(HOST_IA32_EFER, host_ia32_efer),
FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
@@ -718,6 +729,7 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(unsigned long root_hpa);
static void kvm_cpu_vmxon(u64 addr);
static void kvm_cpu_vmxoff(void);
+static bool vmx_mpx_supported(void);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
@@ -728,6 +740,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
+static bool vmx_mpx_supported(void);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -1047,6 +1060,12 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
}
+static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
+{
+ return vmcs12->pin_based_vm_exec_control &
+ PIN_BASED_VMX_PREEMPTION_TIMER;
+}
+
static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
{
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
@@ -1710,6 +1729,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
if (is_long_mode(&vmx->vcpu))
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#endif
+ if (boot_cpu_has(X86_FEATURE_MPX))
+ rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
for (i = 0; i < vmx->save_nmsrs; ++i)
kvm_set_shared_msr(vmx->guest_msrs[i].index,
vmx->guest_msrs[i].data,
@@ -1747,6 +1768,8 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
#ifdef CONFIG_X86_64
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
+ if (vmx->host_state.msr_host_bndcfgs)
+ wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
/*
* If the FPU is not active (through the host task or
* the guest vcpu), then restore the cr0.TS bit.
@@ -2248,9 +2271,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
*/
nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
- PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS |
+ PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS;
+ nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
- nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
/*
* Exit controls
@@ -2265,15 +2288,12 @@ static __init void nested_vmx_setup_ctls_msrs(void)
#ifdef CONFIG_X86_64
VM_EXIT_HOST_ADDR_SPACE_SIZE |
#endif
- VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+ VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+ nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
+ VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
- if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
- !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
- nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
- nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
- }
- nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
- VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
+ if (vmx_mpx_supported())
+ nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
/* entry controls */
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2287,6 +2307,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
VM_ENTRY_LOAD_IA32_PAT;
nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
VM_ENTRY_LOAD_IA32_EFER);
+ if (vmx_mpx_supported())
+ nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
/* cpu-based controls */
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2342,9 +2364,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
/* miscellaneous data */
rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
- nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
- VMX_MISC_SAVE_EFER_LMA;
- nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT;
+ nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
+ nested_vmx_misc_low |= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
+ VMX_MISC_ACTIVITY_HLT;
nested_vmx_misc_high = 0;
}
@@ -2479,6 +2501,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
case MSR_IA32_SYSENTER_ESP:
data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
+ case MSR_IA32_BNDCFGS:
+ if (!vmx_mpx_supported())
+ return 1;
+ data = vmcs_read64(GUEST_BNDCFGS);
+ break;
case MSR_IA32_FEATURE_CONTROL:
if (!nested_vmx_allowed(vcpu))
return 1;
@@ -2547,6 +2574,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_SYSENTER_ESP:
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
+ case MSR_IA32_BNDCFGS:
+ if (!vmx_mpx_supported())
+ return 1;
+ vmcs_write64(GUEST_BNDCFGS, data);
+ break;
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr_info);
break;
@@ -2832,12 +2864,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
vmx_capability.ept, vmx_capability.vpid);
}
- min = 0;
+ min = VM_EXIT_SAVE_DEBUG_CONTROLS;
#ifdef CONFIG_X86_64
min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
#endif
opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
- VM_EXIT_ACK_INTR_ON_EXIT;
+ VM_EXIT_ACK_INTR_ON_EXIT | VM_EXIT_CLEAR_BNDCFGS;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
&_vmexit_control) < 0)
return -EIO;
@@ -2853,8 +2885,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
!(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
- min = 0;
- opt = VM_ENTRY_LOAD_IA32_PAT;
+ min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
+ opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
&_vmentry_control) < 0)
return -EIO;
@@ -4223,6 +4255,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
static u32 vmx_exec_control(struct vcpu_vmx *vmx)
{
u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
+
+ if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
+ exec_control &= ~CPU_BASED_MOV_DR_EXITING;
+
if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) {
exec_control &= ~CPU_BASED_TPR_SHADOW;
#ifdef CONFIG_X86_64
@@ -4496,39 +4532,28 @@ static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
PIN_BASED_NMI_EXITING;
}
-static int enable_irq_window(struct kvm_vcpu *vcpu)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
{
u32 cpu_based_vm_exec_control;
- if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
- /*
- * We get here if vmx_interrupt_allowed() said we can't
- * inject to L1 now because L2 must run. The caller will have
- * to make L2 exit right after entry, so we can inject to L1
- * more promptly.
- */
- return -EBUSY;
-
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
- return 0;
}
-static int enable_nmi_window(struct kvm_vcpu *vcpu)
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
u32 cpu_based_vm_exec_control;
- if (!cpu_has_virtual_nmis())
- return enable_irq_window(vcpu);
-
- if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI)
- return enable_irq_window(vcpu);
+ if (!cpu_has_virtual_nmis() ||
+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+ enable_irq_window(vcpu);
+ return;
+ }
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
- return 0;
}
static void vmx_inject_irq(struct kvm_vcpu *vcpu)
@@ -4620,22 +4645,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu)) {
- if (to_vmx(vcpu)->nested.nested_run_pending)
- return 0;
- if (nested_exit_on_nmi(vcpu)) {
- nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
- NMI_VECTOR | INTR_TYPE_NMI_INTR |
- INTR_INFO_VALID_MASK, 0);
- /*
- * The NMI-triggered VM exit counts as injection:
- * clear this one and block further NMIs.
- */
- vcpu->arch.nmi_pending = 0;
- vmx_set_nmi_mask(vcpu, true);
- return 0;
- }
- }
+ if (to_vmx(vcpu)->nested.nested_run_pending)
+ return 0;
if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
return 0;
@@ -4647,19 +4658,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu)) {
- if (to_vmx(vcpu)->nested.nested_run_pending)
- return 0;
- if (nested_exit_on_intr(vcpu)) {
- nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
- 0, 0);
- /*
- * fall through to normal code, but now in L1, not L2
- */
- }
- }
-
- return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+ return (!to_vmx(vcpu)->nested.nested_run_pending &&
+ vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
}
@@ -5102,6 +5102,22 @@ static int handle_dr(struct kvm_vcpu *vcpu)
}
}
+ if (vcpu->guest_debug == 0) {
+ u32 cpu_based_vm_exec_control;
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control &= ~CPU_BASED_MOV_DR_EXITING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+
+ /*
+ * No more DR vmexits; force a reload of the debug registers
+ * and reenter on this instruction. The next vmexit will
+ * retrieve the full state of the debug registers.
+ */
+ vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
+ return 1;
+ }
+
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
reg = DEBUG_REG_ACCESS_REG(exit_qualification);
@@ -5128,6 +5144,24 @@ static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
{
}
+static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
+{
+ u32 cpu_based_vm_exec_control;
+
+ get_debugreg(vcpu->arch.db[0], 0);
+ get_debugreg(vcpu->arch.db[1], 1);
+ get_debugreg(vcpu->arch.db[2], 2);
+ get_debugreg(vcpu->arch.db[3], 3);
+ get_debugreg(vcpu->arch.dr6, 6);
+ vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
+
+ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control |= CPU_BASED_MOV_DR_EXITING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
{
vmcs_writel(GUEST_DR7, val);
@@ -5727,6 +5761,18 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
*/
}
+static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
+{
+ struct vcpu_vmx *vmx =
+ container_of(timer, struct vcpu_vmx, nested.preemption_timer);
+
+ vmx->nested.preemption_timer_expired = true;
+ kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
+ kvm_vcpu_kick(&vmx->vcpu);
+
+ return HRTIMER_NORESTART;
+}
+
/*
* Emulate the VMXON instruction.
* Currently, we just remember that VMX is active, and do not save or even
@@ -5791,6 +5837,10 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
vmx->nested.vmcs02_num = 0;
+ hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
+
vmx->nested.vmxon = true;
skip_emulated_instruction(vcpu);
@@ -6767,9 +6817,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
* table is L0's fault.
*/
return 0;
- case EXIT_REASON_PREEMPTION_TIMER:
- return vmcs12->pin_based_vm_exec_control &
- PIN_BASED_VMX_PREEMPTION_TIMER;
case EXIT_REASON_WBINVD:
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
case EXIT_REASON_XSETBV:
@@ -6785,27 +6832,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
}
-static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
-{
- u64 delta_tsc_l1;
- u32 preempt_val_l1, preempt_val_l2, preempt_scale;
-
- if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
- PIN_BASED_VMX_PREEMPTION_TIMER))
- return;
- preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
- MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
- preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
- delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
- - vcpu->arch.last_guest_tsc;
- preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
- if (preempt_val_l2 <= preempt_val_l1)
- preempt_val_l2 = 0;
- else
- preempt_val_l2 -= preempt_val_l1;
- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
-}
-
/*
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
@@ -7052,6 +7078,12 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
local_irq_enable();
}
+static bool vmx_mpx_supported(void)
+{
+ return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
+ (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
+}
+
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -7218,8 +7250,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
atomic_switch_perf_msrs(vmx);
debugctlmsr = get_debugctlmsr();
- if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
- nested_adjust_preemption_timer(vcpu);
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
/* Store host registers */
@@ -7616,6 +7646,28 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
kvm_inject_page_fault(vcpu, fault);
}
+static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
+{
+ u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (vcpu->arch.virtual_tsc_khz == 0)
+ return;
+
+ /* Make sure short timeouts reliably trigger an immediate vmexit.
+ * hrtimer_start does not guarantee this. */
+ if (preemption_timeout <= 1) {
+ vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
+ return;
+ }
+
+ preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
+ preemption_timeout *= 1000000;
+ do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
+ hrtimer_start(&vmx->nested.preemption_timer,
+ ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
+}
+
/*
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -7629,7 +7681,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control;
- u32 exit_control;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7687,13 +7738,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs_write64(VMCS_LINK_POINTER, -1ull);
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
- (vmcs_config.pin_based_exec_ctrl |
- vmcs12->pin_based_vm_exec_control));
+ exec_control = vmcs12->pin_based_vm_exec_control;
+ exec_control |= vmcs_config.pin_based_exec_ctrl;
+ exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
- if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE,
- vmcs12->vmx_preemption_timer_value);
+ vmx->nested.preemption_timer_expired = false;
+ if (nested_cpu_has_preemption_timer(vmcs12))
+ vmx_start_preemption_timer(vcpu);
/*
* Whether page-faults are trapped is determined by a combination of
@@ -7721,7 +7773,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
enable_ept ? vmcs12->page_fault_error_code_match : 0);
if (cpu_has_secondary_exec_ctrls()) {
- u32 exec_control = vmx_secondary_exec_control(vmx);
+ exec_control = vmx_secondary_exec_control(vmx);
if (!vmx->rdtscp_enabled)
exec_control &= ~SECONDARY_EXEC_RDTSCP;
/* Take the following fields only from vmcs12 */
@@ -7808,10 +7860,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
* bits are further modified by vmx_set_efer() below.
*/
- exit_control = vmcs_config.vmexit_ctrl;
- if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
- exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
- vm_exit_controls_init(vmx, exit_control);
+ vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
* emulated by vmx_set_efer(), below.
@@ -7830,6 +7879,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
set_cr4_guest_host_mask(vmx);
+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
vmcs_write64(TSC_OFFSET,
vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
@@ -8155,6 +8207,58 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
}
}
+static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
+ vmx->nested.preemption_timer_expired) {
+ if (vmx->nested.nested_run_pending)
+ return -EBUSY;
+ nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
+ return 0;
+ }
+
+ if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
+ if (vmx->nested.nested_run_pending ||
+ vcpu->arch.interrupt.pending)
+ return -EBUSY;
+ nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+ NMI_VECTOR | INTR_TYPE_NMI_INTR |
+ INTR_INFO_VALID_MASK, 0);
+ /*
+ * The NMI-triggered VM exit counts as injection:
+ * clear this one and block further NMIs.
+ */
+ vcpu->arch.nmi_pending = 0;
+ vmx_set_nmi_mask(vcpu, true);
+ return 0;
+ }
+
+ if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
+ nested_exit_on_intr(vcpu)) {
+ if (vmx->nested.nested_run_pending)
+ return -EBUSY;
+ nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+ }
+
+ return 0;
+}
+
+static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
+{
+ ktime_t remaining =
+ hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
+ u64 value;
+
+ if (ktime_to_ns(remaining) <= 0)
+ return 0;
+
+ value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
+ do_div(value, 1000000);
+ return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
+}
+
/*
* prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
* and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -8225,10 +8329,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
else
vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
- if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
- (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
- vmcs12->vmx_preemption_timer_value =
- vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+ if (nested_cpu_has_preemption_timer(vmcs12)) {
+ if (vmcs12->vm_exit_controls &
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
+ vmcs12->vmx_preemption_timer_value =
+ vmx_get_preemption_timer_value(vcpu);
+ hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
+ }
/*
* In some cases (usually, nested EPT), L2 is allowed to change its
@@ -8260,6 +8367,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
+ if (vmx_mpx_supported())
+ vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
/* update exit information fields: */
@@ -8369,6 +8478,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
+ /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
+ if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
+ vmcs_write64(GUEST_BNDCFGS, 0);
+
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
vcpu->arch.pat = vmcs12->host_ia32_pat;
@@ -8495,6 +8608,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
nested_vmx_succeed(vcpu);
if (enable_shadow_vmcs)
vmx->nested.sync_shadow_vmcs = true;
+
+ /* in case we halted in L2 */
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
/*
@@ -8573,6 +8689,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.get_dr6 = vmx_get_dr6,
.set_dr6 = vmx_set_dr6,
.set_dr7 = vmx_set_dr7,
+ .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
.cache_reg = vmx_cache_reg,
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
@@ -8634,6 +8751,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
.check_intercept = vmx_check_intercept,
.handle_external_intr = vmx_handle_external_intr,
+ .mpx_supported = vmx_mpx_supported,
+
+ .check_nested_events = vmx_check_nested_events,
};
static int __init vmx_init(void)
@@ -8721,6 +8841,8 @@ static int __init vmx_init(void)
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+ vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+
memcpy(vmx_msr_bitmap_legacy_x2apic,
vmx_msr_bitmap_legacy, PAGE_SIZE);
memcpy(vmx_msr_bitmap_longmode_x2apic,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2b8578432d5b..9d1b5cd4d34c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -595,13 +595,13 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
- u64 xcr0;
+ u64 xcr0 = xcr;
+ u64 old_xcr0 = vcpu->arch.xcr0;
u64 valid_bits;
/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
if (index != XCR_XFEATURE_ENABLED_MASK)
return 1;
- xcr0 = xcr;
if (!(xcr0 & XSTATE_FP))
return 1;
if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
@@ -616,8 +616,14 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
if (xcr0 & ~valid_bits)
return 1;
+ if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
+ return 1;
+
kvm_put_guest_xcr0(vcpu);
vcpu->arch.xcr0 = xcr0;
+
+ if ((xcr0 ^ old_xcr0) & XSTATE_EXTEND_MASK)
+ kvm_update_cpuid(vcpu);
return 0;
}
@@ -753,7 +759,9 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu)
else
dr7 = vcpu->arch.dr7;
kvm_x86_ops->set_dr7(vcpu, dr7);
- vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK);
+ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
+ if (dr7 & DR7_BP_EN_MASK)
+ vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
}
static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
@@ -879,7 +887,7 @@ static u32 msrs_to_save[] = {
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
- MSR_IA32_FEATURE_CONTROL
+ MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS
};
static unsigned num_msrs_to_save;
@@ -1581,7 +1589,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
/* With all the info we got, fill in the values */
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
- vcpu->last_kernel_ns = kernel_ns;
vcpu->last_guest_tsc = tsc_timestamp;
/*
@@ -1623,14 +1630,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
* the others.
*
* So in those cases, request a kvmclock update for all vcpus.
- * The worst case for a remote vcpu to update its kvmclock
- * is then bounded by maximum nohz sleep latency.
+ * We need to rate-limit these requests though, as they can
+ * considerably slow guests that have a large number of vcpus.
+ * The time for a remote vcpu to update its kvmclock is bound
+ * by the delay we use to rate-limit the updates.
*/
-static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
+#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
+
+static void kvmclock_update_fn(struct work_struct *work)
{
int i;
- struct kvm *kvm = v->kvm;
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
+ kvmclock_update_work);
+ struct kvm *kvm = container_of(ka, struct kvm, arch);
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -1639,6 +1653,29 @@ static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
}
}
+static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
+{
+ struct kvm *kvm = v->kvm;
+
+ set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests);
+ schedule_delayed_work(&kvm->arch.kvmclock_update_work,
+ KVMCLOCK_UPDATE_DELAY);
+}
+
+#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
+
+static void kvmclock_sync_fn(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
+ kvmclock_sync_work);
+ struct kvm *kvm = container_of(ka, struct kvm, arch);
+
+ schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
+ schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
+ KVMCLOCK_SYNC_PERIOD);
+}
+
static bool msr_mtrr_valid(unsigned msr)
{
switch (msr) {
@@ -2323,9 +2360,12 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case HV_X64_MSR_VP_INDEX: {
int r;
struct kvm_vcpu *v;
- kvm_for_each_vcpu(r, v, vcpu->kvm)
- if (v == vcpu)
+ kvm_for_each_vcpu(r, v, vcpu->kvm) {
+ if (v == vcpu) {
data = r;
+ break;
+ }
+ }
break;
}
case HV_X64_MSR_EOI:
@@ -2617,6 +2657,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_KVMCLOCK_CTRL:
case KVM_CAP_READONLY_MEM:
case KVM_CAP_HYPERV_TIME:
+ case KVM_CAP_IOAPIC_POLARITY_IGNORED:
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
case KVM_CAP_ASSIGN_DEV_IRQ:
case KVM_CAP_PCI_2_3:
@@ -3043,9 +3084,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
* CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
* with old userspace.
*/
- if (xstate_bv & ~KVM_SUPPORTED_XCR0)
- return -EINVAL;
- if (xstate_bv & ~host_xcr0)
+ if (xstate_bv & ~kvm_supported_xcr0())
return -EINVAL;
memcpy(&vcpu->arch.guest_fpu.state->xsave,
guest_xsave->region, vcpu->arch.guest_xstate_size);
@@ -3898,6 +3937,23 @@ static void kvm_init_msr_list(void)
for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
continue;
+
+ /*
+ * Even MSRs that are valid in the host may not be exposed
+ * to the guests in some cases. We could work around this
+ * in VMX with the generic MSR save/load machinery, but it
+ * is not really worthwhile since it will really only
+ * happen with nested virtualization.
+ */
+ switch (msrs_to_save[i]) {
+ case MSR_IA32_BNDCFGS:
+ if (!kvm_x86_ops->mpx_supported())
+ continue;
+ break;
+ default:
+ break;
+ }
+
if (j < i)
msrs_to_save[j] = msrs_to_save[i];
j++;
@@ -4394,6 +4450,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
if (!exchanged)
return X86EMUL_CMPXCHG_FAILED;
+ mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
kvm_mmu_pte_write(vcpu, gpa, new, bytes);
return X86EMUL_CONTINUE;
@@ -5365,7 +5422,8 @@ static void kvm_timer_init(void)
int cpu;
max_tsc_khz = tsc_khz;
- register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+
+ cpu_notifier_register_begin();
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
#ifdef CONFIG_CPU_FREQ
struct cpufreq_policy policy;
@@ -5382,6 +5440,10 @@ static void kvm_timer_init(void)
pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
for_each_online_cpu(cpu)
smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
+
+ __register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+ cpu_notifier_register_done();
+
}
static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
@@ -5537,9 +5599,10 @@ int kvm_arch_init(void *opaque)
goto out_free_percpu;
kvm_set_mmio_spte_mask();
- kvm_init_msr_list();
kvm_x86_ops = ops;
+ kvm_init_msr_list();
+
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);
@@ -5782,8 +5845,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
}
-static void inject_pending_event(struct kvm_vcpu *vcpu)
+static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
{
+ int r;
+
/* try to reinject previous events if any */
if (vcpu->arch.exception.pending) {
trace_kvm_inj_exception(vcpu->arch.exception.nr,
@@ -5793,17 +5858,23 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code,
vcpu->arch.exception.reinject);
- return;
+ return 0;
}
if (vcpu->arch.nmi_injected) {
kvm_x86_ops->set_nmi(vcpu);
- return;
+ return 0;
}
if (vcpu->arch.interrupt.pending) {
kvm_x86_ops->set_irq(vcpu);
- return;
+ return 0;
+ }
+
+ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
+ r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
+ if (r != 0)
+ return r;
}
/* try to inject new event if pending */
@@ -5820,6 +5891,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
kvm_x86_ops->set_irq(vcpu);
}
}
+ return 0;
}
static void process_nmi(struct kvm_vcpu *vcpu)
@@ -5924,15 +5996,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
- inject_pending_event(vcpu);
-
+ if (inject_pending_event(vcpu, req_int_win) != 0)
+ req_immediate_exit = true;
/* enable NMI/IRQ window open exits if needed */
- if (vcpu->arch.nmi_pending)
- req_immediate_exit =
- kvm_x86_ops->enable_nmi_window(vcpu) != 0;
+ else if (vcpu->arch.nmi_pending)
+ kvm_x86_ops->enable_nmi_window(vcpu);
else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
- req_immediate_exit =
- kvm_x86_ops->enable_irq_window(vcpu) != 0;
+ kvm_x86_ops->enable_irq_window(vcpu);
if (kvm_lapic_enabled(vcpu)) {
/*
@@ -5992,12 +6062,28 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
set_debugreg(vcpu->arch.eff_db[1], 1);
set_debugreg(vcpu->arch.eff_db[2], 2);
set_debugreg(vcpu->arch.eff_db[3], 3);
+ set_debugreg(vcpu->arch.dr6, 6);
}
trace_kvm_entry(vcpu->vcpu_id);
kvm_x86_ops->run(vcpu);
/*
+ * Do this here before restoring debug registers on the host. And
+ * since we do this before handling the vmexit, a DR access vmexit
+ * can (a) read the correct value of the debug registers, (b) set
+ * KVM_DEBUGREG_WONT_EXIT again.
+ */
+ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
+ int i;
+
+ WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
+ kvm_x86_ops->sync_dirty_debug_regs(vcpu);
+ for (i = 0; i < KVM_NR_DB_REGS; i++)
+ vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+ }
+
+ /*
* If the guest has used debug registers, at least dr7
* will be disabled while returning to the host.
* If we don't have active breakpoints in the host, we don't
@@ -6711,6 +6797,7 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
int r;
struct msr_data msr;
+ struct kvm *kvm = vcpu->kvm;
r = vcpu_load(vcpu);
if (r)
@@ -6721,6 +6808,9 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
kvm_write_tsc(vcpu, &msr);
vcpu_put(vcpu);
+ schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
+ KVMCLOCK_SYNC_PERIOD);
+
return r;
}
@@ -7013,6 +7103,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
pvclock_update_vm_gtod_copy(kvm);
+ INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
+ INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
+
return 0;
}
@@ -7050,6 +7143,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_sync_events(struct kvm *kvm)
{
+ cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
+ cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
kvm_free_all_assigned_devices(kvm);
kvm_free_pit(kvm);
}
@@ -7248,6 +7343,9 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
+ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
+ kvm_x86_ops->check_nested_events(vcpu, false);
+
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted)
|| !list_empty_careful(&vcpu->async_pf.done)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 8da5823bcde6..8c97bac9a895 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -122,9 +122,12 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception);
-#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
+#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
+ | XSTATE_BNDREGS | XSTATE_BNDCSR)
extern u64 host_xcr0;
+extern u64 kvm_supported_xcr0(void);
+
extern unsigned int min_timer_period_us;
extern struct static_key kvm_no_apic_vcpu;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4ed75dd81d05..dc017735bb91 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -553,13 +553,13 @@ void bpf_jit_compile(struct sk_filter *fp)
}
break;
case BPF_S_ANC_RXHASH:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4);
- if (is_imm8(offsetof(struct sk_buff, rxhash))) {
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+ if (is_imm8(offsetof(struct sk_buff, hash))) {
/* mov off8(%rdi),%eax */
- EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash));
+ EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash));
} else {
EMIT2(0x8b, 0x87);
- EMIT(offsetof(struct sk_buff, rxhash), 4);
+ EMIT(offsetof(struct sk_buff, hash), 4);
}
break;
case BPF_S_ANC_QUEUE:
@@ -772,6 +772,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
bpf_flush_icache(header, image + proglen);
set_memory_ro((unsigned long)header, header->pages);
fp->bpf_func = (void *)image;
+ fp->jited = 1;
}
out:
kfree(addrs);
@@ -791,7 +792,7 @@ static void bpf_jit_free_deferred(struct work_struct *work)
void bpf_jit_free(struct sk_filter *fp)
{
- if (fp->bpf_func != sk_run_filter) {
+ if (fp->jited) {
INIT_WORK(&fp->work, bpf_jit_free_deferred);
schedule_work(&fp->work);
} else {
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 6890d8498e0b..379e8bd0deea 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -494,14 +494,19 @@ static int nmi_setup(void)
if (err)
goto fail;
+ cpu_notifier_register_begin();
+
+ /* Use get/put_online_cpus() to protect 'nmi_enabled' */
get_online_cpus();
- register_cpu_notifier(&oprofile_cpu_nb);
nmi_enabled = 1;
/* make nmi_enabled visible to the nmi handler: */
smp_mb();
on_each_cpu(nmi_cpu_setup, NULL, 1);
+ __register_cpu_notifier(&oprofile_cpu_nb);
put_online_cpus();
+ cpu_notifier_register_done();
+
return 0;
fail:
free_msrs();
@@ -512,12 +517,18 @@ static void nmi_shutdown(void)
{
struct op_msrs *msrs;
+ cpu_notifier_register_begin();
+
+ /* Use get/put_online_cpus() to protect 'nmi_enabled' & 'ctr_running' */
get_online_cpus();
- unregister_cpu_notifier(&oprofile_cpu_nb);
on_each_cpu(nmi_cpu_shutdown, NULL, 1);
nmi_enabled = 0;
ctr_running = 0;
+ __unregister_cpu_notifier(&oprofile_cpu_nb);
put_online_cpus();
+
+ cpu_notifier_register_done();
+
/* make variables visible to the nmi handler: */
smp_mb();
unregister_nmi_handler(NMI_LOCAL, "oprofile");
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index a313a7fb6b86..e88f4c53d7f6 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -370,10 +370,13 @@ static int __init pci_io_ecs_init(void)
if (early_pci_allowed())
pci_enable_pci_io_ecs();
- register_cpu_notifier(&amd_cpu_notifier);
+ cpu_notifier_register_begin();
for_each_online_cpu(cpu)
amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE,
(void *)(long)cpu);
+ __register_cpu_notifier(&amd_cpu_notifier);
+ cpu_notifier_register_done();
+
pci_probe |= PCI_HAS_IO_ECS;
return 0;
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 103e702ec5a7..905956f16465 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -178,6 +178,7 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
i = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i],
+ (type == PCI_CAP_ID_MSI) ? nvec : 1,
(type == PCI_CAP_ID_MSIX) ?
"pcifront-msi-x" :
"pcifront-msi",
@@ -245,6 +246,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
"xen: msi already bound to pirq=%d\n", pirq);
}
irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq,
+ (type == PCI_CAP_ID_MSI) ? nvec : 1,
(type == PCI_CAP_ID_MSIX) ?
"msi-x" : "msi",
DOMID_SELF);
@@ -269,9 +271,6 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
int ret = 0;
struct msi_desc *msidesc;
- if (type == PCI_CAP_ID_MSI && nvec > 1)
- return 1;
-
list_for_each_entry(msidesc, &dev->msi_list, list) {
struct physdev_map_pirq map_irq;
domid_t domid;
@@ -291,7 +290,10 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
(pci_domain_nr(dev->bus) << 16);
map_irq.devfn = dev->devfn;
- if (type == PCI_CAP_ID_MSIX) {
+ if (type == PCI_CAP_ID_MSI && nvec > 1) {
+ map_irq.type = MAP_PIRQ_TYPE_MULTI_MSI;
+ map_irq.entry_nr = nvec;
+ } else if (type == PCI_CAP_ID_MSIX) {
int pos;
u32 table_offset, bir;
@@ -308,6 +310,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
if (pci_seg_supported)
ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
&map_irq);
+ if (type == PCI_CAP_ID_MSI && nvec > 1 && ret) {
+ /*
+ * If MAP_PIRQ_TYPE_MULTI_MSI is not available
+ * there's nothing else we can do in this case.
+ * Just set ret > 0 so driver can retry with
+ * single MSI.
+ */
+ ret = 1;
+ goto out;
+ }
if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
map_irq.type = MAP_PIRQ_TYPE_MSI;
map_irq.index = -1;
@@ -324,11 +336,10 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
goto out;
}
- ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
- map_irq.pirq,
- (type == PCI_CAP_ID_MSIX) ?
- "msi-x" : "msi",
- domid);
+ ret = xen_bind_pirq_msi_to_irq(dev, msidesc, map_irq.pirq,
+ (type == PCI_CAP_ID_MSI) ? nvec : 1,
+ (type == PCI_CAP_ID_MSIX) ? "msi-x" : "msi",
+ domid);
if (ret < 0)
goto out;
}
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index a12bddc7ccea..04376ac3d9ef 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -322,6 +322,7 @@
313 common finit_module sys_finit_module
314 common sched_setattr sys_sched_setattr
315 common sched_getattr sys_sched_getattr
+316 common renameat2 sys_renameat2
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 9c50cc2e403b..e88fda867a33 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -19,11 +19,6 @@ config XEN_DOM0
depends on XEN && PCI_XEN && SWIOTLB_XEN
depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI
-# Dummy symbol since people have come to rely on the PRIVILEGED_GUEST
-# name in tools.
-config XEN_PRIVILEGED_GUEST
- def_bool XEN_DOM0
-
config XEN_PVHVM
def_bool y
depends on XEN && PCI && X86_LOCAL_APIC
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 696c694986d0..85e5d78c9874 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -881,6 +881,65 @@ static unsigned long mfn_hash(unsigned long mfn)
return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
}
+int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
+ struct gnttab_map_grant_ref *kmap_ops,
+ struct page **pages, unsigned int count)
+{
+ int i, ret = 0;
+ bool lazy = false;
+ pte_t *pte;
+
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return 0;
+
+ if (kmap_ops &&
+ !in_interrupt() &&
+ paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
+ arch_enter_lazy_mmu_mode();
+ lazy = true;
+ }
+
+ for (i = 0; i < count; i++) {
+ unsigned long mfn, pfn;
+
+ /* Do not add to override if the map failed. */
+ if (map_ops[i].status)
+ continue;
+
+ if (map_ops[i].flags & GNTMAP_contains_pte) {
+ pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
+ (map_ops[i].host_addr & ~PAGE_MASK));
+ mfn = pte_mfn(*pte);
+ } else {
+ mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
+ }
+ pfn = page_to_pfn(pages[i]);
+
+ WARN_ON(PagePrivate(pages[i]));
+ SetPagePrivate(pages[i]);
+ set_page_private(pages[i], mfn);
+ pages[i]->index = pfn_to_mfn(pfn);
+
+ if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (kmap_ops) {
+ ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
+ if (ret)
+ goto out;
+ }
+ }
+
+out:
+ if (lazy)
+ arch_leave_lazy_mmu_mode();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
+
/* Add an MFN override for a particular page */
int m2p_add_override(unsigned long mfn, struct page *page,
struct gnttab_map_grant_ref *kmap_op)
@@ -899,13 +958,6 @@ int m2p_add_override(unsigned long mfn, struct page *page,
"m2p_add_override: pfn %lx not mapped", pfn))
return -EINVAL;
}
- WARN_ON(PagePrivate(page));
- SetPagePrivate(page);
- set_page_private(page, mfn);
- page->index = pfn_to_mfn(pfn);
-
- if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
- return -ENOMEM;
if (kmap_op != NULL) {
if (!PageHighMem(page)) {
@@ -943,20 +995,62 @@ int m2p_add_override(unsigned long mfn, struct page *page,
return 0;
}
EXPORT_SYMBOL_GPL(m2p_add_override);
+
+int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
+ struct gnttab_map_grant_ref *kmap_ops,
+ struct page **pages, unsigned int count)
+{
+ int i, ret = 0;
+ bool lazy = false;
+
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return 0;
+
+ if (kmap_ops &&
+ !in_interrupt() &&
+ paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
+ arch_enter_lazy_mmu_mode();
+ lazy = true;
+ }
+
+ for (i = 0; i < count; i++) {
+ unsigned long mfn = get_phys_to_machine(page_to_pfn(pages[i]));
+ unsigned long pfn = page_to_pfn(pages[i]);
+
+ if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ set_page_private(pages[i], INVALID_P2M_ENTRY);
+ WARN_ON(!PagePrivate(pages[i]));
+ ClearPagePrivate(pages[i]);
+ set_phys_to_machine(pfn, pages[i]->index);
+
+ if (kmap_ops)
+ ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn);
+ if (ret)
+ goto out;
+ }
+
+out:
+ if (lazy)
+ arch_leave_lazy_mmu_mode();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
+
int m2p_remove_override(struct page *page,
- struct gnttab_map_grant_ref *kmap_op)
+ struct gnttab_map_grant_ref *kmap_op,
+ unsigned long mfn)
{
unsigned long flags;
- unsigned long mfn;
unsigned long pfn;
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
pfn = page_to_pfn(page);
- mfn = get_phys_to_machine(pfn);
- if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT))
- return -EINVAL;
if (!PageHighMem(page)) {
address = (unsigned long)__va(pfn << PAGE_SHIFT);
@@ -970,10 +1064,7 @@ int m2p_remove_override(struct page *page,
spin_lock_irqsave(&m2p_override_lock, flags);
list_del(&page->lru);
spin_unlock_irqrestore(&m2p_override_lock, flags);
- WARN_ON(!PagePrivate(page));
- ClearPagePrivate(page);
- set_phys_to_machine(pfn, page->index);
if (kmap_op != NULL) {
if (!PageHighMem(page)) {
struct multicall_space mcs;
OpenPOWER on IntegriCloud