summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/crypto/Makefile3
-rw-r--r--arch/x86/crypto/aes-i586-asm_32.S18
-rw-r--r--arch/x86/crypto/aes-x86_64-asm_64.S6
-rw-r--r--arch/x86/crypto/aes_glue.c20
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S896
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c461
-rw-r--r--arch/x86/include/asm/aes.h11
-rw-r--r--arch/x86/include/asm/cpufeature.h1
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/fixmap.h10
-rw-r--r--arch/x86/include/asm/i387.h8
-rw-r--r--arch/x86/include/asm/kvm.h24
-rw-r--r--arch/x86/include/asm/kvm_host.h61
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/socket.h3
-rw-r--r--arch/x86/include/asm/svm.h4
-rw-r--r--arch/x86/include/asm/virtext.h2
-rw-r--r--arch/x86/include/asm/vmx.h5
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig19
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Makefile8
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c36
-rw-r--r--arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c54
-rw-r--r--arch/x86/kernel/cpu/cpufreq/e_powersaver.c21
-rw-r--r--arch/x86/kernel/cpu/cpufreq/elanfreq.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/gx-suspmod.c105
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.c193
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.h12
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longrun.c25
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c73
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k6.c44
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k7.c239
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c386
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.h5
-rw-r--r--arch/x86/kernel/cpu/cpufreq/sc520_freq.c30
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c70
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.c163
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.h18
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-smi.c166
-rw-r--r--arch/x86/kernel/cpu/intel.c8
-rw-r--r--arch/x86/kernel/ds.c3
-rw-r--r--arch/x86/kernel/efi.c7
-rw-r--r--arch/x86/kernel/efi_64.c21
-rw-r--r--arch/x86/kernel/hpet.c80
-rw-r--r--arch/x86/kernel/i387.c2
-rw-r--r--arch/x86/kernel/kprobes.c3
-rw-r--r--arch/x86/kernel/quirks.c3
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/setup.c5
-rw-r--r--arch/x86/kernel/tsc.c122
-rw-r--r--arch/x86/kvm/Kconfig4
-rw-r--r--arch/x86/kvm/i8254.c21
-rw-r--r--arch/x86/kvm/i8254.h2
-rw-r--r--arch/x86/kvm/i8259.c25
-rw-r--r--arch/x86/kvm/irq.h2
-rw-r--r--arch/x86/kvm/kvm_svm.h16
-rw-r--r--arch/x86/kvm/mmu.c237
-rw-r--r--arch/x86/kvm/mmu.h2
-rw-r--r--arch/x86/kvm/paging_tmpl.h219
-rw-r--r--arch/x86/kvm/svm.c916
-rw-r--r--arch/x86/kvm/vmx.c393
-rw-r--r--arch/x86/kvm/x86.c432
-rw-r--r--arch/x86/kvm/x86_emulate.c56
-rw-r--r--arch/x86/lguest/boot.c21
-rw-r--r--arch/x86/math-emu/fpu_aux.c31
-rw-r--r--arch/x86/mm/kmmio.c164
-rw-r--r--arch/x86/mm/pageattr.c11
-rw-r--r--arch/x86/mm/testmmiotrace.c70
68 files changed, 4488 insertions, 1616 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f5cef3fbf9a5..06c02c00d7d9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -169,6 +169,9 @@ config GENERIC_HARDIRQS
bool
default y
+config GENERIC_HARDIRQS_NO__DO_IRQ
+ def_bool y
+
config GENERIC_IRQ_PROBE
bool
default y
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 903de4aa5094..ebe7deedd5b4 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
+obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
@@ -19,3 +20,5 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
+
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S
index e41b147f4509..b949ec2f9af4 100644
--- a/arch/x86/crypto/aes-i586-asm_32.S
+++ b/arch/x86/crypto/aes-i586-asm_32.S
@@ -41,14 +41,14 @@
#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
/* offsets to parameters with one register pushed onto stack */
-#define tfm 8
+#define ctx 8
#define out_blk 12
#define in_blk 16
-/* offsets in crypto_tfm structure */
-#define klen (crypto_tfm_ctx_offset + 0)
-#define ekey (crypto_tfm_ctx_offset + 4)
-#define dkey (crypto_tfm_ctx_offset + 244)
+/* offsets in crypto_aes_ctx structure */
+#define klen (480)
+#define ekey (0)
+#define dkey (240)
// register mapping for encrypt and decrypt subroutines
@@ -217,7 +217,7 @@
do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
// AES (Rijndael) Encryption Subroutine
-/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
+/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
.global aes_enc_blk
@@ -228,7 +228,7 @@
aes_enc_blk:
push %ebp
- mov tfm(%esp),%ebp
+ mov ctx(%esp),%ebp
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
@@ -292,7 +292,7 @@ aes_enc_blk:
ret
// AES (Rijndael) Decryption Subroutine
-/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
+/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
.global aes_dec_blk
@@ -303,7 +303,7 @@ aes_enc_blk:
aes_dec_blk:
push %ebp
- mov tfm(%esp),%ebp
+ mov ctx(%esp),%ebp
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index a120f526c3df..5b577d5a059b 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -17,8 +17,6 @@
#include <asm/asm-offsets.h>
-#define BASE crypto_tfm_ctx_offset
-
#define R1 %rax
#define R1E %eax
#define R1X %ax
@@ -56,13 +54,13 @@
.align 8; \
FUNC: movq r1,r2; \
movq r3,r4; \
- leaq BASE+KEY+48+4(r8),r9; \
+ leaq KEY+48(r8),r9; \
movq r10,r11; \
movl (r7),r5 ## E; \
movl 4(r7),r1 ## E; \
movl 8(r7),r6 ## E; \
movl 12(r7),r7 ## E; \
- movl BASE+0(r8),r10 ## E; \
+ movl 480(r8),r10 ## E; \
xorl -48(r9),r5 ## E; \
xorl -44(r9),r1 ## E; \
xorl -40(r9),r6 ## E; \
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index 71f457827116..49ae9fe32b22 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -5,17 +5,29 @@
#include <crypto/aes.h>
-asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
+asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
+
+void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+{
+ aes_enc_blk(ctx, dst, src);
+}
+EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86);
+
+void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+{
+ aes_dec_blk(ctx, dst, src);
+}
+EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86);
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
- aes_enc_blk(tfm, dst, src);
+ aes_enc_blk(crypto_tfm_ctx(tfm), dst, src);
}
static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
- aes_dec_blk(tfm, dst, src);
+ aes_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}
static struct crypto_alg aes_alg = {
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
new file mode 100644
index 000000000000..caba99601703
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -0,0 +1,896 @@
+/*
+ * Implement AES algorithm in Intel AES-NI instructions.
+ *
+ * The white paper of AES-NI instructions can be downloaded from:
+ * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
+ *
+ * Copyright (C) 2008, Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ * Vinodh Gopal <vinodh.gopal@intel.com>
+ * Kahraman Akdemir
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+.text
+
+#define STATE1 %xmm0
+#define STATE2 %xmm4
+#define STATE3 %xmm5
+#define STATE4 %xmm6
+#define STATE STATE1
+#define IN1 %xmm1
+#define IN2 %xmm7
+#define IN3 %xmm8
+#define IN4 %xmm9
+#define IN IN1
+#define KEY %xmm2
+#define IV %xmm3
+
+#define KEYP %rdi
+#define OUTP %rsi
+#define INP %rdx
+#define LEN %rcx
+#define IVP %r8
+#define KLEN %r9d
+#define T1 %r10
+#define TKEYP T1
+#define T2 %r11
+
+_key_expansion_128:
+_key_expansion_256a:
+ pshufd $0b11111111, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+ movaps %xmm0, (%rcx)
+ add $0x10, %rcx
+ ret
+
+_key_expansion_192a:
+ pshufd $0b01010101, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+
+ movaps %xmm2, %xmm5
+ movaps %xmm2, %xmm6
+ pslldq $4, %xmm5
+ pshufd $0b11111111, %xmm0, %xmm3
+ pxor %xmm3, %xmm2
+ pxor %xmm5, %xmm2
+
+ movaps %xmm0, %xmm1
+ shufps $0b01000100, %xmm0, %xmm6
+ movaps %xmm6, (%rcx)
+ shufps $0b01001110, %xmm2, %xmm1
+ movaps %xmm1, 16(%rcx)
+ add $0x20, %rcx
+ ret
+
+_key_expansion_192b:
+ pshufd $0b01010101, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+
+ movaps %xmm2, %xmm5
+ pslldq $4, %xmm5
+ pshufd $0b11111111, %xmm0, %xmm3
+ pxor %xmm3, %xmm2
+ pxor %xmm5, %xmm2
+
+ movaps %xmm0, (%rcx)
+ add $0x10, %rcx
+ ret
+
+_key_expansion_256b:
+ pshufd $0b10101010, %xmm1, %xmm1
+ shufps $0b00010000, %xmm2, %xmm4
+ pxor %xmm4, %xmm2
+ shufps $0b10001100, %xmm2, %xmm4
+ pxor %xmm4, %xmm2
+ pxor %xmm1, %xmm2
+ movaps %xmm2, (%rcx)
+ add $0x10, %rcx
+ ret
+
+/*
+ * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
+ * unsigned int key_len)
+ */
+ENTRY(aesni_set_key)
+ movups (%rsi), %xmm0 # user key (first 16 bytes)
+ movaps %xmm0, (%rdi)
+ lea 0x10(%rdi), %rcx # key addr
+ movl %edx, 480(%rdi)
+ pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
+ cmp $24, %dl
+ jb .Lenc_key128
+ je .Lenc_key192
+ movups 0x10(%rsi), %xmm2 # other user key
+ movaps %xmm2, (%rcx)
+ add $0x10, %rcx
+ # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
+ call _key_expansion_256a
+ # aeskeygenassist $0x1, %xmm0, %xmm1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
+ call _key_expansion_256b
+ # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
+ call _key_expansion_256a
+ # aeskeygenassist $0x2, %xmm0, %xmm1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
+ call _key_expansion_256b
+ # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
+ call _key_expansion_256a
+ # aeskeygenassist $0x4, %xmm0, %xmm1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
+ call _key_expansion_256b
+ # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
+ call _key_expansion_256a
+ # aeskeygenassist $0x8, %xmm0, %xmm1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
+ call _key_expansion_256b
+ # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
+ call _key_expansion_256a
+ # aeskeygenassist $0x10, %xmm0, %xmm1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
+ call _key_expansion_256b
+ # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
+ call _key_expansion_256a
+ # aeskeygenassist $0x20, %xmm0, %xmm1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
+ call _key_expansion_256b
+ # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
+ call _key_expansion_256a
+ jmp .Ldec_key
+.Lenc_key192:
+ movq 0x10(%rsi), %xmm2 # other user key
+ # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
+ call _key_expansion_192a
+ # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
+ call _key_expansion_192b
+ # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
+ call _key_expansion_192a
+ # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
+ call _key_expansion_192b
+ # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
+ call _key_expansion_192a
+ # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
+ call _key_expansion_192b
+ # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
+ call _key_expansion_192a
+ # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80
+ call _key_expansion_192b
+ jmp .Ldec_key
+.Lenc_key128:
+ # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
+ call _key_expansion_128
+ # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
+ call _key_expansion_128
+ # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
+ call _key_expansion_128
+ # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
+ call _key_expansion_128
+ # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
+ call _key_expansion_128
+ # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
+ call _key_expansion_128
+ # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40
+ call _key_expansion_128
+ # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80
+ call _key_expansion_128
+ # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b
+ call _key_expansion_128
+ # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36
+ call _key_expansion_128
+.Ldec_key:
+ sub $0x10, %rcx
+ movaps (%rdi), %xmm0
+ movaps (%rcx), %xmm1
+ movaps %xmm0, 240(%rcx)
+ movaps %xmm1, 240(%rdi)
+ add $0x10, %rdi
+ lea 240-16(%rcx), %rsi
+.align 4
+.Ldec_key_loop:
+ movaps (%rdi), %xmm0
+ # aesimc %xmm0, %xmm1
+ .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8
+ movaps %xmm1, (%rsi)
+ add $0x10, %rdi
+ sub $0x10, %rsi
+ cmp %rcx, %rdi
+ jb .Ldec_key_loop
+ xor %rax, %rax
+ ret
+
+/*
+ * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+ */
+ENTRY(aesni_enc)
+ movl 480(KEYP), KLEN # key length
+ movups (INP), STATE # input
+ call _aesni_enc1
+ movups STATE, (OUTP) # output
+ ret
+
+/*
+ * _aesni_enc1: internal ABI
+ * input:
+ * KEYP: key struct pointer
+ * KLEN: round count
+ * STATE: initial state (input)
+ * output:
+ * STATE: finial state (output)
+ * changed:
+ * KEY
+ * TKEYP (T1)
+ */
+_aesni_enc1:
+ movaps (KEYP), KEY # key
+ mov KEYP, TKEYP
+ pxor KEY, STATE # round 0
+ add $0x30, TKEYP
+ cmp $24, KLEN
+ jb .Lenc128
+ lea 0x20(TKEYP), TKEYP
+ je .Lenc192
+ add $0x20, TKEYP
+ movaps -0x60(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps -0x50(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+.align 4
+.Lenc192:
+ movaps -0x40(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps -0x30(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+.align 4
+.Lenc128:
+ movaps -0x20(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps -0x10(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps (TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps 0x10(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps 0x20(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps 0x30(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps 0x40(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps 0x50(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps 0x60(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps 0x70(TKEYP), KEY
+ # aesenclast KEY, STATE # last round
+ .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
+ ret
+
+/*
+ * _aesni_enc4: internal ABI
+ * input:
+ * KEYP: key struct pointer
+ * KLEN: round count
+ * STATE1: initial state (input)
+ * STATE2
+ * STATE3
+ * STATE4
+ * output:
+ * STATE1: finial state (output)
+ * STATE2
+ * STATE3
+ * STATE4
+ * changed:
+ * KEY
+ * TKEYP (T1)
+ */
+_aesni_enc4:
+ movaps (KEYP), KEY # key
+ mov KEYP, TKEYP
+ pxor KEY, STATE1 # round 0
+ pxor KEY, STATE2
+ pxor KEY, STATE3
+ pxor KEY, STATE4
+ add $0x30, TKEYP
+ cmp $24, KLEN
+ jb .L4enc128
+ lea 0x20(TKEYP), TKEYP
+ je .L4enc192
+ add $0x20, TKEYP
+ movaps -0x60(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps -0x50(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+#.align 4
+.L4enc192:
+ movaps -0x40(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps -0x30(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+#.align 4
+.L4enc128:
+ movaps -0x20(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps -0x10(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps (TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps 0x10(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps 0x20(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps 0x30(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps 0x40(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps 0x50(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps 0x60(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps 0x70(TKEYP), KEY
+ # aesenclast KEY, STATE1 # last round
+ .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
+ # aesenclast KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2
+ # aesenclast KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdd, 0xea
+ # aesenclast KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2
+ ret
+
+/*
+ * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+ */
+ENTRY(aesni_dec)
+ mov 480(KEYP), KLEN # key length
+ add $240, KEYP
+ movups (INP), STATE # input
+ call _aesni_dec1
+ movups STATE, (OUTP) #output
+ ret
+
+/*
+ * _aesni_dec1: internal ABI
+ * input:
+ * KEYP: key struct pointer
+ * KLEN: key length
+ * STATE: initial state (input)
+ * output:
+ * STATE: finial state (output)
+ * changed:
+ * KEY
+ * TKEYP (T1)
+ */
+_aesni_dec1:
+ movaps (KEYP), KEY # key
+ mov KEYP, TKEYP
+ pxor KEY, STATE # round 0
+ add $0x30, TKEYP
+ cmp $24, KLEN
+ jb .Ldec128
+ lea 0x20(TKEYP), TKEYP
+ je .Ldec192
+ add $0x20, TKEYP
+ movaps -0x60(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps -0x50(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+.align 4
+.Ldec192:
+ movaps -0x40(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps -0x30(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+.align 4
+.Ldec128:
+ movaps -0x20(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps -0x10(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps (TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps 0x10(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps 0x20(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps 0x30(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps 0x40(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps 0x50(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps 0x60(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps 0x70(TKEYP), KEY
+ # aesdeclast KEY, STATE # last round
+ .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
+ ret
+
+/*
+ * _aesni_dec4: internal ABI
+ * input:
+ * KEYP: key struct pointer
+ * KLEN: key length
+ * STATE1: initial state (input)
+ * STATE2
+ * STATE3
+ * STATE4
+ * output:
+ * STATE1: finial state (output)
+ * STATE2
+ * STATE3
+ * STATE4
+ * changed:
+ * KEY
+ * TKEYP (T1)
+ */
+_aesni_dec4:
+ movaps (KEYP), KEY # key
+ mov KEYP, TKEYP
+ pxor KEY, STATE1 # round 0
+ pxor KEY, STATE2
+ pxor KEY, STATE3
+ pxor KEY, STATE4
+ add $0x30, TKEYP
+ cmp $24, KLEN
+ jb .L4dec128
+ lea 0x20(TKEYP), TKEYP
+ je .L4dec192
+ add $0x20, TKEYP
+ movaps -0x60(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps -0x50(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+.align 4
+.L4dec192:
+ movaps -0x40(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps -0x30(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+.align 4
+.L4dec128:
+ movaps -0x20(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps -0x10(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps (TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps 0x10(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps 0x20(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps 0x30(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps 0x40(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps 0x50(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps 0x60(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps 0x70(TKEYP), KEY
+ # aesdeclast KEY, STATE1 # last round
+ .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
+ # aesdeclast KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2
+ # aesdeclast KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdf, 0xea
+ # aesdeclast KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2
+ ret
+
+/*
+ * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ * size_t len)
+ */
+ENTRY(aesni_ecb_enc)
+ test LEN, LEN # check length
+ jz .Lecb_enc_ret
+ mov 480(KEYP), KLEN
+ cmp $16, LEN
+ jb .Lecb_enc_ret
+ cmp $64, LEN
+ jb .Lecb_enc_loop1
+.align 4
+.Lecb_enc_loop4:
+ movups (INP), STATE1
+ movups 0x10(INP), STATE2
+ movups 0x20(INP), STATE3
+ movups 0x30(INP), STATE4
+ call _aesni_enc4
+ movups STATE1, (OUTP)
+ movups STATE2, 0x10(OUTP)
+ movups STATE3, 0x20(OUTP)
+ movups STATE4, 0x30(OUTP)
+ sub $64, LEN
+ add $64, INP
+ add $64, OUTP
+ cmp $64, LEN
+ jge .Lecb_enc_loop4
+ cmp $16, LEN
+ jb .Lecb_enc_ret
+.align 4
+.Lecb_enc_loop1:
+ movups (INP), STATE1
+ call _aesni_enc1
+ movups STATE1, (OUTP)
+ sub $16, LEN
+ add $16, INP
+ add $16, OUTP
+ cmp $16, LEN
+ jge .Lecb_enc_loop1
+.Lecb_enc_ret:
+ ret
+
+/*
+ * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ * size_t len);
+ */
+ENTRY(aesni_ecb_dec)
+ test LEN, LEN
+ jz .Lecb_dec_ret
+ mov 480(KEYP), KLEN
+ add $240, KEYP
+ cmp $16, LEN
+ jb .Lecb_dec_ret
+ cmp $64, LEN
+ jb .Lecb_dec_loop1
+.align 4
+.Lecb_dec_loop4:
+ movups (INP), STATE1
+ movups 0x10(INP), STATE2
+ movups 0x20(INP), STATE3
+ movups 0x30(INP), STATE4
+ call _aesni_dec4
+ movups STATE1, (OUTP)
+ movups STATE2, 0x10(OUTP)
+ movups STATE3, 0x20(OUTP)
+ movups STATE4, 0x30(OUTP)
+ sub $64, LEN
+ add $64, INP
+ add $64, OUTP
+ cmp $64, LEN
+ jge .Lecb_dec_loop4
+ cmp $16, LEN
+ jb .Lecb_dec_ret
+.align 4
+.Lecb_dec_loop1:
+ movups (INP), STATE1
+ call _aesni_dec1
+ movups STATE1, (OUTP)
+ sub $16, LEN
+ add $16, INP
+ add $16, OUTP
+ cmp $16, LEN
+ jge .Lecb_dec_loop1
+.Lecb_dec_ret:
+ ret
+
+/*
+ * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ * size_t len, u8 *iv)
+ */
+ENTRY(aesni_cbc_enc)
+ cmp $16, LEN
+ jb .Lcbc_enc_ret
+ mov 480(KEYP), KLEN
+ movups (IVP), STATE # load iv as initial state
+.align 4
+.Lcbc_enc_loop:
+ movups (INP), IN # load input
+ pxor IN, STATE
+ call _aesni_enc1
+ movups STATE, (OUTP) # store output
+ sub $16, LEN
+ add $16, INP
+ add $16, OUTP
+ cmp $16, LEN
+ jge .Lcbc_enc_loop
+ movups STATE, (IVP)
+.Lcbc_enc_ret:
+ ret
+
+/*
+ * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ * size_t len, u8 *iv)
+ */
+ENTRY(aesni_cbc_dec)
+ cmp $16, LEN
+ jb .Lcbc_dec_ret
+ mov 480(KEYP), KLEN
+ add $240, KEYP
+ movups (IVP), IV
+ cmp $64, LEN
+ jb .Lcbc_dec_loop1
+.align 4
+.Lcbc_dec_loop4:
+ movups (INP), IN1
+ movaps IN1, STATE1
+ movups 0x10(INP), IN2
+ movaps IN2, STATE2
+ movups 0x20(INP), IN3
+ movaps IN3, STATE3
+ movups 0x30(INP), IN4
+ movaps IN4, STATE4
+ call _aesni_dec4
+ pxor IV, STATE1
+ pxor IN1, STATE2
+ pxor IN2, STATE3
+ pxor IN3, STATE4
+ movaps IN4, IV
+ movups STATE1, (OUTP)
+ movups STATE2, 0x10(OUTP)
+ movups STATE3, 0x20(OUTP)
+ movups STATE4, 0x30(OUTP)
+ sub $64, LEN
+ add $64, INP
+ add $64, OUTP
+ cmp $64, LEN
+ jge .Lcbc_dec_loop4
+ cmp $16, LEN
+ jb .Lcbc_dec_ret
+.align 4
+.Lcbc_dec_loop1:
+ movups (INP), IN
+ movaps IN, STATE
+ call _aesni_dec1
+ pxor IV, STATE
+ movups STATE, (OUTP)
+ movaps IN, IV
+ sub $16, LEN
+ add $16, INP
+ add $16, OUTP
+ cmp $16, LEN
+ jge .Lcbc_dec_loop1
+ movups IV, (IVP)
+.Lcbc_dec_ret:
+ ret
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
new file mode 100644
index 000000000000..02af0af65497
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -0,0 +1,461 @@
+/*
+ * Support for Intel AES-NI instructions. This file contains glue
+ * code, the real AES implementation is in intel-aes_asm.S.
+ *
+ * Copyright (C) 2008, Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/cryptd.h>
+#include <asm/i387.h>
+#include <asm/aes.h>
+
+struct async_aes_ctx {
+ struct cryptd_ablkcipher *cryptd_tfm;
+};
+
+#define AESNI_ALIGN 16
+#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
+
+asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
+ unsigned int key_len);
+asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in);
+asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in);
+asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len);
+asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len);
+asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len, u8 *iv);
+asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len, u8 *iv);
+
+static inline int kernel_fpu_using(void)
+{
+ if (in_interrupt() && !(read_cr0() & X86_CR0_TS))
+ return 1;
+ return 0;
+}
+
+static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
+{
+ unsigned long addr = (unsigned long)raw_ctx;
+ unsigned long align = AESNI_ALIGN;
+
+ if (align <= crypto_tfm_ctx_alignment())
+ align = 1;
+ return (struct crypto_aes_ctx *)ALIGN(addr, align);
+}
+
+static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
+ const u8 *in_key, unsigned int key_len)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx);
+ u32 *flags = &tfm->crt_flags;
+ int err;
+
+ if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
+ key_len != AES_KEYSIZE_256) {
+ *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+
+ if (kernel_fpu_using())
+ err = crypto_aes_expand_key(ctx, in_key, key_len);
+ else {
+ kernel_fpu_begin();
+ err = aesni_set_key(ctx, in_key, key_len);
+ kernel_fpu_end();
+ }
+
+ return err;
+}
+
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len);
+}
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
+
+ if (kernel_fpu_using())
+ crypto_aes_encrypt_x86(ctx, dst, src);
+ else {
+ kernel_fpu_begin();
+ aesni_enc(ctx, dst, src);
+ kernel_fpu_end();
+ }
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
+
+ if (kernel_fpu_using())
+ crypto_aes_decrypt_x86(ctx, dst, src);
+ else {
+ kernel_fpu_begin();
+ aesni_dec(ctx, dst, src);
+ kernel_fpu_end();
+ }
+}
+
+static struct crypto_alg aesni_alg = {
+ .cra_name = "aes",
+ .cra_driver_name = "aes-aesni",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(aesni_alg.cra_list),
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = aes_set_key,
+ .cia_encrypt = aes_encrypt,
+ .cia_decrypt = aes_decrypt
+ }
+ }
+};
+
+static int ecb_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_fpu_begin();
+ while ((nbytes = walk.nbytes)) {
+ aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK);
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ kernel_fpu_end();
+
+ return err;
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_fpu_begin();
+ while ((nbytes = walk.nbytes)) {
+ aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK);
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ kernel_fpu_end();
+
+ return err;
+}
+
+static struct crypto_alg blk_ecb_alg = {
+ .cra_name = "__ecb-aes-aesni",
+ .cra_driver_name = "__driver-ecb-aes-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = aes_set_key,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+};
+
+static int cbc_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_fpu_begin();
+ while ((nbytes = walk.nbytes)) {
+ aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK, walk.iv);
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ kernel_fpu_end();
+
+ return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_fpu_begin();
+ while ((nbytes = walk.nbytes)) {
+ aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK, walk.iv);
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ kernel_fpu_end();
+
+ return err;
+}
+
+static struct crypto_alg blk_cbc_alg = {
+ .cra_name = "__cbc-aes-aesni",
+ .cra_driver_name = "__driver-cbc-aes-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = aes_set_key,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+};
+
+static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+ return crypto_ablkcipher_setkey(&ctx->cryptd_tfm->base, key, key_len);
+}
+
+static int ablk_encrypt(struct ablkcipher_request *req)
+{
+ struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+ struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+ if (kernel_fpu_using()) {
+ struct ablkcipher_request *cryptd_req =
+ ablkcipher_request_ctx(req);
+ memcpy(cryptd_req, req, sizeof(*req));
+ ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+ return crypto_ablkcipher_encrypt(cryptd_req);
+ } else {
+ struct blkcipher_desc desc;
+ desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+ desc.info = req->info;
+ desc.flags = 0;
+ return crypto_blkcipher_crt(desc.tfm)->encrypt(
+ &desc, req->dst, req->src, req->nbytes);
+ }
+}
+
+static int ablk_decrypt(struct ablkcipher_request *req)
+{
+ struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+ struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+ if (kernel_fpu_using()) {
+ struct ablkcipher_request *cryptd_req =
+ ablkcipher_request_ctx(req);
+ memcpy(cryptd_req, req, sizeof(*req));
+ ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+ return crypto_ablkcipher_decrypt(cryptd_req);
+ } else {
+ struct blkcipher_desc desc;
+ desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+ desc.info = req->info;
+ desc.flags = 0;
+ return crypto_blkcipher_crt(desc.tfm)->decrypt(
+ &desc, req->dst, req->src, req->nbytes);
+ }
+}
+
+static void ablk_exit(struct crypto_tfm *tfm)
+{
+ struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ cryptd_free_ablkcipher(ctx->cryptd_tfm);
+}
+
+static void ablk_init_common(struct crypto_tfm *tfm,
+ struct cryptd_ablkcipher *cryptd_tfm)
+{
+ struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ ctx->cryptd_tfm = cryptd_tfm;
+ tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
+ crypto_ablkcipher_reqsize(&cryptd_tfm->base);
+}
+
+static int ablk_ecb_init(struct crypto_tfm *tfm)
+{
+ struct cryptd_ablkcipher *cryptd_tfm;
+
+ cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-aes-aesni", 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+ ablk_init_common(tfm, cryptd_tfm);
+ return 0;
+}
+
+static struct crypto_alg ablk_ecb_alg = {
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list),
+ .cra_init = ablk_ecb_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+};
+
+static int ablk_cbc_init(struct crypto_tfm *tfm)
+{
+ struct cryptd_ablkcipher *cryptd_tfm;
+
+ cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-aes-aesni", 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+ ablk_init_common(tfm, cryptd_tfm);
+ return 0;
+}
+
+static struct crypto_alg ablk_cbc_alg = {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list),
+ .cra_init = ablk_cbc_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+};
+
+static int __init aesni_init(void)
+{
+ int err;
+
+ if (!cpu_has_aes) {
+ printk(KERN_ERR "Intel AES-NI instructions are not detected.\n");
+ return -ENODEV;
+ }
+ if ((err = crypto_register_alg(&aesni_alg)))
+ goto aes_err;
+ if ((err = crypto_register_alg(&blk_ecb_alg)))
+ goto blk_ecb_err;
+ if ((err = crypto_register_alg(&blk_cbc_alg)))
+ goto blk_cbc_err;
+ if ((err = crypto_register_alg(&ablk_ecb_alg)))
+ goto ablk_ecb_err;
+ if ((err = crypto_register_alg(&ablk_cbc_alg)))
+ goto ablk_cbc_err;
+
+ return err;
+
+ablk_cbc_err:
+ crypto_unregister_alg(&ablk_ecb_alg);
+ablk_ecb_err:
+ crypto_unregister_alg(&blk_cbc_alg);
+blk_cbc_err:
+ crypto_unregister_alg(&blk_ecb_alg);
+blk_ecb_err:
+ crypto_unregister_alg(&aesni_alg);
+aes_err:
+ return err;
+}
+
+static void __exit aesni_exit(void)
+{
+ crypto_unregister_alg(&ablk_cbc_alg);
+ crypto_unregister_alg(&ablk_ecb_alg);
+ crypto_unregister_alg(&blk_cbc_alg);
+ crypto_unregister_alg(&blk_ecb_alg);
+ crypto_unregister_alg(&aesni_alg);
+}
+
+module_init(aesni_init);
+module_exit(aesni_exit);
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("aes");
diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/aes.h
new file mode 100644
index 000000000000..80545a1cbe39
--- /dev/null
+++ b/arch/x86/include/asm/aes.h
@@ -0,0 +1,11 @@
+#ifndef ASM_X86_AES_H
+#define ASM_X86_AES_H
+
+#include <linux/crypto.h>
+#include <crypto/aes.h>
+
+void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
+ const u8 *src);
+void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
+ const u8 *src);
+#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7301e60dc4a8..0beba0d1468d 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -213,6 +213,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM)
#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2)
#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3)
+#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES)
#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP)
#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index ca5ffb2856b6..edc90f23e708 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -37,8 +37,6 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
#else /* !CONFIG_X86_32 */
-#define MAX_EFI_IO_PAGES 100
-
extern u64 efi_call0(void *fp);
extern u64 efi_call1(void *fp, u64 arg1);
extern u64 efi_call2(void *fp, u64 arg1, u64 arg2);
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index dca8f03da5b2..63a79c77d220 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -24,9 +24,6 @@
#include <asm/kmap_types.h>
#else
#include <asm/vsyscall.h>
-#ifdef CONFIG_EFI
-#include <asm/efi.h>
-#endif
#endif
/*
@@ -92,13 +89,6 @@ enum fixed_addresses {
FIX_IO_APIC_BASE_0,
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
#endif
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_EFI
- FIX_EFI_IO_MAP_LAST_PAGE,
- FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE
- + MAX_EFI_IO_PAGES - 1,
-#endif
-#endif
#ifdef CONFIG_X86_VISWS_APIC
FIX_CO_CPU, /* Cobalt timer */
FIX_CO_APIC, /* Cobalt APIC Redirection Table */
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 48f0004db8c9..71c9e5183982 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -172,7 +172,13 @@ static inline void __save_init_fpu(struct task_struct *tsk)
#else /* CONFIG_X86_32 */
-extern void finit(void);
+#ifdef CONFIG_MATH_EMULATION
+extern void finit_task(struct task_struct *tsk);
+#else
+static inline void finit_task(struct task_struct *tsk)
+{
+}
+#endif
static inline void tolerant_fwait(void)
{
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 886c9402ec45..dc3f6cf11704 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -15,6 +15,7 @@
#define __KVM_HAVE_DEVICE_ASSIGNMENT
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
+#define __KVM_HAVE_GUEST_DEBUG
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
@@ -212,7 +213,30 @@ struct kvm_pit_channel_state {
__s64 count_load_time;
};
+struct kvm_debug_exit_arch {
+ __u32 exception;
+ __u32 pad;
+ __u64 pc;
+ __u64 dr6;
+ __u64 dr7;
+};
+
+#define KVM_GUESTDBG_USE_SW_BP 0x00010000
+#define KVM_GUESTDBG_USE_HW_BP 0x00020000
+#define KVM_GUESTDBG_INJECT_DB 0x00040000
+#define KVM_GUESTDBG_INJECT_BP 0x00080000
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+ __u64 debugreg[8];
+};
+
struct kvm_pit_state {
struct kvm_pit_channel_state channels[3];
};
+
+struct kvm_reinject_control {
+ __u8 pit_reinject;
+ __u8 reserved[31];
+};
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 730843d1d2fb..f0faf58044ff 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -22,6 +22,7 @@
#include <asm/pvclock-abi.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
+#include <asm/msr-index.h>
#define KVM_MAX_VCPUS 16
#define KVM_MEMORY_SLOTS 32
@@ -134,11 +135,18 @@ enum {
#define KVM_NR_MEM_OBJS 40
-struct kvm_guest_debug {
- int enabled;
- unsigned long bp[4];
- int singlestep;
-};
+#define KVM_NR_DB_REGS 4
+
+#define DR6_BD (1 << 13)
+#define DR6_BS (1 << 14)
+#define DR6_FIXED_1 0xffff0ff0
+#define DR6_VOLATILE 0x0000e00f
+
+#define DR7_BP_EN_MASK 0x000000ff
+#define DR7_GE (1 << 9)
+#define DR7_GD (1 << 13)
+#define DR7_FIXED_1 0x00000400
+#define DR7_VOLATILE 0xffff23ff
/*
* We don't want allocation failures within the mmu code, so we preallocate
@@ -162,7 +170,8 @@ struct kvm_pte_chain {
* bits 0:3 - total guest paging levels (2-4, or zero for real mode)
* bits 4:7 - page table level for this shadow (1-4)
* bits 8:9 - page table quadrant for 2-level guests
- * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode)
+ * bit 16 - direct mapping of virtual to physical mapping at gfn
+ * used for real mode and two-dimensional paging
* bits 17:19 - common access permissions for all ptes in this shadow page
*/
union kvm_mmu_page_role {
@@ -172,9 +181,10 @@ union kvm_mmu_page_role {
unsigned level:4;
unsigned quadrant:2;
unsigned pad_for_nice_hex_output:6;
- unsigned metaphysical:1;
+ unsigned direct:1;
unsigned access:3;
unsigned invalid:1;
+ unsigned cr4_pge:1;
};
};
@@ -218,6 +228,18 @@ struct kvm_pv_mmu_op_buffer {
char buf[512] __aligned(sizeof(long));
};
+struct kvm_pio_request {
+ unsigned long count;
+ int cur_count;
+ gva_t guest_gva;
+ int in;
+ int port;
+ int size;
+ int string;
+ int down;
+ int rep;
+};
+
/*
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
* 32-bit). The kvm_mmu structure abstracts the details of the current mmu
@@ -236,6 +258,7 @@ struct kvm_mmu {
hpa_t root_hpa;
int root_level;
int shadow_root_level;
+ union kvm_mmu_page_role base_role;
u64 *pae_root;
};
@@ -258,6 +281,7 @@ struct kvm_vcpu_arch {
unsigned long cr3;
unsigned long cr4;
unsigned long cr8;
+ u32 hflags;
u64 pdptrs[4]; /* pae */
u64 shadow_efer;
u64 apic_base;
@@ -338,6 +362,15 @@ struct kvm_vcpu_arch {
struct mtrr_state_type mtrr_state;
u32 pat;
+
+ int switch_db_regs;
+ unsigned long host_db[KVM_NR_DB_REGS];
+ unsigned long host_dr6;
+ unsigned long host_dr7;
+ unsigned long db[KVM_NR_DB_REGS];
+ unsigned long dr6;
+ unsigned long dr7;
+ unsigned long eff_db[KVM_NR_DB_REGS];
};
struct kvm_mem_alias {
@@ -378,6 +411,7 @@ struct kvm_arch{
unsigned long irq_sources_bitmap;
unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
+ u64 vm_init_tsc;
};
struct kvm_vm_stat {
@@ -446,8 +480,7 @@ struct kvm_x86_ops {
void (*vcpu_put)(struct kvm_vcpu *vcpu);
int (*set_guest_debug)(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg);
- void (*guest_debug_pre)(struct kvm_vcpu *vcpu);
+ struct kvm_guest_debug *dbg);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -583,16 +616,12 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
u32 error_code);
-void kvm_pic_set_irq(void *opaque, int irq, int level);
+int kvm_pic_set_irq(void *opaque, int irq, int level);
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
void fx_init(struct kvm_vcpu *vcpu);
-int emulator_read_std(unsigned long addr,
- void *val,
- unsigned int bytes,
- struct kvm_vcpu *vcpu);
int emulator_write_emulated(unsigned long addr,
const void *val,
unsigned int bytes,
@@ -737,6 +766,10 @@ enum {
TASK_SWITCH_GATE = 3,
};
+#define HF_GIF_MASK (1 << 0)
+#define HF_HIF_MASK (1 << 1)
+#define HF_VINTR_MASK (1 << 2)
+
/*
* Hardware virtualization extension instructions may fault if a
* reboot turns off virtualization while processes are running.
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 358acc59ae04..f4e505f286bc 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -18,11 +18,15 @@
#define _EFER_LME 8 /* Long mode enable */
#define _EFER_LMA 10 /* Long mode active (read-only) */
#define _EFER_NX 11 /* No execute enable */
+#define _EFER_SVME 12 /* Enable virtualization */
+#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
#define EFER_SCE (1<<_EFER_SCE)
#define EFER_LME (1<<_EFER_LME)
#define EFER_LMA (1<<_EFER_LMA)
#define EFER_NX (1<<_EFER_NX)
+#define EFER_SVME (1<<_EFER_SVME)
+#define EFER_FFXSR (1<<_EFER_FFXSR)
/* Intel MSRs. Some also available on other CPUs */
#define MSR_IA32_PERFCTR0 0x000000c1
@@ -360,4 +364,9 @@
#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
+/* AMD-V MSRs */
+
+#define MSR_VM_CR 0xc0010114
+#define MSR_VM_HSAVE_PA 0xc0010117
+
#endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h
index 8ab9cc8b2ecc..ca8bf2cd0ba9 100644
--- a/arch/x86/include/asm/socket.h
+++ b/arch/x86/include/asm/socket.h
@@ -54,4 +54,7 @@
#define SO_MARK 36
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
#endif /* _ASM_X86_SOCKET_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1b8afa78e869..82ada75f3ebf 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -174,10 +174,6 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_CPUID_FEATURE_SHIFT 2
#define SVM_CPUID_FUNC 0x8000000a
-#define MSR_EFER_SVME_MASK (1ULL << 12)
-#define MSR_VM_CR 0xc0010114
-#define MSR_VM_HSAVE_PA 0xc0010117ULL
-
#define SVM_VM_CR_SVM_DISABLE 4
#define SVM_SELECTOR_S_SHIFT 4
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 593636275238..e0f9aa16358b 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -118,7 +118,7 @@ static inline void cpu_svm_disable(void)
wrmsrl(MSR_VM_HSAVE_PA, 0);
rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+ wrmsrl(MSR_EFER, efer & ~EFER_SVME);
}
/** Makes sure SVM is disabled, if it is supported on the CPU
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index d0238e6151d8..498f944010b9 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -270,8 +270,9 @@ enum vmcs_field {
#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
-#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */
+#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
+#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define GUEST_INTR_STATE_STI 0x00000001
@@ -311,7 +312,7 @@ enum vmcs_field {
#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */
#define TYPE_MOV_TO_DR (0 << 4)
#define TYPE_MOV_FROM_DR (1 << 4)
-#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. */
+#define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */
/* segment AR */
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index 65792c2cc462..52c839875478 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -87,30 +87,15 @@ config X86_POWERNOW_K7_ACPI
config X86_POWERNOW_K8
tristate "AMD Opteron/Athlon64 PowerNow!"
select CPU_FREQ_TABLE
+ depends on ACPI && ACPI_PROCESSOR
help
- This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors.
+ This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors.
To compile this driver as a module, choose M here: the
module will be called powernow-k8.
For details, take a look at <file:Documentation/cpu-freq/>.
- If in doubt, say N.
-
-config X86_POWERNOW_K8_ACPI
- bool
- prompt "ACPI Support" if X86_32
- depends on ACPI && X86_POWERNOW_K8 && ACPI_PROCESSOR
- depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m)
- default y
- help
- This provides access to the K8s Processor Performance States via ACPI.
- This driver is probably required for CPUFreq to work with multi-socket and
- SMP systems. It is not required on at least some single-socket yet
- multi-core systems, even if SMP is enabled.
-
- It is safe to say Y here.
-
config X86_GX_SUSPMOD
tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
depends on X86_32 && PCI
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
index 560f7760dae5..509296df294d 100644
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -1,6 +1,11 @@
+# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
+# K8 systems. ACPI is preferred to all other hardware-specific drivers.
+# speedstep-* is preferred over p4-clockmod.
+
+obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
+obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o
-obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
obj-$(CONFIG_X86_LONGHAUL) += longhaul.o
obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o
obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o
@@ -10,7 +15,6 @@ obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o
obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o
obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o
obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o
-obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o
obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o
obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 22590cf688ae..23da96e57b17 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -1,5 +1,5 @@
/*
- * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $)
+ * acpi-cpufreq.c - ACPI Processor P-States Driver
*
* Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
* Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
@@ -36,16 +36,18 @@
#include <linux/ftrace.h>
#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+
#include <acpi/processor.h>
-#include <asm/io.h>
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
-#include <asm/delay.h>
-#include <asm/uaccess.h>
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+ "acpi-cpufreq", msg)
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
@@ -95,7 +97,7 @@ static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
perf = data->acpi_data;
- for (i=0; i<perf->state_count; i++) {
+ for (i = 0; i < perf->state_count; i++) {
if (value == perf->states[i].status)
return data->freq_table[i].frequency;
}
@@ -110,7 +112,7 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
msr &= INTEL_MSR_RANGE;
perf = data->acpi_data;
- for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
+ for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
if (msr == perf->states[data->freq_table[i].index].status)
return data->freq_table[i].frequency;
}
@@ -138,15 +140,13 @@ struct io_addr {
u8 bit_width;
};
-typedef union {
- struct msr_addr msr;
- struct io_addr io;
-} drv_addr_union;
-
struct drv_cmd {
unsigned int type;
const struct cpumask *mask;
- drv_addr_union addr;
+ union {
+ struct msr_addr msr;
+ struct io_addr io;
+ } addr;
u32 val;
};
@@ -369,7 +369,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
unsigned int cur_freq;
unsigned int i;
- for (i=0; i<100; i++) {
+ for (i = 0; i < 100; i++) {
cur_freq = extract_freq(get_cur_val(mask), data);
if (cur_freq == freq)
return 1;
@@ -494,7 +494,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
unsigned long freq;
unsigned long freqn = perf->states[0].core_frequency * 1000;
- for (i=0; i<(perf->state_count-1); i++) {
+ for (i = 0; i < (perf->state_count-1); i++) {
freq = freqn;
freqn = perf->states[i+1].core_frequency * 1000;
if ((2 * cpu_khz) > (freqn + freq)) {
@@ -673,7 +673,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
/* detect transition latency */
policy->cpuinfo.transition_latency = 0;
- for (i=0; i<perf->state_count; i++) {
+ for (i = 0; i < perf->state_count; i++) {
if ((perf->states[i].transition_latency * 1000) >
policy->cpuinfo.transition_latency)
policy->cpuinfo.transition_latency =
@@ -682,8 +682,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
data->max_freq = perf->states[0].core_frequency * 1000;
/* table init */
- for (i=0; i<perf->state_count; i++) {
- if (i>0 && perf->states[i].core_frequency >=
+ for (i = 0; i < perf->state_count; i++) {
+ if (i > 0 && perf->states[i].core_frequency >=
data->freq_table[valid_states-1].frequency / 1000)
continue;
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index 965ea52767ac..733093d60436 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -32,7 +32,7 @@
* nforce2_chipset:
* FSB is changed using the chipset
*/
-static struct pci_dev *nforce2_chipset_dev;
+static struct pci_dev *nforce2_dev;
/* fid:
* multiplier * 10
@@ -56,7 +56,9 @@ MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
MODULE_PARM_DESC(min_fsb,
"Minimum FSB to use, if not defined: current FSB - 50");
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg)
+#define PFX "cpufreq-nforce2: "
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+ "cpufreq-nforce2", msg)
/**
* nforce2_calc_fsb - calculate FSB
@@ -118,11 +120,11 @@ static void nforce2_write_pll(int pll)
int temp;
/* Set the pll addr. to 0x00 */
- pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0);
+ pci_write_config_dword(nforce2_dev, NFORCE2_PLLADR, 0);
/* Now write the value in all 64 registers */
for (temp = 0; temp <= 0x3f; temp++)
- pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, pll);
+ pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll);
return;
}
@@ -139,8 +141,8 @@ static unsigned int nforce2_fsb_read(int bootfsb)
u32 fsb, temp = 0;
/* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
- nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
- 0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL);
+ nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 0x01EF,
+ PCI_ANY_ID, PCI_ANY_ID, NULL);
if (!nforce2_sub5)
return 0;
@@ -148,13 +150,13 @@ static unsigned int nforce2_fsb_read(int bootfsb)
fsb /= 1000000;
/* Check if PLL register is already set */
- pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
+ pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
if (bootfsb || !temp)
return fsb;
/* Use PLL register FSB value */
- pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp);
+ pci_read_config_dword(nforce2_dev, NFORCE2_PLLREG, &temp);
fsb = nforce2_calc_fsb(temp);
return fsb;
@@ -174,18 +176,18 @@ static int nforce2_set_fsb(unsigned int fsb)
int pll = 0;
if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
- printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb);
+ printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb);
return -EINVAL;
}
tfsb = nforce2_fsb_read(0);
if (!tfsb) {
- printk(KERN_ERR "cpufreq: Error while reading the FSB\n");
+ printk(KERN_ERR PFX "Error while reading the FSB\n");
return -EINVAL;
}
/* First write? Then set actual value */
- pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
+ pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
if (!temp) {
pll = nforce2_calc_pll(tfsb);
@@ -197,7 +199,7 @@ static int nforce2_set_fsb(unsigned int fsb)
/* Enable write access */
temp = 0x01;
- pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp);
+ pci_write_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8)temp);
diff = tfsb - fsb;
@@ -222,7 +224,7 @@ static int nforce2_set_fsb(unsigned int fsb)
}
temp = 0x40;
- pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp);
+ pci_write_config_byte(nforce2_dev, NFORCE2_PLLADR, (u8)temp);
return 0;
}
@@ -244,7 +246,8 @@ static unsigned int nforce2_get(unsigned int cpu)
* nforce2_target - set a new CPUFreq policy
* @policy: new policy
* @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ * @relation: how that frequency relates to achieved frequency
+ * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
*
* Sets a new CPUFreq policy.
*/
@@ -276,7 +279,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
/* local_irq_save(flags); */
if (nforce2_set_fsb(target_fsb) < 0)
- printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n",
+ printk(KERN_ERR PFX "Changing FSB to %d failed\n",
target_fsb);
else
dprintk("Changed FSB successfully to %d\n",
@@ -327,8 +330,8 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
/* FIX: Get FID from CPU */
if (!fid) {
if (!cpu_khz) {
- printk(KERN_WARNING
- "cpufreq: cpu_khz not set, can't calculate multiplier!\n");
+ printk(KERN_WARNING PFX
+ "cpu_khz not set, can't calculate multiplier!\n");
return -ENODEV;
}
@@ -343,7 +346,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
}
}
- printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb,
+ printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb,
fid / 10, fid % 10);
/* Set maximum FSB to FSB at boot time */
@@ -392,17 +395,18 @@ static struct cpufreq_driver nforce2_driver = {
*/
static unsigned int nforce2_detect_chipset(void)
{
- nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
+ nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
PCI_DEVICE_ID_NVIDIA_NFORCE2,
PCI_ANY_ID, PCI_ANY_ID, NULL);
- if (nforce2_chipset_dev == NULL)
+ if (nforce2_dev == NULL)
return -ENODEV;
- printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n",
- nforce2_chipset_dev->revision);
- printk(KERN_INFO
- "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n");
+ printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n",
+ nforce2_dev->revision);
+ printk(KERN_INFO PFX
+ "FSB changing is maybe unstable and can lead to "
+ "crashes and data loss.\n");
return 0;
}
@@ -420,7 +424,7 @@ static int __init nforce2_init(void)
/* detect chipset */
if (nforce2_detect_chipset()) {
- printk(KERN_ERR "cpufreq: No nForce2 chipset.\n");
+ printk(KERN_INFO PFX "No nForce2 chipset.\n");
return -ENODEV;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index 41ab3f064cb1..35a257dd4bb7 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -12,12 +12,12 @@
#include <linux/cpufreq.h>
#include <linux/ioport.h>
#include <linux/slab.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+#include <linux/delay.h>
#include <asm/msr.h>
#include <asm/tsc.h>
-#include <asm/timex.h>
-#include <asm/io.h>
-#include <asm/delay.h>
#define EPS_BRAND_C7M 0
#define EPS_BRAND_C7 1
@@ -184,7 +184,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
break;
}
- switch(brand) {
+ switch (brand) {
case EPS_BRAND_C7M:
printk(KERN_CONT "C7-M\n");
break;
@@ -218,17 +218,20 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
/* Print voltage and multiplier */
rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
current_voltage = lo & 0xff;
- printk(KERN_INFO "eps: Current voltage = %dmV\n", current_voltage * 16 + 700);
+ printk(KERN_INFO "eps: Current voltage = %dmV\n",
+ current_voltage * 16 + 700);
current_multiplier = (lo >> 8) & 0xff;
printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier);
/* Print limits */
max_voltage = hi & 0xff;
- printk(KERN_INFO "eps: Highest voltage = %dmV\n", max_voltage * 16 + 700);
+ printk(KERN_INFO "eps: Highest voltage = %dmV\n",
+ max_voltage * 16 + 700);
max_multiplier = (hi >> 8) & 0xff;
printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier);
min_voltage = (hi >> 16) & 0xff;
- printk(KERN_INFO "eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700);
+ printk(KERN_INFO "eps: Lowest voltage = %dmV\n",
+ min_voltage * 16 + 700);
min_multiplier = (hi >> 24) & 0xff;
printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier);
@@ -318,7 +321,7 @@ static int eps_cpu_exit(struct cpufreq_policy *policy)
return 0;
}
-static struct freq_attr* eps_attr[] = {
+static struct freq_attr *eps_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
NULL,
};
@@ -356,7 +359,7 @@ static void __exit eps_exit(void)
cpufreq_unregister_driver(&eps_driver);
}
-MODULE_AUTHOR("Rafa³ Bilski <rafalbilski@interia.pl>");
+MODULE_AUTHOR("Rafal Bilski <rafalbilski@interia.pl>");
MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's.");
MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index fe613c93b366..006b278b0d5d 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -184,7 +184,8 @@ static int elanfreq_target(struct cpufreq_policy *policy,
{
unsigned int newstate = 0;
- if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate))
+ if (cpufreq_frequency_table_target(policy, &elanfreq_table[0],
+ target_freq, relation, &newstate))
return -EINVAL;
elanfreq_set_cpu_state(newstate);
@@ -301,7 +302,8 @@ static void __exit elanfreq_exit(void)
module_param(max_freq, int, 0444);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>");
+MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, "
+ "Sven Geggus <sven@geggus.net>");
MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs");
module_init(elanfreq_init);
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index 9d9eae82e60f..ac27ec2264d5 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -79,8 +79,9 @@
#include <linux/smp.h>
#include <linux/cpufreq.h>
#include <linux/pci.h>
+#include <linux/errno.h>
+
#include <asm/processor-cyrix.h>
-#include <asm/errno.h>
/* PCI config registers, all at F0 */
#define PCI_PMER1 0x80 /* power management enable register 1 */
@@ -122,8 +123,8 @@ static struct gxfreq_params *gx_params;
static int stock_freq;
/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */
-static int pci_busclk = 0;
-module_param (pci_busclk, int, 0444);
+static int pci_busclk;
+module_param(pci_busclk, int, 0444);
/* maximum duration for which the cpu may be suspended
* (32us * MAX_DURATION). If no parameter is given, this defaults
@@ -132,7 +133,7 @@ module_param (pci_busclk, int, 0444);
* is suspended -- processing power is just 0.39% of what it used to be,
* though. 781.25 kHz(!) for a 200 MHz processor -- wow. */
static int max_duration = 255;
-module_param (max_duration, int, 0444);
+module_param(max_duration, int, 0444);
/* For the default policy, we want at least some processing power
* - let's say 5%. (min = maxfreq / POLICY_MIN_DIV)
@@ -140,7 +141,8 @@ module_param (max_duration, int, 0444);
#define POLICY_MIN_DIV 20
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+ "gx-suspmod", msg)
/**
* we can detect a core multipiler from dir0_lsb
@@ -166,12 +168,20 @@ static int gx_freq_mult[16] = {
* Low Level chipset interface *
****************************************************************/
static struct pci_device_id gx_chipset_tbl[] __initdata = {
- { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID },
- { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID },
- { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID },
+ { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY,
+ PCI_ANY_ID, PCI_ANY_ID },
+ { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520,
+ PCI_ANY_ID, PCI_ANY_ID },
+ { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510,
+ PCI_ANY_ID, PCI_ANY_ID },
{ 0, },
};
+static void gx_write_byte(int reg, int value)
+{
+ pci_write_config_byte(gx_params->cs55x0, reg, value);
+}
+
/**
* gx_detect_chipset:
*
@@ -200,7 +210,8 @@ static __init struct pci_dev *gx_detect_chipset(void)
/**
* gx_get_cpuspeed:
*
- * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs.
+ * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi
+ * Geode CPU runs.
*/
static unsigned int gx_get_cpuspeed(unsigned int cpu)
{
@@ -217,17 +228,18 @@ static unsigned int gx_get_cpuspeed(unsigned int cpu)
*
**/
-static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration)
+static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration,
+ u8 *off_duration)
{
unsigned int i;
u8 tmp_on, tmp_off;
int old_tmp_freq = stock_freq;
int tmp_freq;
- *off_duration=1;
- *on_duration=0;
+ *off_duration = 1;
+ *on_duration = 0;
- for (i=max_duration; i>0; i--) {
+ for (i = max_duration; i > 0; i--) {
tmp_off = ((khz * i) / stock_freq) & 0xff;
tmp_on = i - tmp_off;
tmp_freq = (stock_freq * tmp_off) / i;
@@ -259,26 +271,34 @@ static void gx_set_cpuspeed(unsigned int khz)
freqs.cpu = 0;
freqs.old = gx_get_cpuspeed(0);
- new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration);
+ new_khz = gx_validate_speed(khz, &gx_params->on_duration,
+ &gx_params->off_duration);
freqs.new = new_khz;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
local_irq_save(flags);
- if (new_khz != stock_freq) { /* if new khz == 100% of CPU speed, it is special case */
+
+
+ if (new_khz != stock_freq) {
+ /* if new khz == 100% of CPU speed, it is special case */
switch (gx_params->cs55x0->device) {
case PCI_DEVICE_ID_CYRIX_5530_LEGACY:
pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP;
/* FIXME: need to test other values -- Zwane,Miura */
- pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */
- pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */
- pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1);
-
- if (gx_params->cs55x0->revision < 0x10) { /* CS5530(rev 1.2, 1.3) */
- suscfg = gx_params->pci_suscfg | SUSMOD;
- } else { /* CS5530A,B.. */
- suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE;
+ /* typical 2 to 4ms */
+ gx_write_byte(PCI_IRQTC, 4);
+ /* typical 50 to 100ms */
+ gx_write_byte(PCI_VIDTC, 100);
+ gx_write_byte(PCI_PMER1, pmer1);
+
+ if (gx_params->cs55x0->revision < 0x10) {
+ /* CS5530(rev 1.2, 1.3) */
+ suscfg = gx_params->pci_suscfg|SUSMOD;
+ } else {
+ /* CS5530A,B.. */
+ suscfg = gx_params->pci_suscfg|SUSMOD|PWRSVE;
}
break;
case PCI_DEVICE_ID_CYRIX_5520:
@@ -294,13 +314,13 @@ static void gx_set_cpuspeed(unsigned int khz)
suscfg = gx_params->pci_suscfg & ~(SUSMOD);
gx_params->off_duration = 0;
gx_params->on_duration = 0;
- dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n");
+ dprintk("suspend modulation disabled: cpu runs 100%% speed.\n");
}
- pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration);
- pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration);
+ gx_write_byte(PCI_MODOFF, gx_params->off_duration);
+ gx_write_byte(PCI_MODON, gx_params->on_duration);
- pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg);
+ gx_write_byte(PCI_SUSCFG, suscfg);
pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg);
local_irq_restore(flags);
@@ -334,7 +354,8 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy)
return -EINVAL;
policy->cpu = 0;
- cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq);
+ cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
+ stock_freq);
/* it needs to be assured that at least one supported frequency is
* within policy->min and policy->max. If it is not, policy->max
@@ -354,7 +375,8 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy)
policy->max = tmp_freq;
if (policy->max < policy->min)
policy->max = policy->min;
- cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq);
+ cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
+ stock_freq);
return 0;
}
@@ -398,18 +420,18 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
return -ENODEV;
/* determine maximum frequency */
- if (pci_busclk) {
+ if (pci_busclk)
maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
- } else if (cpu_khz) {
+ else if (cpu_khz)
maxfreq = cpu_khz;
- } else {
+ else
maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
- }
+
stock_freq = maxfreq;
curfreq = gx_get_cpuspeed(0);
dprintk("cpu max frequency is %d.\n", maxfreq);
- dprintk("cpu current frequency is %dkHz.\n",curfreq);
+ dprintk("cpu current frequency is %dkHz.\n", curfreq);
/* setup basic struct for cpufreq API */
policy->cpu = 0;
@@ -447,7 +469,8 @@ static int __init cpufreq_gx_init(void)
struct pci_dev *gx_pci;
/* Test if we have the right hardware */
- if ((gx_pci = gx_detect_chipset()) == NULL)
+ gx_pci = gx_detect_chipset();
+ if (gx_pci == NULL)
return -ENODEV;
/* check whether module parameters are sane */
@@ -468,9 +491,11 @@ static int __init cpufreq_gx_init(void)
pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1));
pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
- pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration));
+ pci_read_config_byte(params->cs55x0, PCI_MODOFF,
+ &(params->off_duration));
- if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) {
+ ret = cpufreq_register_driver(&gx_suspmod_driver);
+ if (ret) {
kfree(params);
return ret; /* register error! */
}
@@ -485,9 +510,9 @@ static void __exit cpufreq_gx_exit(void)
kfree(gx_params);
}
-MODULE_AUTHOR ("Hiroshi Miura <miura@da-cha.org>");
-MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>");
+MODULE_DESCRIPTION("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
+MODULE_LICENSE("GPL");
module_init(cpufreq_gx_init);
module_exit(cpufreq_gx_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index a4cff5d6e380..f1c51aea064d 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -30,12 +30,12 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+#include <linux/acpi.h>
+#include <linux/kernel.h>
#include <asm/msr.h>
-#include <asm/timex.h>
-#include <asm/io.h>
-#include <asm/acpi.h>
-#include <linux/acpi.h>
#include <acpi/processor.h>
#include "longhaul.h"
@@ -58,7 +58,7 @@
#define USE_NORTHBRIDGE (1 << 2)
static int cpu_model;
-static unsigned int numscales=16;
+static unsigned int numscales = 16;
static unsigned int fsb;
static const struct mV_pos *vrm_mV_table;
@@ -67,8 +67,8 @@ static const unsigned char *mV_vrm_table;
static unsigned int highest_speed, lowest_speed; /* kHz */
static unsigned int minmult, maxmult;
static int can_scale_voltage;
-static struct acpi_processor *pr = NULL;
-static struct acpi_processor_cx *cx = NULL;
+static struct acpi_processor *pr;
+static struct acpi_processor_cx *cx;
static u32 acpi_regs_addr;
static u8 longhaul_flags;
static unsigned int longhaul_index;
@@ -78,12 +78,13 @@ static int scale_voltage;
static int disable_acpi_c3;
static int revid_errata;
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+ "longhaul", msg)
/* Clock ratios multiplied by 10 */
-static int clock_ratio[32];
-static int eblcr_table[32];
+static int mults[32];
+static int eblcr[32];
static int longhaul_version;
static struct cpufreq_frequency_table *longhaul_table;
@@ -93,7 +94,7 @@ static char speedbuffer[8];
static char *print_speed(int speed)
{
if (speed < 1000) {
- snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed);
+ snprintf(speedbuffer, sizeof(speedbuffer), "%dMHz", speed);
return speedbuffer;
}
@@ -122,27 +123,28 @@ static unsigned int calc_speed(int mult)
static int longhaul_get_cpu_mult(void)
{
- unsigned long invalue=0,lo, hi;
+ unsigned long invalue = 0, lo, hi;
- rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi);
- invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22;
- if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) {
+ rdmsr(MSR_IA32_EBL_CR_POWERON, lo, hi);
+ invalue = (lo & (1<<22|1<<23|1<<24|1<<25))>>22;
+ if (longhaul_version == TYPE_LONGHAUL_V2 ||
+ longhaul_version == TYPE_POWERSAVER) {
if (lo & (1<<27))
- invalue+=16;
+ invalue += 16;
}
- return eblcr_table[invalue];
+ return eblcr[invalue];
}
/* For processor with BCR2 MSR */
-static void do_longhaul1(unsigned int clock_ratio_index)
+static void do_longhaul1(unsigned int mults_index)
{
union msr_bcr2 bcr2;
rdmsrl(MSR_VIA_BCR2, bcr2.val);
/* Enable software clock multiplier */
bcr2.bits.ESOFTBF = 1;
- bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff;
+ bcr2.bits.CLOCKMUL = mults_index & 0xff;
/* Sync to timer tick */
safe_halt();
@@ -161,7 +163,7 @@ static void do_longhaul1(unsigned int clock_ratio_index)
/* For processor with Longhaul MSR */
-static void do_powersaver(int cx_address, unsigned int clock_ratio_index,
+static void do_powersaver(int cx_address, unsigned int mults_index,
unsigned int dir)
{
union msr_longhaul longhaul;
@@ -173,11 +175,11 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index,
longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
else
longhaul.bits.RevisionKey = 0;
- longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf;
- longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
+ longhaul.bits.SoftBusRatio = mults_index & 0xf;
+ longhaul.bits.SoftBusRatio4 = (mults_index & 0x10) >> 4;
/* Setup new voltage */
if (can_scale_voltage)
- longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f;
+ longhaul.bits.SoftVID = (mults_index >> 8) & 0x1f;
/* Sync to timer tick */
safe_halt();
/* Raise voltage if necessary */
@@ -240,14 +242,14 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index,
/**
* longhaul_set_cpu_frequency()
- * @clock_ratio_index : bitpattern of the new multiplier.
+ * @mults_index : bitpattern of the new multiplier.
*
* Sets a new clock ratio.
*/
static void longhaul_setstate(unsigned int table_index)
{
- unsigned int clock_ratio_index;
+ unsigned int mults_index;
int speed, mult;
struct cpufreq_freqs freqs;
unsigned long flags;
@@ -256,9 +258,9 @@ static void longhaul_setstate(unsigned int table_index)
u32 bm_timeout = 1000;
unsigned int dir = 0;
- clock_ratio_index = longhaul_table[table_index].index;
+ mults_index = longhaul_table[table_index].index;
/* Safety precautions */
- mult = clock_ratio[clock_ratio_index & 0x1f];
+ mult = mults[mults_index & 0x1f];
if (mult == -1)
return;
speed = calc_speed(mult);
@@ -274,7 +276,7 @@ static void longhaul_setstate(unsigned int table_index)
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
- dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
+ dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
fsb, mult/10, mult%10, print_speed(speed/1000));
retry_loop:
preempt_disable();
@@ -282,8 +284,8 @@ retry_loop:
pic2_mask = inb(0xA1);
pic1_mask = inb(0x21); /* works on C3. save mask. */
- outb(0xFF,0xA1); /* Overkill */
- outb(0xFE,0x21); /* TMR0 only */
+ outb(0xFF, 0xA1); /* Overkill */
+ outb(0xFE, 0x21); /* TMR0 only */
/* Wait while PCI bus is busy. */
if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE
@@ -312,7 +314,7 @@ retry_loop:
* Software controlled multipliers only.
*/
case TYPE_LONGHAUL_V1:
- do_longhaul1(clock_ratio_index);
+ do_longhaul1(mults_index);
break;
/*
@@ -327,9 +329,9 @@ retry_loop:
if (longhaul_flags & USE_ACPI_C3) {
/* Don't allow wakeup */
acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
- do_powersaver(cx->address, clock_ratio_index, dir);
+ do_powersaver(cx->address, mults_index, dir);
} else {
- do_powersaver(0, clock_ratio_index, dir);
+ do_powersaver(0, mults_index, dir);
}
break;
}
@@ -341,8 +343,8 @@ retry_loop:
/* Enable bus master arbitration */
acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
}
- outb(pic2_mask,0xA1); /* restore mask */
- outb(pic1_mask,0x21);
+ outb(pic2_mask, 0xA1); /* restore mask */
+ outb(pic1_mask, 0x21);
local_irq_restore(flags);
preempt_enable();
@@ -392,7 +394,8 @@ retry_loop:
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
if (!bm_timeout)
- printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n");
+ printk(KERN_INFO PFX "Warning: Timeout while waiting for "
+ "idle PCI bus.\n");
}
/*
@@ -458,31 +461,32 @@ static int __init longhaul_get_ranges(void)
break;
}
- dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n",
+ dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n",
minmult/10, minmult%10, maxmult/10, maxmult%10);
highest_speed = calc_speed(maxmult);
lowest_speed = calc_speed(minmult);
- dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb,
+ dprintk("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb,
print_speed(lowest_speed/1000),
print_speed(highest_speed/1000));
if (lowest_speed == highest_speed) {
- printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n");
+ printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n");
return -EINVAL;
}
if (lowest_speed > highest_speed) {
- printk (KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
+ printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
lowest_speed, highest_speed);
return -EINVAL;
}
- longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL);
- if(!longhaul_table)
+ longhaul_table = kmalloc((numscales + 1) * sizeof(*longhaul_table),
+ GFP_KERNEL);
+ if (!longhaul_table)
return -ENOMEM;
for (j = 0; j < numscales; j++) {
- ratio = clock_ratio[j];
+ ratio = mults[j];
if (ratio == -1)
continue;
if (ratio > maxmult || ratio < minmult)
@@ -507,13 +511,10 @@ static int __init longhaul_get_ranges(void)
}
}
if (min_i != j) {
- unsigned int temp;
- temp = longhaul_table[j].frequency;
- longhaul_table[j].frequency = longhaul_table[min_i].frequency;
- longhaul_table[min_i].frequency = temp;
- temp = longhaul_table[j].index;
- longhaul_table[j].index = longhaul_table[min_i].index;
- longhaul_table[min_i].index = temp;
+ swap(longhaul_table[j].frequency,
+ longhaul_table[min_i].frequency);
+ swap(longhaul_table[j].index,
+ longhaul_table[min_i].index);
}
}
@@ -521,7 +522,7 @@ static int __init longhaul_get_ranges(void)
/* Find index we are running on */
for (j = 0; j < k; j++) {
- if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) {
+ if (mults[longhaul_table[j].index & 0x1f] == mult) {
longhaul_index = j;
break;
}
@@ -559,20 +560,22 @@ static void __init longhaul_setup_voltagescaling(void)
maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
- printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
+ printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
"Voltage scaling disabled.\n",
- minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000);
+ minvid.mV/1000, minvid.mV%1000,
+ maxvid.mV/1000, maxvid.mV%1000);
return;
}
if (minvid.mV == maxvid.mV) {
- printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are "
- "both %d.%03d. Voltage scaling disabled\n",
+ printk(KERN_INFO PFX "Claims to support voltage scaling but "
+ "min & max are both %d.%03d. "
+ "Voltage scaling disabled\n",
maxvid.mV/1000, maxvid.mV%1000);
return;
}
- /* How many voltage steps */
+ /* How many voltage steps*/
numvscales = maxvid.pos - minvid.pos + 1;
printk(KERN_INFO PFX
"Max VID=%d.%03d "
@@ -586,7 +589,7 @@ static void __init longhaul_setup_voltagescaling(void)
j = longhaul.bits.MinMHzBR;
if (longhaul.bits.MinMHzBR4)
j += 16;
- min_vid_speed = eblcr_table[j];
+ min_vid_speed = eblcr[j];
if (min_vid_speed == -1)
return;
switch (longhaul.bits.MinMHzFSB) {
@@ -617,7 +620,8 @@ static void __init longhaul_setup_voltagescaling(void)
pos = minvid.pos;
longhaul_table[j].index |= mV_vrm_table[pos] << 8;
vid = vrm_mV_table[mV_vrm_table[pos]];
- printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV);
+ printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n",
+ speed, j, vid.mV);
j++;
}
@@ -640,7 +644,8 @@ static int longhaul_target(struct cpufreq_policy *policy,
unsigned int dir = 0;
u8 vid, current_vid;
- if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index))
+ if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq,
+ relation, &table_index))
return -EINVAL;
/* Don't set same frequency again */
@@ -656,7 +661,8 @@ static int longhaul_target(struct cpufreq_policy *policy,
* this in hardware, C3 is old and we need to do this
* in software. */
i = longhaul_index;
- current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f;
+ current_vid = (longhaul_table[longhaul_index].index >> 8);
+ current_vid &= 0x1f;
if (table_index > longhaul_index)
dir = 1;
while (i != table_index) {
@@ -691,9 +697,9 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
{
struct acpi_device *d;
- if ( acpi_bus_get_device(obj_handle, &d) ) {
+ if (acpi_bus_get_device(obj_handle, &d))
return 0;
- }
+
*return_value = acpi_driver_data(d);
return 1;
}
@@ -750,7 +756,7 @@ static int longhaul_setup_southbridge(void)
/* Find VT8235 southbridge */
dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL);
if (dev == NULL)
- /* Find VT8237 southbridge */
+ /* Find VT8237 southbridge */
dev = pci_get_device(PCI_VENDOR_ID_VIA,
PCI_DEVICE_ID_VIA_8237, NULL);
if (dev != NULL) {
@@ -769,7 +775,8 @@ static int longhaul_setup_southbridge(void)
if (pci_cmd & 1 << 7) {
pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
acpi_regs_addr &= 0xff00;
- printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr);
+ printk(KERN_INFO PFX "ACPI I/O at 0x%x\n",
+ acpi_regs_addr);
}
pci_dev_put(dev);
@@ -781,7 +788,7 @@ static int longhaul_setup_southbridge(void)
static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
{
struct cpuinfo_x86 *c = &cpu_data(0);
- char *cpuname=NULL;
+ char *cpuname = NULL;
int ret;
u32 lo, hi;
@@ -791,8 +798,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
cpu_model = CPU_SAMUEL;
cpuname = "C3 'Samuel' [C5A]";
longhaul_version = TYPE_LONGHAUL_V1;
- memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio));
- memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr));
+ memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
+ memcpy(eblcr, samuel1_eblcr, sizeof(samuel1_eblcr));
break;
case 7:
@@ -803,10 +810,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
cpuname = "C3 'Samuel 2' [C5B]";
/* Note, this is not a typo, early Samuel2's had
* Samuel1 ratios. */
- memcpy(clock_ratio, samuel1_clock_ratio,
- sizeof(samuel1_clock_ratio));
- memcpy(eblcr_table, samuel2_eblcr,
- sizeof(samuel2_eblcr));
+ memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
+ memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr));
break;
case 1 ... 15:
longhaul_version = TYPE_LONGHAUL_V1;
@@ -817,10 +822,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
cpu_model = CPU_EZRA;
cpuname = "C3 'Ezra' [C5C]";
}
- memcpy(clock_ratio, ezra_clock_ratio,
- sizeof(ezra_clock_ratio));
- memcpy(eblcr_table, ezra_eblcr,
- sizeof(ezra_eblcr));
+ memcpy(mults, ezra_mults, sizeof(ezra_mults));
+ memcpy(eblcr, ezra_eblcr, sizeof(ezra_eblcr));
break;
}
break;
@@ -829,18 +832,16 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
cpu_model = CPU_EZRA_T;
cpuname = "C3 'Ezra-T' [C5M]";
longhaul_version = TYPE_POWERSAVER;
- numscales=32;
- memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio));
- memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr));
+ numscales = 32;
+ memcpy(mults, ezrat_mults, sizeof(ezrat_mults));
+ memcpy(eblcr, ezrat_eblcr, sizeof(ezrat_eblcr));
break;
case 9:
longhaul_version = TYPE_POWERSAVER;
numscales = 32;
- memcpy(clock_ratio,
- nehemiah_clock_ratio,
- sizeof(nehemiah_clock_ratio));
- memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr));
+ memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults));
+ memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr));
switch (c->x86_mask) {
case 0 ... 1:
cpu_model = CPU_NEHEMIAH;
@@ -869,14 +870,14 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
longhaul_version = TYPE_LONGHAUL_V1;
}
- printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname);
+ printk(KERN_INFO PFX "VIA %s CPU detected. ", cpuname);
switch (longhaul_version) {
case TYPE_LONGHAUL_V1:
case TYPE_LONGHAUL_V2:
- printk ("Longhaul v%d supported.\n", longhaul_version);
+ printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version);
break;
case TYPE_POWERSAVER:
- printk ("Powersaver supported.\n");
+ printk(KERN_CONT "Powersaver supported.\n");
break;
};
@@ -940,7 +941,7 @@ static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy)
return 0;
}
-static struct freq_attr* longhaul_attr[] = {
+static struct freq_attr *longhaul_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
NULL,
};
@@ -966,13 +967,15 @@ static int __init longhaul_init(void)
#ifdef CONFIG_SMP
if (num_online_cpus() > 1) {
- printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n");
+ printk(KERN_ERR PFX "More than 1 CPU detected, "
+ "longhaul disabled.\n");
return -ENODEV;
}
#endif
#ifdef CONFIG_X86_IO_APIC
if (cpu_has_apic) {
- printk(KERN_ERR PFX "APIC detected. Longhaul is currently broken in this configuration.\n");
+ printk(KERN_ERR PFX "APIC detected. Longhaul is currently "
+ "broken in this configuration.\n");
return -ENODEV;
}
#endif
@@ -993,8 +996,8 @@ static void __exit longhaul_exit(void)
{
int i;
- for (i=0; i < numscales; i++) {
- if (clock_ratio[i] == maxmult) {
+ for (i = 0; i < numscales; i++) {
+ if (mults[i] == maxmult) {
longhaul_setstate(i);
break;
}
@@ -1007,11 +1010,11 @@ static void __exit longhaul_exit(void)
/* Even if BIOS is exporting ACPI C3 state, and it is used
* with success when CPU is idle, this state doesn't
* trigger frequency transition in some cases. */
-module_param (disable_acpi_c3, int, 0644);
+module_param(disable_acpi_c3, int, 0644);
MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support");
/* Change CPU voltage with frequency. Very usefull to save
* power, but most VIA C3 processors aren't supporting it. */
-module_param (scale_voltage, int, 0644);
+module_param(scale_voltage, int, 0644);
MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
/* Force revision key to 0 for processors which doesn't
* support voltage scaling, but are introducing itself as
@@ -1019,9 +1022,9 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
module_param(revid_errata, int, 0644);
MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
-MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
-MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors.");
+MODULE_LICENSE("GPL");
late_initcall(longhaul_init);
module_exit(longhaul_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h
index 4fcc320997df..e2360a469f79 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.h
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h
@@ -49,14 +49,14 @@ union msr_longhaul {
/*
* Clock ratio tables. Div/Mod by 10 to get ratio.
- * The eblcr ones specify the ratio read from the CPU.
- * The clock_ratio ones specify what to write to the CPU.
+ * The eblcr values specify the ratio read from the CPU.
+ * The mults values specify what to write to the CPU.
*/
/*
* VIA C3 Samuel 1 & Samuel 2 (stepping 0)
*/
-static const int __initdata samuel1_clock_ratio[16] = {
+static const int __initdata samuel1_mults[16] = {
-1, /* 0000 -> RESERVED */
30, /* 0001 -> 3.0x */
40, /* 0010 -> 4.0x */
@@ -119,7 +119,7 @@ static const int __initdata samuel2_eblcr[16] = {
/*
* VIA C3 Ezra
*/
-static const int __initdata ezra_clock_ratio[16] = {
+static const int __initdata ezra_mults[16] = {
100, /* 0000 -> 10.0x */
30, /* 0001 -> 3.0x */
40, /* 0010 -> 4.0x */
@@ -160,7 +160,7 @@ static const int __initdata ezra_eblcr[16] = {
/*
* VIA C3 (Ezra-T) [C5M].
*/
-static const int __initdata ezrat_clock_ratio[32] = {
+static const int __initdata ezrat_mults[32] = {
100, /* 0000 -> 10.0x */
30, /* 0001 -> 3.0x */
40, /* 0010 -> 4.0x */
@@ -235,7 +235,7 @@ static const int __initdata ezrat_eblcr[32] = {
/*
* VIA C3 Nehemiah */
-static const int __initdata nehemiah_clock_ratio[32] = {
+static const int __initdata nehemiah_mults[32] = {
100, /* 0000 -> 10.0x */
-1, /* 0001 -> 16.0x */
40, /* 0010 -> 4.0x */
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index 777a7ff075de..da5f70fcb766 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -11,12 +11,13 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpufreq.h>
+#include <linux/timex.h>
#include <asm/msr.h>
#include <asm/processor.h>
-#include <asm/timex.h>
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+ "longrun", msg)
static struct cpufreq_driver longrun_driver;
@@ -51,7 +52,7 @@ static void __init longrun_get_policy(struct cpufreq_policy *policy)
msr_lo &= 0x0000007F;
msr_hi &= 0x0000007F;
- if ( longrun_high_freq <= longrun_low_freq ) {
+ if (longrun_high_freq <= longrun_low_freq) {
/* Assume degenerate Longrun table */
policy->min = policy->max = longrun_high_freq;
} else {
@@ -79,7 +80,7 @@ static int longrun_set_policy(struct cpufreq_policy *policy)
if (!policy)
return -EINVAL;
- if ( longrun_high_freq <= longrun_low_freq ) {
+ if (longrun_high_freq <= longrun_low_freq) {
/* Assume degenerate Longrun table */
pctg_lo = pctg_hi = 100;
} else {
@@ -152,7 +153,7 @@ static unsigned int longrun_get(unsigned int cpu)
cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
dprintk("cpuid eax is %u\n", eax);
- return (eax * 1000);
+ return eax * 1000;
}
/**
@@ -196,7 +197,8 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
*high_freq = msr_lo * 1000; /* to kHz */
- dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq);
+ dprintk("longrun table interface told %u - %u kHz\n",
+ *low_freq, *high_freq);
if (*low_freq > *high_freq)
*low_freq = *high_freq;
@@ -219,7 +221,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
/* try decreasing in 10% steps, some processors react only
* on some barrier values */
- for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) {
+ for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) {
/* set to 0 to try_hi perf_pctg */
msr_lo &= 0xFFFFFF80;
msr_hi &= 0xFFFFFF80;
@@ -236,7 +238,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
/* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
* eqals
- * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
+ * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
*
* high_freq * perf_pctg is stored tempoarily into "ebx".
*/
@@ -317,9 +319,10 @@ static void __exit longrun_exit(void)
}
-MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors.");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("LongRun driver for Transmeta Crusoe and "
+ "Efficeon processors.");
+MODULE_LICENSE("GPL");
module_init(longrun_init);
module_exit(longrun_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index b585e04cbc9e..41ed94915f97 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -27,15 +27,17 @@
#include <linux/cpufreq.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
+#include <linux/timex.h>
#include <asm/processor.h>
#include <asm/msr.h>
-#include <asm/timex.h>
+#include <asm/timer.h>
#include "speedstep-lib.h"
#define PFX "p4-clockmod: "
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+ "p4-clockmod", msg)
/*
* Duty Cycle (3bits), note DC_DISABLE is not specified in
@@ -58,7 +60,8 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
{
u32 l, h;
- if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV))
+ if (!cpu_online(cpu) ||
+ (newstate > DC_DISABLE) || (newstate == DC_RESV))
return -EINVAL;
rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
@@ -66,7 +69,8 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
if (l & 0x01)
dprintk("CPU#%d currently thermal throttled\n", cpu);
- if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT))
+ if (has_N44_O17_errata[cpu] &&
+ (newstate == DC_25PT || newstate == DC_DFLT))
newstate = DC_38PT;
rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
@@ -112,7 +116,8 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
struct cpufreq_freqs freqs;
int i;
- if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate))
+ if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0],
+ target_freq, relation, &newstate))
return -EINVAL;
freqs.old = cpufreq_p4_get(policy->cpu);
@@ -127,7 +132,8 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
- /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
+ /* run on each logical CPU,
+ * see section 13.15.3 of IA32 Intel Architecture Software
* Developer's Manual, Volume 3
*/
for_each_cpu(i, policy->cpus)
@@ -153,28 +159,30 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
{
if (c->x86 == 0x06) {
if (cpu_has(c, X86_FEATURE_EST))
- printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. "
- "The acpi-cpufreq module offers voltage scaling"
- " in addition of frequency scaling. You should use "
- "that instead of p4-clockmod, if possible.\n");
+ printk(KERN_WARNING PFX "Warning: EST-capable CPU "
+ "detected. The acpi-cpufreq module offers "
+ "voltage scaling in addition of frequency "
+ "scaling. You should use that instead of "
+ "p4-clockmod, if possible.\n");
switch (c->x86_model) {
case 0x0E: /* Core */
case 0x0F: /* Core Duo */
case 0x16: /* Celeron Core */
p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
- return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE);
+ return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE);
case 0x0D: /* Pentium M (Dothan) */
p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
/* fall through */
case 0x09: /* Pentium M (Banias) */
- return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM);
+ return speedstep_get_frequency(SPEEDSTEP_CPU_PM);
}
}
if (c->x86 != 0xF) {
if (!cpu_has(c, X86_FEATURE_EST))
- printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. "
- "Please send an e-mail to <cpufreq@vger.kernel.org>\n");
+ printk(KERN_WARNING PFX "Unknown CPU. "
+ "Please send an e-mail to "
+ "<cpufreq@vger.kernel.org>\n");
return 0;
}
@@ -182,16 +190,16 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
* throttling is active or not. */
p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
- if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) {
+ if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
"The speedstep-ich or acpi cpufreq modules offer "
"voltage scaling in addition of frequency scaling. "
"You should use either one instead of p4-clockmod, "
"if possible.\n");
- return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M);
+ return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
}
- return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D);
+ return speedstep_get_frequency(SPEEDSTEP_CPU_P4D);
}
@@ -217,14 +225,20 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
dprintk("has errata -- disabling low frequencies\n");
}
+ if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D &&
+ c->x86_model < 2) {
+ /* switch to maximum frequency and measure result */
+ cpufreq_p4_setdc(policy->cpu, DC_DISABLE);
+ recalibrate_cpu_khz();
+ }
/* get max frequency */
stock_freq = cpufreq_p4_get_frequency(c);
if (!stock_freq)
return -EINVAL;
/* table init */
- for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
- if ((i<2) && (has_N44_O17_errata[policy->cpu]))
+ for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+ if ((i < 2) && (has_N44_O17_errata[policy->cpu]))
p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
else
p4clockmod_table[i].frequency = (stock_freq * i)/8;
@@ -232,7 +246,10 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
/* cpuinfo and default policy values */
- policy->cpuinfo.transition_latency = 1000000; /* assumed */
+
+ /* the transition latency is set to be 1 higher than the maximum
+ * transition latency of the ondemand governor */
+ policy->cpuinfo.transition_latency = 10000001;
policy->cur = stock_freq;
return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
@@ -258,12 +275,12 @@ static unsigned int cpufreq_p4_get(unsigned int cpu)
l = DC_DISABLE;
if (l != DC_DISABLE)
- return (stock_freq * l / 8);
+ return stock_freq * l / 8;
return stock_freq;
}
-static struct freq_attr* p4clockmod_attr[] = {
+static struct freq_attr *p4clockmod_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
NULL,
};
@@ -277,7 +294,6 @@ static struct cpufreq_driver p4clockmod_driver = {
.name = "p4-clockmod",
.owner = THIS_MODULE,
.attr = p4clockmod_attr,
- .hide_interface = 1,
};
@@ -299,9 +315,10 @@ static int __init cpufreq_p4_init(void)
ret = cpufreq_register_driver(&p4clockmod_driver);
if (!ret)
- printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n");
+ printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock "
+ "Modulation available\n");
- return (ret);
+ return ret;
}
@@ -311,9 +328,9 @@ static void __exit cpufreq_p4_exit(void)
}
-MODULE_AUTHOR ("Zwane Mwaikambo <zwane@commfireservices.com>");
-MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Zwane Mwaikambo <zwane@commfireservices.com>");
+MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
+MODULE_LICENSE("GPL");
late_initcall(cpufreq_p4_init);
module_exit(cpufreq_p4_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index c1ac5790c63e..f10dea409f40 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -1,6 +1,7 @@
/*
* This file was based upon code in Powertweak Linux (http://powertweak.sf.net)
- * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski.
+ * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä,
+ * Dominik Brodowski.
*
* Licensed under the terms of the GNU GPL License version 2.
*
@@ -13,14 +14,15 @@
#include <linux/cpufreq.h>
#include <linux/ioport.h>
#include <linux/slab.h>
-
-#include <asm/msr.h>
#include <linux/timex.h>
#include <linux/io.h>
+#include <asm/msr.h>
+
#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long
as it is unused */
+#define PFX "powernow-k6: "
static unsigned int busfreq; /* FSB, in 10 kHz */
static unsigned int max_multiplier;
@@ -47,8 +49,8 @@ static struct cpufreq_frequency_table clock_ratio[] = {
*/
static int powernow_k6_get_cpu_multiplier(void)
{
- u64 invalue = 0;
- u32 msrval;
+ u64 invalue = 0;
+ u32 msrval;
msrval = POWERNOW_IOPORT + 0x1;
wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
@@ -68,12 +70,12 @@ static int powernow_k6_get_cpu_multiplier(void)
*/
static void powernow_k6_set_state(unsigned int best_i)
{
- unsigned long outvalue = 0, invalue = 0;
- unsigned long msrval;
- struct cpufreq_freqs freqs;
+ unsigned long outvalue = 0, invalue = 0;
+ unsigned long msrval;
+ struct cpufreq_freqs freqs;
if (clock_ratio[best_i].index > max_multiplier) {
- printk(KERN_ERR "cpufreq: invalid target frequency\n");
+ printk(KERN_ERR PFX "invalid target frequency\n");
return;
}
@@ -119,7 +121,8 @@ static int powernow_k6_verify(struct cpufreq_policy *policy)
* powernow_k6_setpolicy - sets a new CPUFreq policy
* @policy: new policy
* @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ * @relation: how that frequency relates to achieved frequency
+ * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
*
* sets a new CPUFreq policy
*/
@@ -127,9 +130,10 @@ static int powernow_k6_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
- unsigned int newstate = 0;
+ unsigned int newstate = 0;
- if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate))
+ if (cpufreq_frequency_table_target(policy, &clock_ratio[0],
+ target_freq, relation, &newstate))
return -EINVAL;
powernow_k6_set_state(newstate);
@@ -140,7 +144,7 @@ static int powernow_k6_target(struct cpufreq_policy *policy,
static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
{
- unsigned int i;
+ unsigned int i, f;
int result;
if (policy->cpu != 0)
@@ -152,10 +156,11 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
/* table init */
for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
- if (clock_ratio[i].index > max_multiplier)
+ f = clock_ratio[i].index;
+ if (f > max_multiplier)
clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
else
- clock_ratio[i].frequency = busfreq * clock_ratio[i].index;
+ clock_ratio[i].frequency = busfreq * f;
}
/* cpuinfo and default policy values */
@@ -185,7 +190,9 @@ static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
static unsigned int powernow_k6_get(unsigned int cpu)
{
- return busfreq * powernow_k6_get_cpu_multiplier();
+ unsigned int ret;
+ ret = (busfreq * powernow_k6_get_cpu_multiplier());
+ return ret;
}
static struct freq_attr *powernow_k6_attr[] = {
@@ -221,7 +228,7 @@ static int __init powernow_k6_init(void)
return -ENODEV;
if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
- printk("cpufreq: PowerNow IOPORT region already used.\n");
+ printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n");
return -EIO;
}
@@ -246,7 +253,8 @@ static void __exit powernow_k6_exit(void)
}
-MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, "
+ "Dominik Brodowski <linux@brodo.de>");
MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 1b446d79a8fd..3c28ccd49742 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -6,10 +6,12 @@
* Licensed under the terms of the GNU GPL License version 2.
* Based upon datasheets & sample CPUs kindly provided by AMD.
*
- * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt.
- * - We cli/sti on stepping A0 CPUs around the FID/VID transition.
- * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect.
- * - We disable half multipliers if ACPI is used on A0 stepping CPUs.
+ * Errata 5:
+ * CPU may fail to execute a FID/VID change in presence of interrupt.
+ * - We cli/sti on stepping A0 CPUs around the FID/VID transition.
+ * Errata 15:
+ * CPU with half frequency multipliers may hang upon wakeup from disconnect.
+ * - We disable half multipliers if ACPI is used on A0 stepping CPUs.
*/
#include <linux/kernel.h>
@@ -20,11 +22,11 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/dmi.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+#include <asm/timer.h> /* Needed for recalibrate_cpu_khz() */
#include <asm/msr.h>
-#include <asm/timer.h>
-#include <asm/timex.h>
-#include <asm/io.h>
#include <asm/system.h>
#ifdef CONFIG_X86_POWERNOW_K7_ACPI
@@ -58,9 +60,9 @@ struct pst_s {
union powernow_acpi_control_t {
struct {
unsigned long fid:5,
- vid:5,
- sgtc:20,
- res1:2;
+ vid:5,
+ sgtc:20,
+ res1:2;
} bits;
unsigned long val;
};
@@ -94,14 +96,15 @@ static struct cpufreq_frequency_table *powernow_table;
static unsigned int can_scale_bus;
static unsigned int can_scale_vid;
-static unsigned int minimum_speed=-1;
+static unsigned int minimum_speed = -1;
static unsigned int maximum_speed;
static unsigned int number_scales;
static unsigned int fsb;
static unsigned int latency;
static char have_a0;
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+ "powernow-k7", msg)
static int check_fsb(unsigned int fsbspeed)
{
@@ -109,7 +112,7 @@ static int check_fsb(unsigned int fsbspeed)
unsigned int f = fsb / 1000;
delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed;
- return (delta < 5);
+ return delta < 5;
}
static int check_powernow(void)
@@ -117,24 +120,26 @@ static int check_powernow(void)
struct cpuinfo_x86 *c = &cpu_data(0);
unsigned int maxei, eax, ebx, ecx, edx;
- if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) {
+ if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) {
#ifdef MODULE
- printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n");
+ printk(KERN_INFO PFX "This module only works with "
+ "AMD K7 CPUs\n");
#endif
return 0;
}
/* Get maximum capabilities */
- maxei = cpuid_eax (0x80000000);
+ maxei = cpuid_eax(0x80000000);
if (maxei < 0x80000007) { /* Any powernow info ? */
#ifdef MODULE
- printk (KERN_INFO PFX "No powernow capabilities detected\n");
+ printk(KERN_INFO PFX "No powernow capabilities detected\n");
#endif
return 0;
}
if ((c->x86_model == 6) && (c->x86_mask == 0)) {
- printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n");
+ printk(KERN_INFO PFX "K7 660[A0] core detected, "
+ "enabling errata workarounds\n");
have_a0 = 1;
}
@@ -144,37 +149,42 @@ static int check_powernow(void)
if (!(edx & (1 << 1 | 1 << 2)))
return 0;
- printk (KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
+ printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
if (edx & 1 << 1) {
- printk ("frequency");
- can_scale_bus=1;
+ printk("frequency");
+ can_scale_bus = 1;
}
if ((edx & (1 << 1 | 1 << 2)) == 0x6)
- printk (" and ");
+ printk(" and ");
if (edx & 1 << 2) {
- printk ("voltage");
- can_scale_vid=1;
+ printk("voltage");
+ can_scale_vid = 1;
}
- printk (".\n");
+ printk(".\n");
return 1;
}
+static void invalidate_entry(unsigned int entry)
+{
+ powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
+}
-static int get_ranges (unsigned char *pst)
+static int get_ranges(unsigned char *pst)
{
unsigned int j;
unsigned int speed;
u8 fid, vid;
- powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL);
+ powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
+ (number_scales + 1)), GFP_KERNEL);
if (!powernow_table)
return -ENOMEM;
- for (j=0 ; j < number_scales; j++) {
+ for (j = 0 ; j < number_scales; j++) {
fid = *pst++;
powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10;
@@ -182,10 +192,10 @@ static int get_ranges (unsigned char *pst)
speed = powernow_table[j].frequency;
- if ((fid_codes[fid] % 10)==5) {
+ if ((fid_codes[fid] % 10) == 5) {
#ifdef CONFIG_X86_POWERNOW_K7_ACPI
if (have_a0 == 1)
- powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID;
+ invalidate_entry(j);
#endif
}
@@ -197,7 +207,7 @@ static int get_ranges (unsigned char *pst)
vid = *pst++;
powernow_table[j].index |= (vid << 8); /* upper 8 bits */
- dprintk (" FID: 0x%x (%d.%dx [%dMHz]) "
+ dprintk(" FID: 0x%x (%d.%dx [%dMHz]) "
"VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
fid_codes[fid] % 10, speed/1000, vid,
mobile_vid_table[vid]/1000,
@@ -214,13 +224,13 @@ static void change_FID(int fid)
{
union msr_fidvidctl fidvidctl;
- rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+ rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
if (fidvidctl.bits.FID != fid) {
fidvidctl.bits.SGTC = latency;
fidvidctl.bits.FID = fid;
fidvidctl.bits.VIDC = 0;
fidvidctl.bits.FIDC = 1;
- wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+ wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
}
}
@@ -229,18 +239,18 @@ static void change_VID(int vid)
{
union msr_fidvidctl fidvidctl;
- rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+ rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
if (fidvidctl.bits.VID != vid) {
fidvidctl.bits.SGTC = latency;
fidvidctl.bits.VID = vid;
fidvidctl.bits.FIDC = 0;
fidvidctl.bits.VIDC = 1;
- wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val);
+ wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
}
}
-static void change_speed (unsigned int index)
+static void change_speed(unsigned int index)
{
u8 fid, vid;
struct cpufreq_freqs freqs;
@@ -257,7 +267,7 @@ static void change_speed (unsigned int index)
freqs.cpu = 0;
- rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+ rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
cfid = fidvidstatus.bits.CFID;
freqs.old = fsb * fid_codes[cfid] / 10;
@@ -321,12 +331,14 @@ static int powernow_acpi_init(void)
goto err1;
}
- if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) {
+ if (acpi_processor_perf->control_register.space_id !=
+ ACPI_ADR_SPACE_FIXED_HARDWARE) {
retval = -ENODEV;
goto err2;
}
- if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) {
+ if (acpi_processor_perf->status_register.space_id !=
+ ACPI_ADR_SPACE_FIXED_HARDWARE) {
retval = -ENODEV;
goto err2;
}
@@ -338,7 +350,8 @@ static int powernow_acpi_init(void)
goto err2;
}
- powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL);
+ powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
+ (number_scales + 1)), GFP_KERNEL);
if (!powernow_table) {
retval = -ENOMEM;
goto err2;
@@ -352,7 +365,7 @@ static int powernow_acpi_init(void)
unsigned int speed, speed_mhz;
pc.val = (unsigned long) state->control;
- dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
+ dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
i,
(u32) state->core_frequency,
(u32) state->power,
@@ -381,12 +394,12 @@ static int powernow_acpi_init(void)
if (speed % 1000 > 0)
speed_mhz++;
- if ((fid_codes[fid] % 10)==5) {
+ if ((fid_codes[fid] % 10) == 5) {
if (have_a0 == 1)
- powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+ invalidate_entry(i);
}
- dprintk (" FID: 0x%x (%d.%dx [%dMHz]) "
+ dprintk(" FID: 0x%x (%d.%dx [%dMHz]) "
"VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
fid_codes[fid] % 10, speed_mhz, vid,
mobile_vid_table[vid]/1000,
@@ -422,7 +435,8 @@ err1:
err05:
kfree(acpi_processor_perf);
err0:
- printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n");
+ printk(KERN_WARNING PFX "ACPI perflib can not be used on "
+ "this platform\n");
acpi_processor_perf = NULL;
return retval;
}
@@ -435,7 +449,14 @@ static int powernow_acpi_init(void)
}
#endif
-static int powernow_decode_bios (int maxfid, int startvid)
+static void print_pst_entry(struct pst_s *pst, unsigned int j)
+{
+ dprintk("PST:%d (@%p)\n", j, pst);
+ dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n",
+ pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
+}
+
+static int powernow_decode_bios(int maxfid, int startvid)
{
struct psb_s *psb;
struct pst_s *pst;
@@ -446,61 +467,67 @@ static int powernow_decode_bios (int maxfid, int startvid)
etuple = cpuid_eax(0x80000001);
- for (i=0xC0000; i < 0xffff0 ; i+=16) {
+ for (i = 0xC0000; i < 0xffff0 ; i += 16) {
p = phys_to_virt(i);
- if (memcmp(p, "AMDK7PNOW!", 10) == 0){
- dprintk ("Found PSB header at %p\n", p);
+ if (memcmp(p, "AMDK7PNOW!", 10) == 0) {
+ dprintk("Found PSB header at %p\n", p);
psb = (struct psb_s *) p;
- dprintk ("Table version: 0x%x\n", psb->tableversion);
+ dprintk("Table version: 0x%x\n", psb->tableversion);
if (psb->tableversion != 0x12) {
- printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n");
+ printk(KERN_INFO PFX "Sorry, only v1.2 tables"
+ " supported right now\n");
return -ENODEV;
}
- dprintk ("Flags: 0x%x\n", psb->flags);
- if ((psb->flags & 1)==0) {
- dprintk ("Mobile voltage regulator\n");
- } else {
- dprintk ("Desktop voltage regulator\n");
- }
+ dprintk("Flags: 0x%x\n", psb->flags);
+ if ((psb->flags & 1) == 0)
+ dprintk("Mobile voltage regulator\n");
+ else
+ dprintk("Desktop voltage regulator\n");
latency = psb->settlingtime;
if (latency < 100) {
- printk(KERN_INFO PFX "BIOS set settling time to %d microseconds. "
- "Should be at least 100. Correcting.\n", latency);
+ printk(KERN_INFO PFX "BIOS set settling time "
+ "to %d microseconds. "
+ "Should be at least 100. "
+ "Correcting.\n", latency);
latency = 100;
}
- dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime);
- dprintk ("Has %d PST tables. (Only dumping ones relevant to this CPU).\n", psb->numpst);
+ dprintk("Settling Time: %d microseconds.\n",
+ psb->settlingtime);
+ dprintk("Has %d PST tables. (Only dumping ones "
+ "relevant to this CPU).\n",
+ psb->numpst);
- p += sizeof (struct psb_s);
+ p += sizeof(struct psb_s);
pst = (struct pst_s *) p;
- for (j=0; j<psb->numpst; j++) {
+ for (j = 0; j < psb->numpst; j++) {
pst = (struct pst_s *) p;
number_scales = pst->numpstates;
- if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) &&
- (maxfid==pst->maxfid) && (startvid==pst->startvid))
- {
- dprintk ("PST:%d (@%p)\n", j, pst);
- dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n",
- pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
-
- ret = get_ranges ((char *) pst + sizeof (struct pst_s));
+ if ((etuple == pst->cpuid) &&
+ check_fsb(pst->fsbspeed) &&
+ (maxfid == pst->maxfid) &&
+ (startvid == pst->startvid)) {
+ print_pst_entry(pst, j);
+ p = (char *)pst + sizeof(struct pst_s);
+ ret = get_ranges(p);
return ret;
} else {
unsigned int k;
- p = (char *) pst + sizeof (struct pst_s);
- for (k=0; k<number_scales; k++)
- p+=2;
+ p = (char *)pst + sizeof(struct pst_s);
+ for (k = 0; k < number_scales; k++)
+ p += 2;
}
}
- printk (KERN_INFO PFX "No PST tables match this cpuid (0x%x)\n", etuple);
- printk (KERN_INFO PFX "This is indicative of a broken BIOS.\n");
+ printk(KERN_INFO PFX "No PST tables match this cpuid "
+ "(0x%x)\n", etuple);
+ printk(KERN_INFO PFX "This is indicative of a broken "
+ "BIOS.\n");
return -EINVAL;
}
@@ -511,13 +538,14 @@ static int powernow_decode_bios (int maxfid, int startvid)
}
-static int powernow_target (struct cpufreq_policy *policy,
+static int powernow_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
unsigned int newstate;
- if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, relation, &newstate))
+ if (cpufreq_frequency_table_target(policy, powernow_table, target_freq,
+ relation, &newstate))
return -EINVAL;
change_speed(newstate);
@@ -526,7 +554,7 @@ static int powernow_target (struct cpufreq_policy *policy,
}
-static int powernow_verify (struct cpufreq_policy *policy)
+static int powernow_verify(struct cpufreq_policy *policy)
{
return cpufreq_frequency_table_verify(policy, powernow_table);
}
@@ -566,18 +594,23 @@ static unsigned int powernow_get(unsigned int cpu)
if (cpu)
return 0;
- rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+ rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
cfid = fidvidstatus.bits.CFID;
- return (fsb * fid_codes[cfid] / 10);
+ return fsb * fid_codes[cfid] / 10;
}
static int __init acer_cpufreq_pst(const struct dmi_system_id *d)
{
- printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident);
- printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n");
- printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n");
+ printk(KERN_WARNING PFX
+ "%s laptop with broken PST tables in BIOS detected.\n",
+ d->ident);
+ printk(KERN_WARNING PFX
+ "You need to downgrade to 3A21 (09/09/2002), or try a newer "
+ "BIOS than 3A71 (01/20/2003)\n");
+ printk(KERN_WARNING PFX
+ "cpufreq scaling has been disabled as a result of this.\n");
return 0;
}
@@ -598,7 +631,7 @@ static struct dmi_system_id __initdata powernow_dmi_table[] = {
{ }
};
-static int __init powernow_cpu_init (struct cpufreq_policy *policy)
+static int __init powernow_cpu_init(struct cpufreq_policy *policy)
{
union msr_fidvidstatus fidvidstatus;
int result;
@@ -606,7 +639,7 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
if (policy->cpu != 0)
return -ENODEV;
- rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+ rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
recalibrate_cpu_khz();
@@ -618,19 +651,21 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
dprintk("FSB: %3dMHz\n", fsb/1000);
if (dmi_check_system(powernow_dmi_table) || acpi_force) {
- printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n");
+ printk(KERN_INFO PFX "PSB/PST known to be broken. "
+ "Trying ACPI instead\n");
result = powernow_acpi_init();
} else {
- result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID);
+ result = powernow_decode_bios(fidvidstatus.bits.MFID,
+ fidvidstatus.bits.SVID);
if (result) {
- printk (KERN_INFO PFX "Trying ACPI perflib\n");
+ printk(KERN_INFO PFX "Trying ACPI perflib\n");
maximum_speed = 0;
minimum_speed = -1;
latency = 0;
result = powernow_acpi_init();
if (result) {
- printk (KERN_INFO PFX "ACPI and legacy methods failed\n");
- printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.html\n");
+ printk(KERN_INFO PFX
+ "ACPI and legacy methods failed\n");
}
} else {
/* SGTC use the bus clock as timer */
@@ -642,10 +677,11 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
if (result)
return result;
- printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
+ printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
minimum_speed/1000, maximum_speed/1000);
- policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency);
+ policy->cpuinfo.transition_latency =
+ cpufreq_scale(2000000UL, fsb, latency);
policy->cur = powernow_get(0);
@@ -654,7 +690,8 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
return cpufreq_frequency_table_cpuinfo(policy, powernow_table);
}
-static int powernow_cpu_exit (struct cpufreq_policy *policy) {
+static int powernow_cpu_exit(struct cpufreq_policy *policy)
+{
cpufreq_frequency_table_put_attr(policy->cpu);
#ifdef CONFIG_X86_POWERNOW_K7_ACPI
@@ -669,7 +706,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
return 0;
}
-static struct freq_attr* powernow_table_attr[] = {
+static struct freq_attr *powernow_table_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
NULL,
};
@@ -685,15 +722,15 @@ static struct cpufreq_driver powernow_driver = {
.attr = powernow_table_attr,
};
-static int __init powernow_init (void)
+static int __init powernow_init(void)
{
- if (check_powernow()==0)
+ if (check_powernow() == 0)
return -ENODEV;
return cpufreq_register_driver(&powernow_driver);
}
-static void __exit powernow_exit (void)
+static void __exit powernow_exit(void)
{
cpufreq_unregister_driver(&powernow_driver);
}
@@ -701,9 +738,9 @@ static void __exit powernow_exit (void)
module_param(acpi_force, int, 0444);
MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
-MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
-MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors.");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_DESCRIPTION("Powernow driver for AMD K7 processors.");
+MODULE_LICENSE("GPL");
late_initcall(powernow_init);
module_exit(powernow_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 6428aa17b40e..a15ac94e0b9b 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -33,16 +33,14 @@
#include <linux/string.h>
#include <linux/cpumask.h>
#include <linux/sched.h> /* for current / set_cpus_allowed() */
+#include <linux/io.h>
+#include <linux/delay.h>
#include <asm/msr.h>
-#include <asm/io.h>
-#include <asm/delay.h>
-#ifdef CONFIG_X86_POWERNOW_K8_ACPI
#include <linux/acpi.h>
#include <linux/mutex.h>
#include <acpi/processor.h>
-#endif
#define PFX "powernow-k8: "
#define VERSION "version 2.20.00"
@@ -71,7 +69,8 @@ static u32 find_khz_freq_from_fid(u32 fid)
return 1000 * find_freq_from_fid(fid);
}
-static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 pstate)
+static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
+ u32 pstate)
{
return data[pstate].frequency;
}
@@ -186,7 +185,9 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
return 1;
}
- lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
+ lo = fid;
+ lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
+ lo |= MSR_C_LO_INIT_FID_VID;
dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
fid, lo, data->plllock * PLL_LOCK_CONVERSION);
@@ -194,7 +195,9 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
do {
wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
if (i++ > 100) {
- printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n");
+ printk(KERN_ERR PFX
+ "Hardware error - pending bit very stuck - "
+ "no further pstate changes possible\n");
return 1;
}
} while (query_current_values_with_pending_wait(data));
@@ -202,14 +205,16 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
count_off_irt(data);
if (savevid != data->currvid) {
- printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n",
- savevid, data->currvid);
+ printk(KERN_ERR PFX
+ "vid change on fid trans, old 0x%x, new 0x%x\n",
+ savevid, data->currvid);
return 1;
}
if (fid != data->currfid) {
- printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
- data->currfid);
+ printk(KERN_ERR PFX
+ "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
+ data->currfid);
return 1;
}
@@ -228,7 +233,9 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
return 1;
}
- lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID;
+ lo = data->currfid;
+ lo |= (vid << MSR_C_LO_VID_SHIFT);
+ lo |= MSR_C_LO_INIT_FID_VID;
dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
vid, lo, STOP_GRANT_5NS);
@@ -236,20 +243,24 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
do {
wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
if (i++ > 100) {
- printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
+ printk(KERN_ERR PFX "internal error - pending bit "
+ "very stuck - no further pstate "
+ "changes possible\n");
return 1;
}
} while (query_current_values_with_pending_wait(data));
if (savefid != data->currfid) {
- printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n",
+ printk(KERN_ERR PFX "fid changed on vid trans, old "
+ "0x%x new 0x%x\n",
savefid, data->currfid);
return 1;
}
if (vid != data->currvid) {
- printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid,
- data->currvid);
+ printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
+ "curr 0x%x\n",
+ vid, data->currvid);
return 1;
}
@@ -261,7 +272,8 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
* Decreasing vid codes represent increasing voltages:
* vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
*/
-static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step)
+static int decrease_vid_code_by_step(struct powernow_k8_data *data,
+ u32 reqvid, u32 step)
{
if ((data->currvid - reqvid) > step)
reqvid = data->currvid - step;
@@ -283,7 +295,8 @@ static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
}
/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
-static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid)
+static int transition_fid_vid(struct powernow_k8_data *data,
+ u32 reqfid, u32 reqvid)
{
if (core_voltage_pre_transition(data, reqvid))
return 1;
@@ -298,7 +311,8 @@ static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 req
return 1;
if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
- printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n",
+ printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
+ "curr 0x%x 0x%x\n",
smp_processor_id(),
reqfid, reqvid, data->currfid, data->currvid);
return 1;
@@ -311,13 +325,15 @@ static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 req
}
/* Phase 1 - core voltage transition ... setup voltage */
-static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid)
+static int core_voltage_pre_transition(struct powernow_k8_data *data,
+ u32 reqvid)
{
u32 rvosteps = data->rvo;
u32 savefid = data->currfid;
u32 maxvid, lo;
- dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n",
+ dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
+ "reqvid 0x%x, rvo 0x%x\n",
smp_processor_id(),
data->currfid, data->currvid, reqvid, data->rvo);
@@ -340,7 +356,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid
} else {
dprintk("ph1: changing vid for rvo, req 0x%x\n",
data->currvid - 1);
- if (decrease_vid_code_by_step(data, data->currvid - 1, 1))
+ if (decrease_vid_code_by_step(data, data->currvid-1, 1))
return 1;
rvosteps--;
}
@@ -350,7 +366,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid
return 1;
if (savefid != data->currfid) {
- printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid);
+ printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
+ data->currfid);
return 1;
}
@@ -363,20 +380,24 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid
/* Phase 2 - core frequency transition */
static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
{
- u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid;
+ u32 vcoreqfid, vcocurrfid, vcofiddiff;
+ u32 fid_interval, savevid = data->currvid;
- if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
- printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n",
- reqfid, data->currfid);
+ if ((reqfid < HI_FID_TABLE_BOTTOM) &&
+ (data->currfid < HI_FID_TABLE_BOTTOM)) {
+ printk(KERN_ERR PFX "ph2: illegal lo-lo transition "
+ "0x%x 0x%x\n", reqfid, data->currfid);
return 1;
}
if (data->currfid == reqfid) {
- printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid);
+ printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
+ data->currfid);
return 0;
}
- dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n",
+ dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
+ "reqfid 0x%x\n",
smp_processor_id(),
data->currfid, data->currvid, reqfid);
@@ -390,14 +411,14 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
if (reqfid > data->currfid) {
if (data->currfid > LO_FID_TABLE_TOP) {
- if (write_new_fid(data, data->currfid + fid_interval)) {
+ if (write_new_fid(data,
+ data->currfid + fid_interval))
return 1;
- }
} else {
if (write_new_fid
- (data, 2 + convert_fid_to_vco_fid(data->currfid))) {
+ (data,
+ 2 + convert_fid_to_vco_fid(data->currfid)))
return 1;
- }
}
} else {
if (write_new_fid(data, data->currfid - fid_interval))
@@ -417,7 +438,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
if (data->currfid != reqfid) {
printk(KERN_ERR PFX
- "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n",
+ "ph2: mismatch, failed fid transition, "
+ "curr 0x%x, req 0x%x\n",
data->currfid, reqfid);
return 1;
}
@@ -435,7 +457,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
}
/* Phase 3 - core voltage transition flow ... jump to the final vid. */
-static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid)
+static int core_voltage_post_transition(struct powernow_k8_data *data,
+ u32 reqvid)
{
u32 savefid = data->currfid;
u32 savereqvid = reqvid;
@@ -457,7 +480,8 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi
if (data->currvid != reqvid) {
printk(KERN_ERR PFX
- "ph3: failed vid transition\n, req 0x%x, curr 0x%x",
+ "ph3: failed vid transition\n, "
+ "req 0x%x, curr 0x%x",
reqvid, data->currvid);
return 1;
}
@@ -508,7 +532,8 @@ static int check_supported_cpu(unsigned int cpu)
if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
- printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
+ printk(KERN_INFO PFX
+ "Processor cpuid %x not supported\n", eax);
goto out;
}
@@ -520,8 +545,10 @@ static int check_supported_cpu(unsigned int cpu)
}
cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
- if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) {
- printk(KERN_INFO PFX "Power state transitions not supported\n");
+ if ((edx & P_STATE_TRANSITION_CAPABLE)
+ != P_STATE_TRANSITION_CAPABLE) {
+ printk(KERN_INFO PFX
+ "Power state transitions not supported\n");
goto out;
}
} else { /* must be a HW Pstate capable processor */
@@ -539,7 +566,8 @@ out:
return rc;
}
-static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
+static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
+ u8 maxvid)
{
unsigned int j;
u8 lastfid = 0xff;
@@ -550,12 +578,14 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8
j, pst[j].vid);
return -EINVAL;
}
- if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */
+ if (pst[j].vid < data->rvo) {
+ /* vid + rvo >= 0 */
printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
" %d\n", j);
return -ENODEV;
}
- if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */
+ if (pst[j].vid < maxvid + data->rvo) {
+ /* vid + rvo >= maxvid */
printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
" %d\n", j);
return -ENODEV;
@@ -579,23 +609,31 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8
return -EINVAL;
}
if (lastfid > LO_FID_TABLE_TOP)
- printk(KERN_INFO FW_BUG PFX "first fid not from lo freq table\n");
+ printk(KERN_INFO FW_BUG PFX
+ "first fid not from lo freq table\n");
return 0;
}
+static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry)
+{
+ data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
+}
+
static void print_basics(struct powernow_k8_data *data)
{
int j;
for (j = 0; j < data->numps; j++) {
- if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) {
+ if (data->powernow_table[j].frequency !=
+ CPUFREQ_ENTRY_INVALID) {
if (cpu_family == CPU_HW_PSTATE) {
- printk(KERN_INFO PFX " %d : pstate %d (%d MHz)\n",
- j,
+ printk(KERN_INFO PFX
+ " %d : pstate %d (%d MHz)\n", j,
data->powernow_table[j].index,
data->powernow_table[j].frequency/1000);
} else {
- printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n",
+ printk(KERN_INFO PFX
+ " %d : fid 0x%x (%d MHz), vid 0x%x\n",
j,
data->powernow_table[j].index & 0xff,
data->powernow_table[j].frequency/1000,
@@ -604,20 +642,25 @@ static void print_basics(struct powernow_k8_data *data)
}
}
if (data->batps)
- printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps);
+ printk(KERN_INFO PFX "Only %d pstates on battery\n",
+ data->batps);
}
-static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
+static int fill_powernow_table(struct powernow_k8_data *data,
+ struct pst_s *pst, u8 maxvid)
{
struct cpufreq_frequency_table *powernow_table;
unsigned int j;
- if (data->batps) { /* use ACPI support to get full speed on mains power */
- printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps);
+ if (data->batps) {
+ /* use ACPI support to get full speed on mains power */
+ printk(KERN_WARNING PFX
+ "Only %d pstates usable (use ACPI driver for full "
+ "range\n", data->batps);
data->numps = data->batps;
}
- for ( j=1; j<data->numps; j++ ) {
+ for (j = 1; j < data->numps; j++) {
if (pst[j-1].fid >= pst[j].fid) {
printk(KERN_ERR PFX "PST out of sequence\n");
return -EINVAL;
@@ -640,9 +683,11 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst,
}
for (j = 0; j < data->numps; j++) {
+ int freq;
powernow_table[j].index = pst[j].fid; /* lower 8 bits */
powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
- powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid);
+ freq = find_khz_freq_from_fid(pst[j].fid);
+ powernow_table[j].frequency = freq;
}
powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
powernow_table[data->numps].index = 0;
@@ -658,7 +703,8 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst,
print_basics(data);
for (j = 0; j < data->numps; j++)
- if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid))
+ if ((pst[j].fid == data->currfid) &&
+ (pst[j].vid == data->currvid))
return 0;
dprintk("currfid/vid do not match PST, ignoring\n");
@@ -698,7 +744,8 @@ static int find_psb_table(struct powernow_k8_data *data)
}
data->vstable = psb->vstable;
- dprintk("voltage stabilization time: %d(*20us)\n", data->vstable);
+ dprintk("voltage stabilization time: %d(*20us)\n",
+ data->vstable);
dprintk("flags2: 0x%x\n", psb->flags2);
data->rvo = psb->flags2 & 3;
@@ -713,11 +760,12 @@ static int find_psb_table(struct powernow_k8_data *data)
dprintk("numpst: 0x%x\n", psb->num_tables);
cpst = psb->num_tables;
- if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){
+ if ((psb->cpuid == 0x00000fc0) ||
+ (psb->cpuid == 0x00000fe0)) {
thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
- if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) {
+ if ((thiscpuid == 0x00000fc0) ||
+ (thiscpuid == 0x00000fe0))
cpst = 1;
- }
}
if (cpst != 1) {
printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
@@ -732,7 +780,8 @@ static int find_psb_table(struct powernow_k8_data *data)
data->numps = psb->numps;
dprintk("numpstates: 0x%x\n", data->numps);
- return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid);
+ return fill_powernow_table(data,
+ (struct pst_s *)(psb+1), maxvid);
}
/*
* If you see this message, complain to BIOS manufacturer. If
@@ -745,28 +794,31 @@ static int find_psb_table(struct powernow_k8_data *data)
* BIOS and Kernel Developer's Guide, which is available on
* www.amd.com
*/
- printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n");
+ printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
return -ENODEV;
}
-#ifdef CONFIG_X86_POWERNOW_K8_ACPI
-static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
+static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
+ unsigned int index)
{
+ acpi_integer control;
+
if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
return;
- data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
- data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
- data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
- data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
- data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
- data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
-}
+ control = data->acpi_data.states[index].control; data->irt = (control
+ >> IRT_SHIFT) & IRT_MASK; data->rvo = (control >>
+ RVO_SHIFT) & RVO_MASK; data->exttype = (control
+ >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+ data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; data->vidmvs = 1
+ << ((control >> MVS_SHIFT) & MVS_MASK); data->vstable =
+ (control >> VST_SHIFT) & VST_MASK; }
static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
{
struct cpufreq_frequency_table *powernow_table;
int ret_val = -ENODEV;
+ acpi_integer space_id;
if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
dprintk("register performance failed: bad ACPI data\n");
@@ -779,11 +831,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
goto err_out;
}
- if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
- (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+ space_id = data->acpi_data.control_register.space_id;
+ if ((space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+ (space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
dprintk("Invalid control/status registers (%x - %x)\n",
data->acpi_data.control_register.space_id,
- data->acpi_data.status_register.space_id);
+ space_id);
goto err_out;
}
@@ -802,7 +855,8 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
if (ret_val)
goto err_out_mem;
- powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
+ powernow_table[data->acpi_data.state_count].frequency =
+ CPUFREQ_TABLE_END;
powernow_table[data->acpi_data.state_count].index = 0;
data->powernow_table = powernow_table;
@@ -830,13 +884,15 @@ err_out_mem:
err_out:
acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
- /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
+ /* data->acpi_data.state_count informs us at ->exit()
+ * whether ACPI was used */
data->acpi_data.state_count = 0;
return ret_val;
}
-static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
+static int fill_powernow_table_pstate(struct powernow_k8_data *data,
+ struct cpufreq_frequency_table *powernow_table)
{
int i;
u32 hi = 0, lo = 0;
@@ -848,84 +904,101 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
if (index > data->max_hw_pstate) {
- printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
- printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
- powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+ printk(KERN_ERR PFX "invalid pstate %d - "
+ "bad value %d.\n", i, index);
+ printk(KERN_ERR PFX "Please report to BIOS "
+ "manufacturer\n");
+ invalidate_entry(data, i);
continue;
}
rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
if (!(hi & HW_PSTATE_VALID_MASK)) {
dprintk("invalid pstate %d, ignoring\n", index);
- powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+ invalidate_entry(data, i);
continue;
}
powernow_table[i].index = index;
- powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
+ powernow_table[i].frequency =
+ data->acpi_data.states[i].core_frequency * 1000;
}
return 0;
}
-static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
+static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
+ struct cpufreq_frequency_table *powernow_table)
{
int i;
int cntlofreq = 0;
+
for (i = 0; i < data->acpi_data.state_count; i++) {
u32 fid;
u32 vid;
+ u32 freq, index;
+ acpi_integer status, control;
if (data->exttype) {
- fid = data->acpi_data.states[i].status & EXT_FID_MASK;
- vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK;
+ status = data->acpi_data.states[i].status;
+ fid = status & EXT_FID_MASK;
+ vid = (status >> VID_SHIFT) & EXT_VID_MASK;
} else {
- fid = data->acpi_data.states[i].control & FID_MASK;
- vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+ control = data->acpi_data.states[i].control;
+ fid = control & FID_MASK;
+ vid = (control >> VID_SHIFT) & VID_MASK;
}
dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
- powernow_table[i].index = fid; /* lower 8 bits */
- powernow_table[i].index |= (vid << 8); /* upper 8 bits */
- powernow_table[i].frequency = find_khz_freq_from_fid(fid);
+ index = fid | (vid<<8);
+ powernow_table[i].index = index;
+
+ freq = find_khz_freq_from_fid(fid);
+ powernow_table[i].frequency = freq;
/* verify frequency is OK */
- if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) ||
- (powernow_table[i].frequency < (MIN_FREQ * 1000))) {
- dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency);
- powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+ if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
+ dprintk("invalid freq %u kHz, ignoring\n", freq);
+ invalidate_entry(data, i);
continue;
}
- /* verify voltage is OK - BIOSs are using "off" to indicate invalid */
+ /* verify voltage is OK -
+ * BIOSs are using "off" to indicate invalid */
if (vid == VID_OFF) {
dprintk("invalid vid %u, ignoring\n", vid);
- powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+ invalidate_entry(data, i);
continue;
}
/* verify only 1 entry from the lo frequency table */
if (fid < HI_FID_TABLE_BOTTOM) {
if (cntlofreq) {
- /* if both entries are the same, ignore this one ... */
- if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) ||
- (powernow_table[i].index != powernow_table[cntlofreq].index)) {
- printk(KERN_ERR PFX "Too many lo freq table entries\n");
+ /* if both entries are the same,
+ * ignore this one ... */
+ if ((freq != powernow_table[cntlofreq].frequency) ||
+ (index != powernow_table[cntlofreq].index)) {
+ printk(KERN_ERR PFX
+ "Too many lo freq table "
+ "entries\n");
return 1;
}
- dprintk("double low frequency table entry, ignoring it.\n");
- powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+ dprintk("double low frequency table entry, "
+ "ignoring it.\n");
+ invalidate_entry(data, i);
continue;
} else
cntlofreq = i;
}
- if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
- printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
- powernow_table[i].frequency,
- (unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
- powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+ if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
+ printk(KERN_INFO PFX "invalid freq entries "
+ "%u kHz vs. %u kHz\n", freq,
+ (unsigned int)
+ (data->acpi_data.states[i].core_frequency
+ * 1000));
+ invalidate_entry(data, i);
continue;
}
}
@@ -935,7 +1008,8 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
{
if (data->acpi_data.state_count)
- acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+ acpi_processor_unregister_performance(&data->acpi_data,
+ data->cpu);
free_cpumask_var(data->acpi_data.shared_cpu_map);
}
@@ -953,15 +1027,9 @@ static int get_transition_latency(struct powernow_k8_data *data)
return 1000 * max_latency;
}
-#else
-static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
-static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
-static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
-static int get_transition_latency(struct powernow_k8_data *data) { return 0; }
-#endif /* CONFIG_X86_POWERNOW_K8_ACPI */
-
/* Take a frequency, and issue the fid/vid transition command */
-static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index)
+static int transition_frequency_fidvid(struct powernow_k8_data *data,
+ unsigned int index)
{
u32 fid = 0;
u32 vid = 0;
@@ -989,7 +1057,8 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
return 0;
}
- if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
+ if ((fid < HI_FID_TABLE_BOTTOM) &&
+ (data->currfid < HI_FID_TABLE_BOTTOM)) {
printk(KERN_ERR PFX
"ignoring illegal change in lo freq table-%x to 0x%x\n",
data->currfid, fid);
@@ -1017,7 +1086,8 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
}
/* Take a frequency, and issue the hardware pstate transition command */
-static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index)
+static int transition_frequency_pstate(struct powernow_k8_data *data,
+ unsigned int index)
{
u32 pstate = 0;
int res, i;
@@ -1029,7 +1099,8 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
pstate = index & HW_PSTATE_MASK;
if (pstate > data->max_hw_pstate)
return 0;
- freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
+ freqs.old = find_khz_freq_from_pstate(data->powernow_table,
+ data->currpstate);
freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
for_each_cpu_mask_nr(i, *(data->available_cores)) {
@@ -1048,7 +1119,8 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
}
/* Driver entry point to switch to the target frequency */
-static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation)
+static int powernowk8_target(struct cpufreq_policy *pol,
+ unsigned targfreq, unsigned relation)
{
cpumask_t oldmask;
struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
@@ -1087,14 +1159,18 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
dprintk("targ: curr fid 0x%x, vid 0x%x\n",
data->currfid, data->currvid);
- if ((checkvid != data->currvid) || (checkfid != data->currfid)) {
+ if ((checkvid != data->currvid) ||
+ (checkfid != data->currfid)) {
printk(KERN_INFO PFX
- "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n",
- checkfid, data->currfid, checkvid, data->currvid);
+ "error - out of sync, fix 0x%x 0x%x, "
+ "vid 0x%x 0x%x\n",
+ checkfid, data->currfid,
+ checkvid, data->currvid);
}
}
- if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate))
+ if (cpufreq_frequency_table_target(pol, data->powernow_table,
+ targfreq, relation, &newstate))
goto err_out;
mutex_lock(&fidvid_mutex);
@@ -1114,7 +1190,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
mutex_unlock(&fidvid_mutex);
if (cpu_family == CPU_HW_PSTATE)
- pol->cur = find_khz_freq_from_pstate(data->powernow_table, newstate);
+ pol->cur = find_khz_freq_from_pstate(data->powernow_table,
+ newstate);
else
pol->cur = find_khz_freq_from_fid(data->currfid);
ret = 0;
@@ -1141,6 +1218,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
struct powernow_k8_data *data;
cpumask_t oldmask;
int rc;
+ static int print_once;
if (!cpu_online(pol->cpu))
return -ENODEV;
@@ -1163,33 +1241,31 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
* an UP version, and is deprecated by AMD.
*/
if (num_online_cpus() != 1) {
-#ifndef CONFIG_ACPI_PROCESSOR
- printk(KERN_ERR PFX "ACPI Processor support is required "
- "for SMP systems but is absent. Please load the "
- "ACPI Processor module before starting this "
- "driver.\n");
-#else
- printk(KERN_ERR FW_BUG PFX "Your BIOS does not provide"
- " ACPI _PSS objects in a way that Linux "
- "understands. Please report this to the Linux "
- "ACPI maintainers and complain to your BIOS "
- "vendor.\n");
-#endif
- kfree(data);
- return -ENODEV;
+ /*
+ * Replace this one with print_once as soon as such a
+ * thing gets introduced
+ */
+ if (!print_once) {
+ WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS "
+ "does not provide ACPI _PSS objects "
+ "in a way that Linux understands. "
+ "Please report this to the Linux ACPI"
+ " maintainers and complain to your "
+ "BIOS vendor.\n");
+ print_once++;
+ }
+ goto err_out;
}
if (pol->cpu != 0) {
printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
"CPU other than CPU0. Complain to your BIOS "
"vendor.\n");
- kfree(data);
- return -ENODEV;
+ goto err_out;
}
rc = find_psb_table(data);
- if (rc) {
- kfree(data);
- return -ENODEV;
- }
+ if (rc)
+ goto err_out;
+
/* Take a crude guess here.
* That guess was in microseconds, so multiply with 1000 */
pol->cpuinfo.transition_latency = (
@@ -1204,16 +1280,16 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
if (smp_processor_id() != pol->cpu) {
printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
- goto err_out;
+ goto err_out_unmask;
}
if (pending_bit_stuck()) {
printk(KERN_ERR PFX "failing init, change pending bit set\n");
- goto err_out;
+ goto err_out_unmask;
}
if (query_current_values_with_pending_wait(data))
- goto err_out;
+ goto err_out_unmask;
if (cpu_family == CPU_OPTERON)
fidvid_msr_init();
@@ -1228,7 +1304,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
data->available_cores = pol->cpus;
if (cpu_family == CPU_HW_PSTATE)
- pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
+ pol->cur = find_khz_freq_from_pstate(data->powernow_table,
+ data->currpstate);
else
pol->cur = find_khz_freq_from_fid(data->currfid);
dprintk("policy current frequency %d kHz\n", pol->cur);
@@ -1245,7 +1322,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
if (cpu_family == CPU_HW_PSTATE)
- dprintk("cpu_init done, current pstate 0x%x\n", data->currpstate);
+ dprintk("cpu_init done, current pstate 0x%x\n",
+ data->currpstate);
else
dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
data->currfid, data->currvid);
@@ -1254,15 +1332,16 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
return 0;
-err_out:
+err_out_unmask:
set_cpus_allowed_ptr(current, &oldmask);
powernow_k8_cpu_exit_acpi(data);
+err_out:
kfree(data);
return -ENODEV;
}
-static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
+static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
{
struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
@@ -1279,7 +1358,7 @@ static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
return 0;
}
-static unsigned int powernowk8_get (unsigned int cpu)
+static unsigned int powernowk8_get(unsigned int cpu)
{
struct powernow_k8_data *data;
cpumask_t oldmask = current->cpus_allowed;
@@ -1315,7 +1394,7 @@ out:
return khz;
}
-static struct freq_attr* powernow_k8_attr[] = {
+static struct freq_attr *powernow_k8_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
NULL,
};
@@ -1360,7 +1439,8 @@ static void __exit powernowk8_exit(void)
cpufreq_unregister_driver(&cpufreq_amd64_driver);
}
-MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
+MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
+ "Mark Langsdorf <mark.langsdorf@amd.com>");
MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index 8ecc75b6c7c3..6c6698feade1 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -45,11 +45,10 @@ struct powernow_k8_data {
* frequency is in kHz */
struct cpufreq_frequency_table *powernow_table;
-#ifdef CONFIG_X86_POWERNOW_K8_ACPI
/* the acpi table needs to be kept. it's only available if ACPI was
* used to determine valid frequency/vid/fid states */
struct acpi_processor_performance acpi_data;
-#endif
+
/* we need to keep track of associated cores, but let cpufreq
* handle hotplug events - so just point at cpufreq pol->cpus
* structure */
@@ -222,10 +221,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
-#ifdef CONFIG_X86_POWERNOW_K8_ACPI
static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
-#endif
#ifdef CONFIG_SMP
static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
index 42da9bd677d6..435a996a613a 100644
--- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
+++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
@@ -19,17 +19,19 @@
#include <linux/delay.h>
#include <linux/cpufreq.h>
+#include <linux/timex.h>
+#include <linux/io.h>
#include <asm/msr.h>
-#include <asm/timex.h>
-#include <asm/io.h>
#define MMCR_BASE 0xfffef000 /* The default base address */
#define OFFS_CPUCTL 0x2 /* CPU Control Register */
static __u8 __iomem *cpuctl;
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+ "sc520_freq", msg)
+#define PFX "sc520_freq: "
static struct cpufreq_frequency_table sc520_freq_table[] = {
{0x01, 100000},
@@ -43,7 +45,8 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
switch (clockspeed_reg & 0x03) {
default:
- printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg);
+ printk(KERN_ERR PFX "error: cpuctl register has unexpected "
+ "value %02x\n", clockspeed_reg);
case 0x01:
return 100000;
case 0x02:
@@ -51,7 +54,7 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
}
}
-static void sc520_freq_set_cpu_state (unsigned int state)
+static void sc520_freq_set_cpu_state(unsigned int state)
{
struct cpufreq_freqs freqs;
@@ -76,18 +79,19 @@ static void sc520_freq_set_cpu_state (unsigned int state)
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
};
-static int sc520_freq_verify (struct cpufreq_policy *policy)
+static int sc520_freq_verify(struct cpufreq_policy *policy)
{
return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]);
}
-static int sc520_freq_target (struct cpufreq_policy *policy,
+static int sc520_freq_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
unsigned int newstate = 0;
- if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate))
+ if (cpufreq_frequency_table_target(policy, sc520_freq_table,
+ target_freq, relation, &newstate))
return -EINVAL;
sc520_freq_set_cpu_state(newstate);
@@ -116,7 +120,7 @@ static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table);
if (result)
- return (result);
+ return result;
cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu);
@@ -131,7 +135,7 @@ static int sc520_freq_cpu_exit(struct cpufreq_policy *policy)
}
-static struct freq_attr* sc520_freq_attr[] = {
+static struct freq_attr *sc520_freq_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
NULL,
};
@@ -155,13 +159,13 @@ static int __init sc520_freq_init(void)
int err;
/* Test if we have the right hardware */
- if(c->x86_vendor != X86_VENDOR_AMD ||
- c->x86 != 4 || c->x86_model != 9) {
+ if (c->x86_vendor != X86_VENDOR_AMD ||
+ c->x86 != 4 || c->x86_model != 9) {
dprintk("no Elan SC520 processor found!\n");
return -ENODEV;
}
cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
- if(!cpuctl) {
+ if (!cpuctl) {
printk(KERN_ERR "sc520_freq: error: failed to remap memory\n");
return -ENOMEM;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index dedc1e98f168..8bbb11adb315 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev;
/* speedstep_processor
*/
-static unsigned int speedstep_processor = 0;
+static unsigned int speedstep_processor;
static u32 pmbase;
@@ -54,7 +54,8 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
};
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+ "speedstep-ich", msg)
/**
@@ -62,7 +63,7 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
*
* Returns: -ENODEV if no register could be found
*/
-static int speedstep_find_register (void)
+static int speedstep_find_register(void)
{
if (!speedstep_chipset_dev)
return -ENODEV;
@@ -90,7 +91,7 @@ static int speedstep_find_register (void)
*
* Tries to change the SpeedStep state.
*/
-static void speedstep_set_state (unsigned int state)
+static void speedstep_set_state(unsigned int state)
{
u8 pm2_blk;
u8 value;
@@ -133,11 +134,11 @@ static void speedstep_set_state (unsigned int state)
dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
- if (state == (value & 0x1)) {
- dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000));
- } else {
- printk (KERN_ERR "cpufreq: change failed - I/O error\n");
- }
+ if (state == (value & 0x1))
+ dprintk("change to %u MHz succeeded\n",
+ speedstep_get_frequency(speedstep_processor) / 1000);
+ else
+ printk(KERN_ERR "cpufreq: change failed - I/O error\n");
return;
}
@@ -149,7 +150,7 @@ static void speedstep_set_state (unsigned int state)
* Tries to activate the SpeedStep status and control registers.
* Returns -EINVAL on an unsupported chipset, and zero on success.
*/
-static int speedstep_activate (void)
+static int speedstep_activate(void)
{
u16 value = 0;
@@ -175,20 +176,18 @@ static int speedstep_activate (void)
* functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
* chipset, or zero on failure.
*/
-static unsigned int speedstep_detect_chipset (void)
+static unsigned int speedstep_detect_chipset(void)
{
speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_82801DB_12,
- PCI_ANY_ID,
- PCI_ANY_ID,
+ PCI_ANY_ID, PCI_ANY_ID,
NULL);
if (speedstep_chipset_dev)
return 4; /* 4-M */
speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_82801CA_12,
- PCI_ANY_ID,
- PCI_ANY_ID,
+ PCI_ANY_ID, PCI_ANY_ID,
NULL);
if (speedstep_chipset_dev)
return 3; /* 3-M */
@@ -196,8 +195,7 @@ static unsigned int speedstep_detect_chipset (void)
speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_82801BA_10,
- PCI_ANY_ID,
- PCI_ANY_ID,
+ PCI_ANY_ID, PCI_ANY_ID,
NULL);
if (speedstep_chipset_dev) {
/* speedstep.c causes lockups on Dell Inspirons 8000 and
@@ -208,8 +206,7 @@ static unsigned int speedstep_detect_chipset (void)
hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL,
PCI_DEVICE_ID_INTEL_82815_MC,
- PCI_ANY_ID,
- PCI_ANY_ID,
+ PCI_ANY_ID, PCI_ANY_ID,
NULL);
if (!hostbridge)
@@ -236,7 +233,7 @@ static unsigned int _speedstep_get(const struct cpumask *cpus)
cpus_allowed = current->cpus_allowed;
set_cpus_allowed_ptr(current, cpus);
- speed = speedstep_get_processor_frequency(speedstep_processor);
+ speed = speedstep_get_frequency(speedstep_processor);
set_cpus_allowed_ptr(current, &cpus_allowed);
dprintk("detected %u kHz as current frequency\n", speed);
return speed;
@@ -251,11 +248,12 @@ static unsigned int speedstep_get(unsigned int cpu)
* speedstep_target - set a new CPUFreq policy
* @policy: new policy
* @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ * @relation: how that frequency relates to achieved frequency
+ * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
*
* Sets a new CPUFreq policy.
*/
-static int speedstep_target (struct cpufreq_policy *policy,
+static int speedstep_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
@@ -264,7 +262,8 @@ static int speedstep_target (struct cpufreq_policy *policy,
cpumask_t cpus_allowed;
int i;
- if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
+ if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
+ target_freq, relation, &newstate))
return -EINVAL;
freqs.old = _speedstep_get(policy->cpus);
@@ -308,7 +307,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
* Limit must be within speedstep_low_freq and speedstep_high_freq, with
* at least one border included.
*/
-static int speedstep_verify (struct cpufreq_policy *policy)
+static int speedstep_verify(struct cpufreq_policy *policy)
{
return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
}
@@ -344,7 +343,8 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
return -EIO;
dprintk("currently at %s speed setting - %i MHz\n",
- (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high",
+ (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
+ ? "low" : "high",
(speed / 1000));
/* cpuinfo and default policy values */
@@ -352,9 +352,9 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
if (result)
- return (result);
+ return result;
- cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
+ cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
return 0;
}
@@ -366,7 +366,7 @@ static int speedstep_cpu_exit(struct cpufreq_policy *policy)
return 0;
}
-static struct freq_attr* speedstep_attr[] = {
+static struct freq_attr *speedstep_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
NULL,
};
@@ -396,13 +396,15 @@ static int __init speedstep_init(void)
/* detect processor */
speedstep_processor = speedstep_detect_processor();
if (!speedstep_processor) {
- dprintk("Intel(R) SpeedStep(TM) capable processor not found\n");
+ dprintk("Intel(R) SpeedStep(TM) capable processor "
+ "not found\n");
return -ENODEV;
}
/* detect chipset */
if (!speedstep_detect_chipset()) {
- dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n");
+ dprintk("Intel(R) SpeedStep(TM) for this chipset not "
+ "(yet) available.\n");
return -ENODEV;
}
@@ -431,9 +433,11 @@ static void __exit speedstep_exit(void)
}
-MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>, "
+ "Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets "
+ "with ICH-M southbridges.");
+MODULE_LICENSE("GPL");
module_init(speedstep_init);
module_exit(speedstep_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index cdac7d62369b..2e3c6862657b 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -16,12 +16,16 @@
#include <linux/slab.h>
#include <asm/msr.h>
+#include <asm/tsc.h>
#include "speedstep-lib.h"
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+ "speedstep-lib", msg)
+
+#define PFX "speedstep-lib: "
#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
-static int relaxed_check = 0;
+static int relaxed_check;
#else
#define relaxed_check 0
#endif
@@ -30,14 +34,14 @@ static int relaxed_check = 0;
* GET PROCESSOR CORE SPEED IN KHZ *
*********************************************************************/
-static unsigned int pentium3_get_frequency (unsigned int processor)
+static unsigned int pentium3_get_frequency(unsigned int processor)
{
- /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
+ /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
struct {
unsigned int ratio; /* Frequency Multiplier (x10) */
u8 bitmap; /* power on configuration bits
[27, 25:22] (in MSR 0x2a) */
- } msr_decode_mult [] = {
+ } msr_decode_mult[] = {
{ 30, 0x01 },
{ 35, 0x05 },
{ 40, 0x02 },
@@ -52,7 +56,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
{ 85, 0x26 },
{ 90, 0x20 },
{ 100, 0x2b },
- { 0, 0xff } /* error or unknown value */
+ { 0, 0xff } /* error or unknown value */
};
/* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */
@@ -60,7 +64,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
unsigned int value; /* Front Side Bus speed in MHz */
u8 bitmap; /* power on configuration bits [18: 19]
(in MSR 0x2a) */
- } msr_decode_fsb [] = {
+ } msr_decode_fsb[] = {
{ 66, 0x0 },
{ 100, 0x2 },
{ 133, 0x1 },
@@ -85,7 +89,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
}
/* decode the multiplier */
- if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) {
+ if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) {
dprintk("workaround for early PIIIs\n");
msr_lo &= 0x03c00000;
} else
@@ -97,9 +101,10 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
j++;
}
- dprintk("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
+ dprintk("speed is %u\n",
+ (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
- return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100);
+ return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100;
}
@@ -112,20 +117,23 @@ static unsigned int pentiumM_get_frequency(void)
/* see table B-2 of 24547212.pdf */
if (msr_lo & 0x00040000) {
- printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp);
+ printk(KERN_DEBUG PFX "PM - invalid FSB: 0x%x 0x%x\n",
+ msr_lo, msr_tmp);
return 0;
}
msr_tmp = (msr_lo >> 22) & 0x1f;
- dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000));
+ dprintk("bits 22-26 are 0x%x, speed is %u\n",
+ msr_tmp, (msr_tmp * 100 * 1000));
- return (msr_tmp * 100 * 1000);
+ return msr_tmp * 100 * 1000;
}
static unsigned int pentium_core_get_frequency(void)
{
u32 fsb = 0;
u32 msr_lo, msr_tmp;
+ int ret;
rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp);
/* see table B-2 of 25366920.pdf */
@@ -153,12 +161,15 @@ static unsigned int pentium_core_get_frequency(void)
}
rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
- dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+ dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n",
+ msr_lo, msr_tmp);
msr_tmp = (msr_lo >> 22) & 0x1f;
- dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb));
+ dprintk("bits 22-26 are 0x%x, speed is %u\n",
+ msr_tmp, (msr_tmp * fsb));
- return (msr_tmp * fsb);
+ ret = (msr_tmp * fsb);
+ return ret;
}
@@ -167,6 +178,16 @@ static unsigned int pentium4_get_frequency(void)
struct cpuinfo_x86 *c = &boot_cpu_data;
u32 msr_lo, msr_hi, mult;
unsigned int fsb = 0;
+ unsigned int ret;
+ u8 fsb_code;
+
+ /* Pentium 4 Model 0 and 1 do not have the Core Clock Frequency
+ * to System Bus Frequency Ratio Field in the Processor Frequency
+ * Configuration Register of the MSR. Therefore the current
+ * frequency cannot be calculated and has to be measured.
+ */
+ if (c->x86_model < 2)
+ return cpu_khz;
rdmsr(0x2c, msr_lo, msr_hi);
@@ -177,62 +198,61 @@ static unsigned int pentium4_get_frequency(void)
* revision #12 in Table B-1: MSRs in the Pentium 4 and
* Intel Xeon Processors, on page B-4 and B-5.
*/
- if (c->x86_model < 2)
+ fsb_code = (msr_lo >> 16) & 0x7;
+ switch (fsb_code) {
+ case 0:
fsb = 100 * 1000;
- else {
- u8 fsb_code = (msr_lo >> 16) & 0x7;
- switch (fsb_code) {
- case 0:
- fsb = 100 * 1000;
- break;
- case 1:
- fsb = 13333 * 10;
- break;
- case 2:
- fsb = 200 * 1000;
- break;
- }
+ break;
+ case 1:
+ fsb = 13333 * 10;
+ break;
+ case 2:
+ fsb = 200 * 1000;
+ break;
}
if (!fsb)
- printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n");
+ printk(KERN_DEBUG PFX "couldn't detect FSB speed. "
+ "Please send an e-mail to <linux@brodo.de>\n");
/* Multiplier. */
mult = msr_lo >> 24;
- dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult));
+ dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n",
+ fsb, mult, (fsb * mult));
- return (fsb * mult);
+ ret = (fsb * mult);
+ return ret;
}
-unsigned int speedstep_get_processor_frequency(unsigned int processor)
+unsigned int speedstep_get_frequency(unsigned int processor)
{
switch (processor) {
- case SPEEDSTEP_PROCESSOR_PCORE:
+ case SPEEDSTEP_CPU_PCORE:
return pentium_core_get_frequency();
- case SPEEDSTEP_PROCESSOR_PM:
+ case SPEEDSTEP_CPU_PM:
return pentiumM_get_frequency();
- case SPEEDSTEP_PROCESSOR_P4D:
- case SPEEDSTEP_PROCESSOR_P4M:
+ case SPEEDSTEP_CPU_P4D:
+ case SPEEDSTEP_CPU_P4M:
return pentium4_get_frequency();
- case SPEEDSTEP_PROCESSOR_PIII_T:
- case SPEEDSTEP_PROCESSOR_PIII_C:
- case SPEEDSTEP_PROCESSOR_PIII_C_EARLY:
+ case SPEEDSTEP_CPU_PIII_T:
+ case SPEEDSTEP_CPU_PIII_C:
+ case SPEEDSTEP_CPU_PIII_C_EARLY:
return pentium3_get_frequency(processor);
default:
return 0;
};
return 0;
}
-EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency);
+EXPORT_SYMBOL_GPL(speedstep_get_frequency);
/*********************************************************************
* DETECT SPEEDSTEP-CAPABLE PROCESSOR *
*********************************************************************/
-unsigned int speedstep_detect_processor (void)
+unsigned int speedstep_detect_processor(void)
{
struct cpuinfo_x86 *c = &cpu_data(0);
u32 ebx, msr_lo, msr_hi;
@@ -261,7 +281,7 @@ unsigned int speedstep_detect_processor (void)
* sample has ebx = 0x0f, production has 0x0e.
*/
if ((ebx == 0x0e) || (ebx == 0x0f))
- return SPEEDSTEP_PROCESSOR_P4M;
+ return SPEEDSTEP_CPU_P4M;
break;
case 7:
/*
@@ -272,7 +292,7 @@ unsigned int speedstep_detect_processor (void)
* samples are only of B-stepping...
*/
if (ebx == 0x0e)
- return SPEEDSTEP_PROCESSOR_P4M;
+ return SPEEDSTEP_CPU_P4M;
break;
case 9:
/*
@@ -288,10 +308,13 @@ unsigned int speedstep_detect_processor (void)
* M-P4-Ms may have either ebx=0xe or 0xf [see above]
* M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf]
* also, M-P4M HTs have ebx=0x8, too
- * For now, they are distinguished by the model_id string
+ * For now, they are distinguished by the model_id
+ * string
*/
- if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL))
- return SPEEDSTEP_PROCESSOR_P4M;
+ if ((ebx == 0x0e) ||
+ (strstr(c->x86_model_id,
+ "Mobile Intel(R) Pentium(R) 4") != NULL))
+ return SPEEDSTEP_CPU_P4M;
break;
default:
break;
@@ -301,7 +324,8 @@ unsigned int speedstep_detect_processor (void)
switch (c->x86_model) {
case 0x0B: /* Intel PIII [Tualatin] */
- /* cpuid_ebx(1) is 0x04 for desktop PIII, 0x06 for mobile PIII-M */
+ /* cpuid_ebx(1) is 0x04 for desktop PIII,
+ * 0x06 for mobile PIII-M */
ebx = cpuid_ebx(0x00000001);
dprintk("ebx is %x\n", ebx);
@@ -313,14 +337,15 @@ unsigned int speedstep_detect_processor (void)
/* So far all PIII-M processors support SpeedStep. See
* Intel's 24540640.pdf of June 2003
*/
- return SPEEDSTEP_PROCESSOR_PIII_T;
+ return SPEEDSTEP_CPU_PIII_T;
case 0x08: /* Intel PIII [Coppermine] */
/* all mobile PIII Coppermines have FSB 100 MHz
* ==> sort out a few desktop PIIIs. */
rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
- dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi);
+ dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n",
+ msr_lo, msr_hi);
msr_lo &= 0x00c0000;
if (msr_lo != 0x0080000)
return 0;
@@ -332,13 +357,15 @@ unsigned int speedstep_detect_processor (void)
* bit 56 or 57 is set
*/
rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
- dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi);
- if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
+ dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n",
+ msr_lo, msr_hi);
+ if ((msr_hi & (1<<18)) &&
+ (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
if (c->x86_mask == 0x01) {
dprintk("early PIII version\n");
- return SPEEDSTEP_PROCESSOR_PIII_C_EARLY;
+ return SPEEDSTEP_CPU_PIII_C_EARLY;
} else
- return SPEEDSTEP_PROCESSOR_PIII_C;
+ return SPEEDSTEP_CPU_PIII_C;
}
default:
@@ -369,7 +396,7 @@ unsigned int speedstep_get_freqs(unsigned int processor,
dprintk("trying to determine both speeds\n");
/* get current speed */
- prev_speed = speedstep_get_processor_frequency(processor);
+ prev_speed = speedstep_get_frequency(processor);
if (!prev_speed)
return -EIO;
@@ -379,7 +406,7 @@ unsigned int speedstep_get_freqs(unsigned int processor,
/* switch to low state */
set_state(SPEEDSTEP_LOW);
- *low_speed = speedstep_get_processor_frequency(processor);
+ *low_speed = speedstep_get_frequency(processor);
if (!*low_speed) {
ret = -EIO;
goto out;
@@ -398,7 +425,7 @@ unsigned int speedstep_get_freqs(unsigned int processor,
if (transition_latency)
do_gettimeofday(&tv2);
- *high_speed = speedstep_get_processor_frequency(processor);
+ *high_speed = speedstep_get_frequency(processor);
if (!*high_speed) {
ret = -EIO;
goto out;
@@ -426,9 +453,12 @@ unsigned int speedstep_get_freqs(unsigned int processor,
/* check if the latency measurement is too high or too low
* and set it to a safe value (500uSec) in that case
*/
- if (*transition_latency > 10000000 || *transition_latency < 50000) {
- printk (KERN_WARNING "speedstep: frequency transition measured seems out of "
- "range (%u nSec), falling back to a safe one of %u nSec.\n",
+ if (*transition_latency > 10000000 ||
+ *transition_latency < 50000) {
+ printk(KERN_WARNING PFX "frequency transition "
+ "measured seems out of range (%u "
+ "nSec), falling back to a safe one of"
+ "%u nSec.\n",
*transition_latency, 500000);
*transition_latency = 500000;
}
@@ -436,15 +466,16 @@ unsigned int speedstep_get_freqs(unsigned int processor,
out:
local_irq_restore(flags);
- return (ret);
+ return ret;
}
EXPORT_SYMBOL_GPL(speedstep_get_freqs);
#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
module_param(relaxed_check, int, 0444);
-MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability.");
+MODULE_PARM_DESC(relaxed_check,
+ "Don't do all checks for speedstep capability.");
#endif
-MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
index b11bcc608cac..2b6c04e5a304 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
@@ -12,17 +12,17 @@
/* processors */
-#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */
-#define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */
-#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */
-#define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */
+#define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */
+#define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */
+#define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */
+#define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */
/* the following processors are not speedstep-capable and are not auto-detected
* in speedstep_detect_processor(). However, their speed can be detected using
- * the speedstep_get_processor_frequency() call. */
-#define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */
-#define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */
-#define SPEEDSTEP_PROCESSOR_PCORE 0xFFFFFF05 /* Core */
+ * the speedstep_get_frequency() call. */
+#define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */
+#define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */
+#define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */
/* speedstep states -- only two of them */
@@ -34,7 +34,7 @@
extern unsigned int speedstep_detect_processor (void);
/* detect the current speed (in khz) of the processor */
-extern unsigned int speedstep_get_processor_frequency(unsigned int processor);
+extern unsigned int speedstep_get_frequency(unsigned int processor);
/* detect the low and high speeds of the processor. The callback
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index 8a85c93bd62a..befea088e4f5 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -19,8 +19,8 @@
#include <linux/cpufreq.h>
#include <linux/slab.h>
#include <linux/delay.h>
+#include <linux/io.h>
#include <asm/ist.h>
-#include <asm/io.h>
#include "speedstep-lib.h"
@@ -30,12 +30,12 @@
* If user gives it, these are used.
*
*/
-static int smi_port = 0;
-static int smi_cmd = 0;
-static unsigned int smi_sig = 0;
+static int smi_port;
+static int smi_cmd;
+static unsigned int smi_sig;
/* info about the processor */
-static unsigned int speedstep_processor = 0;
+static unsigned int speedstep_processor;
/*
* There are only two frequency states for each processor. Values
@@ -56,12 +56,13 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
* of DMA activity going on? */
#define SMI_TRIES 5
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg)
+#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
+ "speedstep-smi", msg)
/**
* speedstep_smi_ownership
*/
-static int speedstep_smi_ownership (void)
+static int speedstep_smi_ownership(void)
{
u32 command, result, magic, dummy;
u32 function = GET_SPEEDSTEP_OWNER;
@@ -70,16 +71,18 @@ static int speedstep_smi_ownership (void)
command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
magic = virt_to_phys(magic_data);
- dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port);
+ dprintk("trying to obtain ownership with command %x at port %x\n",
+ command, smi_port);
__asm__ __volatile__(
"push %%ebp\n"
"out %%al, (%%dx)\n"
"pop %%ebp\n"
- : "=D" (result), "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
- "=S" (dummy)
+ : "=D" (result),
+ "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
+ "=S" (dummy)
: "a" (command), "b" (function), "c" (0), "d" (smi_port),
- "D" (0), "S" (magic)
+ "D" (0), "S" (magic)
: "memory"
);
@@ -97,10 +100,10 @@ static int speedstep_smi_ownership (void)
* even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing
* shows that the latter occurs if !(ist_info.event & 0xFFFF).
*/
-static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
+static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high)
{
u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
- u32 state=0;
+ u32 state = 0;
u32 function = GET_SPEEDSTEP_FREQS;
if (!(ist_info.event & 0xFFFF)) {
@@ -110,17 +113,25 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
- dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port);
+ dprintk("trying to determine frequencies with command %x at port %x\n",
+ command, smi_port);
__asm__ __volatile__(
"push %%ebp\n"
"out %%al, (%%dx)\n"
"pop %%ebp"
- : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi), "=S" (dummy)
- : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0)
+ : "=a" (result),
+ "=b" (high_mhz),
+ "=c" (low_mhz),
+ "=d" (state), "=D" (edi), "=S" (dummy)
+ : "a" (command),
+ "b" (function),
+ "c" (state),
+ "d" (smi_port), "S" (0), "D" (0)
);
- dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz);
+ dprintk("result %x, low_freq %u, high_freq %u\n",
+ result, low_mhz, high_mhz);
/* abort if results are obviously incorrect... */
if ((high_mhz + low_mhz) < 600)
@@ -137,26 +148,30 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
* @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
*
*/
-static int speedstep_get_state (void)
+static int speedstep_get_state(void)
{
- u32 function=GET_SPEEDSTEP_STATE;
+ u32 function = GET_SPEEDSTEP_STATE;
u32 result, state, edi, command, dummy;
command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
- dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port);
+ dprintk("trying to determine current setting with command %x "
+ "at port %x\n", command, smi_port);
__asm__ __volatile__(
"push %%ebp\n"
"out %%al, (%%dx)\n"
"pop %%ebp\n"
- : "=a" (result), "=b" (state), "=D" (edi), "=c" (dummy), "=d" (dummy), "=S" (dummy)
- : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0), "D" (0)
+ : "=a" (result),
+ "=b" (state), "=D" (edi),
+ "=c" (dummy), "=d" (dummy), "=S" (dummy)
+ : "a" (command), "b" (function), "c" (0),
+ "d" (smi_port), "S" (0), "D" (0)
);
dprintk("state is %x, result is %x\n", state, result);
- return (state & 1);
+ return state & 1;
}
@@ -165,11 +180,11 @@ static int speedstep_get_state (void)
* @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
*
*/
-static void speedstep_set_state (unsigned int state)
+static void speedstep_set_state(unsigned int state)
{
unsigned int result = 0, command, new_state, dummy;
unsigned long flags;
- unsigned int function=SET_SPEEDSTEP_STATE;
+ unsigned int function = SET_SPEEDSTEP_STATE;
unsigned int retry = 0;
if (state > 0x1)
@@ -180,11 +195,14 @@ static void speedstep_set_state (unsigned int state)
command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
- dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port);
+ dprintk("trying to set frequency to state %u "
+ "with command %x at port %x\n",
+ state, command, smi_port);
do {
if (retry) {
- dprintk("retry %u, previous result %u, waiting...\n", retry, result);
+ dprintk("retry %u, previous result %u, waiting...\n",
+ retry, result);
mdelay(retry * 50);
}
retry++;
@@ -192,20 +210,26 @@ static void speedstep_set_state (unsigned int state)
"push %%ebp\n"
"out %%al, (%%dx)\n"
"pop %%ebp"
- : "=b" (new_state), "=D" (result), "=c" (dummy), "=a" (dummy),
- "=d" (dummy), "=S" (dummy)
- : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0)
+ : "=b" (new_state), "=D" (result),
+ "=c" (dummy), "=a" (dummy),
+ "=d" (dummy), "=S" (dummy)
+ : "a" (command), "b" (function), "c" (state),
+ "d" (smi_port), "S" (0), "D" (0)
);
} while ((new_state != state) && (retry <= SMI_TRIES));
/* enable IRQs */
local_irq_restore(flags);
- if (new_state == state) {
- dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result);
- } else {
- printk(KERN_ERR "cpufreq: change to state %u failed with new_state %u and result %u\n", state, new_state, result);
- }
+ if (new_state == state)
+ dprintk("change to %u MHz succeeded after %u tries "
+ "with result %u\n",
+ (speedstep_freqs[new_state].frequency / 1000),
+ retry, result);
+ else
+ printk(KERN_ERR "cpufreq: change to state %u "
+ "failed with new_state %u and result %u\n",
+ state, new_state, result);
return;
}
@@ -219,13 +243,14 @@ static void speedstep_set_state (unsigned int state)
*
* Sets a new CPUFreq policy/freq.
*/
-static int speedstep_target (struct cpufreq_policy *policy,
+static int speedstep_target(struct cpufreq_policy *policy,
unsigned int target_freq, unsigned int relation)
{
unsigned int newstate = 0;
struct cpufreq_freqs freqs;
- if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate))
+ if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
+ target_freq, relation, &newstate))
return -EINVAL;
freqs.old = speedstep_freqs[speedstep_get_state()].frequency;
@@ -250,7 +275,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
* Limit must be within speedstep_low_freq and speedstep_high_freq, with
* at least one border included.
*/
-static int speedstep_verify (struct cpufreq_policy *policy)
+static int speedstep_verify(struct cpufreq_policy *policy)
{
return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
}
@@ -259,7 +284,8 @@ static int speedstep_verify (struct cpufreq_policy *policy)
static int speedstep_cpu_init(struct cpufreq_policy *policy)
{
int result;
- unsigned int speed,state;
+ unsigned int speed, state;
+ unsigned int *low, *high;
/* capability check */
if (policy->cpu != 0)
@@ -272,19 +298,23 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
}
/* detect low and high frequency */
- result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency,
- &speedstep_freqs[SPEEDSTEP_HIGH].frequency);
+ low = &speedstep_freqs[SPEEDSTEP_LOW].frequency;
+ high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency;
+
+ result = speedstep_smi_get_freqs(low, high);
if (result) {
- /* fall back to speedstep_lib.c dection mechanism: try both states out */
- dprintk("could not detect low and high frequencies by SMI call.\n");
+ /* fall back to speedstep_lib.c dection mechanism:
+ * try both states out */
+ dprintk("could not detect low and high frequencies "
+ "by SMI call.\n");
result = speedstep_get_freqs(speedstep_processor,
- &speedstep_freqs[SPEEDSTEP_LOW].frequency,
- &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
+ low, high,
NULL,
&speedstep_set_state);
if (result) {
- dprintk("could not detect two different speeds -- aborting.\n");
+ dprintk("could not detect two different speeds"
+ " -- aborting.\n");
return result;
} else
dprintk("workaround worked.\n");
@@ -295,7 +325,8 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
speed = speedstep_freqs[state].frequency;
dprintk("currently at %s speed setting - %i MHz\n",
- (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high",
+ (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
+ ? "low" : "high",
(speed / 1000));
/* cpuinfo and default policy values */
@@ -304,7 +335,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
if (result)
- return (result);
+ return result;
cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
@@ -321,7 +352,7 @@ static unsigned int speedstep_get(unsigned int cpu)
{
if (cpu)
return -ENODEV;
- return speedstep_get_processor_frequency(speedstep_processor);
+ return speedstep_get_frequency(speedstep_processor);
}
@@ -335,7 +366,7 @@ static int speedstep_resume(struct cpufreq_policy *policy)
return result;
}
-static struct freq_attr* speedstep_attr[] = {
+static struct freq_attr *speedstep_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
NULL,
};
@@ -364,21 +395,23 @@ static int __init speedstep_init(void)
speedstep_processor = speedstep_detect_processor();
switch (speedstep_processor) {
- case SPEEDSTEP_PROCESSOR_PIII_T:
- case SPEEDSTEP_PROCESSOR_PIII_C:
- case SPEEDSTEP_PROCESSOR_PIII_C_EARLY:
+ case SPEEDSTEP_CPU_PIII_T:
+ case SPEEDSTEP_CPU_PIII_C:
+ case SPEEDSTEP_CPU_PIII_C_EARLY:
break;
default:
speedstep_processor = 0;
}
if (!speedstep_processor) {
- dprintk ("No supported Intel CPU detected.\n");
+ dprintk("No supported Intel CPU detected.\n");
return -ENODEV;
}
- dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n",
- ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level);
+ dprintk("signature:0x%.8lx, command:0x%.8lx, "
+ "event:0x%.8lx, perf_level:0x%.8lx.\n",
+ ist_info.signature, ist_info.command,
+ ist_info.event, ist_info.perf_level);
/* Error if no IST-SMI BIOS or no PARM
sig= 'ISGE' aka 'Intel Speedstep Gate E' */
@@ -416,17 +449,20 @@ static void __exit speedstep_exit(void)
cpufreq_unregister_driver(&speedstep_driver);
}
-module_param(smi_port, int, 0444);
-module_param(smi_cmd, int, 0444);
-module_param(smi_sig, uint, 0444);
+module_param(smi_port, int, 0444);
+module_param(smi_cmd, int, 0444);
+module_param(smi_sig, uint, 0444);
-MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2");
-MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82");
-MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface.");
+MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value "
+ "-- Intel's default setting is 0xb2");
+MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value "
+ "-- Intel's default setting is 0x82");
+MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the "
+ "SMI interface.");
-MODULE_AUTHOR ("Hiroshi Miura");
-MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface.");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Hiroshi Miura");
+MODULE_DESCRIPTION("Speedstep driver for IST applet SMI interface.");
+MODULE_LICENSE("GPL");
module_init(speedstep_init);
module_exit(speedstep_exit);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 25c559ba8d54..1a89a2b68d15 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -4,6 +4,7 @@
#include <linux/string.h>
#include <linux/bitops.h>
#include <linux/smp.h>
+#include <linux/sched.h>
#include <linux/thread_info.h>
#include <linux/module.h>
@@ -55,11 +56,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
/*
* c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
- * with P/T states and does not stop in deep C-states
+ * with P/T states and does not stop in deep C-states.
+ *
+ * It is also reliable across cores and sockets. (but not across
+ * cabinets - we turn it off in that case explicitly.)
*/
if (c->x86_power & (1 << 8)) {
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+ set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
+ sched_clock_stable = 1;
}
/*
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 169a120587be..87b67e3a765a 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -729,7 +729,7 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
spin_unlock_irqrestore(&ds_lock, irq);
- ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+ ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
ds_resume_pebs(tracer);
return tracer;
@@ -1029,5 +1029,4 @@ void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
void ds_exit_thread(struct task_struct *tsk)
{
- WARN_ON(tsk->thread.ds_ctx);
}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index b205272ad394..1736acc4d7aa 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -469,7 +469,7 @@ void __init efi_enter_virtual_mode(void)
efi_memory_desc_t *md;
efi_status_t status;
unsigned long size;
- u64 end, systab, addr, npages;
+ u64 end, systab, addr, npages, end_pfn;
void *p, *va;
efi.systab = NULL;
@@ -481,7 +481,10 @@ void __init efi_enter_virtual_mode(void)
size = md->num_pages << EFI_PAGE_SHIFT;
end = md->phys_addr + size;
- if (PFN_UP(end) <= max_low_pfn_mapped)
+ end_pfn = PFN_UP(end);
+ if (end_pfn <= max_low_pfn_mapped
+ || (end_pfn > (1UL << (32 - PAGE_SHIFT))
+ && end_pfn <= max_pfn_mapped))
va = __va(md->phys_addr);
else
va = efi_ioremap(md->phys_addr, size);
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index a4ee29127fdf..22c3b7828c50 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -100,24 +100,11 @@ void __init efi_call_phys_epilog(void)
void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size)
{
- static unsigned pages_mapped __initdata;
- unsigned i, pages;
- unsigned long offset;
+ unsigned long last_map_pfn;
- pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr);
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
-
- if (pages_mapped + pages > MAX_EFI_IO_PAGES)
+ last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
+ if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
return NULL;
- for (i = 0; i < pages; i++) {
- __set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
- phys_addr, PAGE_KERNEL);
- phys_addr += PAGE_SIZE;
- pages_mapped++;
- }
-
- return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \
- (pages_mapped - pages)) + offset;
+ return (void __iomem *)__va(phys_addr);
}
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a00545fe5cdd..648b3a2a3a44 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -80,6 +80,7 @@ static inline void hpet_clear_mapping(void)
*/
static int boot_hpet_disable;
int hpet_force_user;
+static int hpet_verbose;
static int __init hpet_setup(char *str)
{
@@ -88,6 +89,8 @@ static int __init hpet_setup(char *str)
boot_hpet_disable = 1;
if (!strncmp("force", str, 5))
hpet_force_user = 1;
+ if (!strncmp("verbose", str, 7))
+ hpet_verbose = 1;
}
return 1;
}
@@ -119,6 +122,43 @@ int is_hpet_enabled(void)
}
EXPORT_SYMBOL_GPL(is_hpet_enabled);
+static void _hpet_print_config(const char *function, int line)
+{
+ u32 i, timers, l, h;
+ printk(KERN_INFO "hpet: %s(%d):\n", function, line);
+ l = hpet_readl(HPET_ID);
+ h = hpet_readl(HPET_PERIOD);
+ timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
+ printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h);
+ l = hpet_readl(HPET_CFG);
+ h = hpet_readl(HPET_STATUS);
+ printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h);
+ l = hpet_readl(HPET_COUNTER);
+ h = hpet_readl(HPET_COUNTER+4);
+ printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h);
+
+ for (i = 0; i < timers; i++) {
+ l = hpet_readl(HPET_Tn_CFG(i));
+ h = hpet_readl(HPET_Tn_CFG(i)+4);
+ printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n",
+ i, l, h);
+ l = hpet_readl(HPET_Tn_CMP(i));
+ h = hpet_readl(HPET_Tn_CMP(i)+4);
+ printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n",
+ i, l, h);
+ l = hpet_readl(HPET_Tn_ROUTE(i));
+ h = hpet_readl(HPET_Tn_ROUTE(i)+4);
+ printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n",
+ i, l, h);
+ }
+}
+
+#define hpet_print_config() \
+do { \
+ if (hpet_verbose) \
+ _hpet_print_config(__FUNCTION__, __LINE__); \
+} while (0)
+
/*
* When the hpet driver (/dev/hpet) is enabled, we need to reserve
* timer 0 and timer 1 in case of RTC emulation.
@@ -191,27 +231,37 @@ static struct clock_event_device hpet_clockevent = {
.rating = 50,
};
-static void hpet_start_counter(void)
+static void hpet_stop_counter(void)
{
unsigned long cfg = hpet_readl(HPET_CFG);
-
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
hpet_writel(0, HPET_COUNTER);
hpet_writel(0, HPET_COUNTER + 4);
+}
+
+static void hpet_start_counter(void)
+{
+ unsigned long cfg = hpet_readl(HPET_CFG);
cfg |= HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
}
+static void hpet_restart_counter(void)
+{
+ hpet_stop_counter();
+ hpet_start_counter();
+}
+
static void hpet_resume_device(void)
{
force_hpet_resume();
}
-static void hpet_restart_counter(void)
+static void hpet_resume_counter(void)
{
hpet_resume_device();
- hpet_start_counter();
+ hpet_restart_counter();
}
static void hpet_enable_legacy_int(void)
@@ -259,29 +309,23 @@ static int hpet_setup_msi_irq(unsigned int irq);
static void hpet_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt, int timer)
{
- unsigned long cfg, cmp, now;
+ unsigned long cfg;
uint64_t delta;
switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
+ hpet_stop_counter();
delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
delta >>= evt->shift;
- now = hpet_readl(HPET_COUNTER);
- cmp = now + (unsigned long) delta;
cfg = hpet_readl(HPET_Tn_CFG(timer));
/* Make sure we use edge triggered interrupts */
cfg &= ~HPET_TN_LEVEL;
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
HPET_TN_SETVAL | HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer));
- /*
- * The first write after writing TN_SETVAL to the
- * config register sets the counter value, the second
- * write sets the period.
- */
- hpet_writel(cmp, HPET_Tn_CMP(timer));
- udelay(1);
hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
+ hpet_start_counter();
+ hpet_print_config();
break;
case CLOCK_EVT_MODE_ONESHOT:
@@ -308,6 +352,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
}
+ hpet_print_config();
break;
}
}
@@ -526,6 +571,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
num_timers++; /* Value read out starts from 0 */
+ hpet_print_config();
hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
if (!hpet_devs)
@@ -695,7 +741,7 @@ static struct clocksource clocksource_hpet = {
.mask = HPET_MASK,
.shift = HPET_SHIFT,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
- .resume = hpet_restart_counter,
+ .resume = hpet_resume_counter,
#ifdef CONFIG_X86_64
.vread = vread_hpet,
#endif
@@ -707,7 +753,7 @@ static int hpet_clocksource_register(void)
cycle_t t1;
/* Start the counter */
- hpet_start_counter();
+ hpet_restart_counter();
/* Verify whether hpet counter works */
t1 = read_hpet();
@@ -793,6 +839,7 @@ int __init hpet_enable(void)
* information and the number of channels
*/
id = hpet_readl(HPET_ID);
+ hpet_print_config();
#ifdef CONFIG_HPET_EMULATE_RTC
/*
@@ -845,6 +892,7 @@ static __init int hpet_late_init(void)
return -ENODEV;
hpet_reserve_platform_timers(hpet_readl(HPET_ID));
+ hpet_print_config();
for_each_online_cpu(cpu) {
hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index b0f61f0dcd0a..f2f8540a7f3d 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -136,7 +136,7 @@ int init_fpu(struct task_struct *tsk)
#ifdef CONFIG_X86_32
if (!HAVE_HWFP) {
memset(tsk->thread.xstate, 0, xstate_size);
- finit();
+ finit_task(tsk);
set_stopped_child_used_math(tsk);
return 0;
}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e948b28a5a9a..4558dd3918cf 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -193,6 +193,9 @@ static int __kprobes can_boost(kprobe_opcode_t *opcodes)
kprobe_opcode_t opcode;
kprobe_opcode_t *orig_opcodes = opcodes;
+ if (search_exception_tables(opcodes))
+ return 0; /* Page fault may occur on this address. */
+
retry:
if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
return 0;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 309949e9e1c1..697d1b78cfbf 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -172,7 +172,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4,
ich_force_enable_hpet);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7,
ich_force_enable_hpet);
-
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3a16, /* ICH10 */
+ ich_force_enable_hpet);
static struct pci_dev *cached_dev;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 1cc18d439bbb..2aef36d8aca2 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -216,6 +216,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"),
},
},
+ { /* Handle problems with rebooting on Dell XPS710 */
+ .callback = set_bios_reboot,
+ .ident = "Dell XPS710",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"),
+ },
+ },
{ }
};
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4c54bc0d8ff3..b746deb9ebc6 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -770,6 +770,9 @@ void __init setup_arch(char **cmdline_p)
finish_e820_parsing();
+ if (efi_enabled)
+ efi_init();
+
dmi_scan_machine();
dmi_check_system(bad_bios_dmi_table);
@@ -789,8 +792,6 @@ void __init setup_arch(char **cmdline_p)
insert_resource(&iomem_resource, &data_resource);
insert_resource(&iomem_resource, &bss_resource);
- if (efi_enabled)
- efi_init();
#ifdef CONFIG_X86_32
if (ppro_with_ram_bug()) {
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 83d53ce5d4c4..7a567ebe6361 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -17,20 +17,21 @@
#include <asm/delay.h>
#include <asm/hypervisor.h>
-unsigned int cpu_khz; /* TSC clocks / usec, not used here */
+unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
-unsigned int tsc_khz;
+
+unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(tsc_khz);
/*
* TSC can be unstable due to cpufreq or due to unsynced TSCs
*/
-static int tsc_unstable;
+static int __read_mostly tsc_unstable;
/* native_sched_clock() is called before tsc_init(), so
we must start with the TSC soft disabled to prevent
erroneous rdtsc usage on !cpu_has_tsc processors */
-static int tsc_disabled = -1;
+static int __read_mostly tsc_disabled = -1;
static int tsc_clocksource_reliable;
/*
@@ -273,30 +274,43 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
* use the TSC value at the transitions to calculate a pretty
* good value for the TSC frequencty.
*/
-static inline int pit_expect_msb(unsigned char val)
+static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
- int count = 0;
+ int count;
+ u64 tsc = 0;
for (count = 0; count < 50000; count++) {
/* Ignore LSB */
inb(0x42);
if (inb(0x42) != val)
break;
+ tsc = get_cycles();
}
- return count > 50;
+ *deltap = get_cycles() - tsc;
+ *tscp = tsc;
+
+ /*
+ * We require _some_ success, but the quality control
+ * will be based on the error terms on the TSC values.
+ */
+ return count > 5;
}
/*
- * How many MSB values do we want to see? We aim for a
- * 15ms calibration, which assuming a 2us counter read
- * error should give us roughly 150 ppm precision for
- * the calibration.
+ * How many MSB values do we want to see? We aim for
+ * a maximum error rate of 500ppm (in practice the
+ * real error is much smaller), but refuse to spend
+ * more than 25ms on it.
*/
-#define QUICK_PIT_MS 15
-#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
+#define MAX_QUICK_PIT_MS 25
+#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
static unsigned long quick_pit_calibrate(void)
{
+ int i;
+ u64 tsc, delta;
+ unsigned long d1, d2;
+
/* Set the Gate high, disable speaker */
outb((inb(0x61) & ~0x02) | 0x01, 0x61);
@@ -315,45 +329,52 @@ static unsigned long quick_pit_calibrate(void)
outb(0xff, 0x42);
outb(0xff, 0x42);
- if (pit_expect_msb(0xff)) {
- int i;
- u64 t1, t2, delta;
- unsigned char expect = 0xfe;
-
- t1 = get_cycles();
- for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) {
- if (!pit_expect_msb(expect))
- goto failed;
+ /*
+ * The PIT starts counting at the next edge, so we
+ * need to delay for a microsecond. The easiest way
+ * to do that is to just read back the 16-bit counter
+ * once from the PIT.
+ */
+ inb(0x42);
+ inb(0x42);
+
+ if (pit_expect_msb(0xff, &tsc, &d1)) {
+ for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
+ if (!pit_expect_msb(0xff-i, &delta, &d2))
+ break;
+
+ /*
+ * Iterate until the error is less than 500 ppm
+ */
+ delta -= tsc;
+ if (d1+d2 < delta >> 11)
+ goto success;
}
- t2 = get_cycles();
-
- /*
- * Make sure we can rely on the second TSC timestamp:
- */
- if (!pit_expect_msb(expect))
- goto failed;
-
- /*
- * Ok, if we get here, then we've seen the
- * MSB of the PIT decrement QUICK_PIT_ITERATIONS
- * times, and each MSB had many hits, so we never
- * had any sudden jumps.
- *
- * As a result, we can depend on there not being
- * any odd delays anywhere, and the TSC reads are
- * reliable.
- *
- * kHz = ticks / time-in-seconds / 1000;
- * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000
- * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000)
- */
- delta = (t2 - t1)*PIT_TICK_RATE;
- do_div(delta, QUICK_PIT_ITERATIONS*256*1000);
- printk("Fast TSC calibration using PIT\n");
- return delta;
}
-failed:
+ printk("Fast TSC calibration failed\n");
return 0;
+
+success:
+ /*
+ * Ok, if we get here, then we've seen the
+ * MSB of the PIT decrement 'i' times, and the
+ * error has shrunk to less than 500 ppm.
+ *
+ * As a result, we can depend on there not being
+ * any odd delays anywhere, and the TSC reads are
+ * reliable (within the error). We also adjust the
+ * delta to the middle of the error bars, just
+ * because it looks nicer.
+ *
+ * kHz = ticks / time-in-seconds / 1000;
+ * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
+ * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
+ */
+ delta += (long)(d2 - d1)/2;
+ delta *= PIT_TICK_RATE;
+ do_div(delta, i*256*1000);
+ printk("Fast TSC calibration using PIT\n");
+ return delta;
}
/**
@@ -523,8 +544,6 @@ unsigned long native_calibrate_tsc(void)
return tsc_pit_min;
}
-#ifdef CONFIG_X86_32
-/* Only called from the Powernow K7 cpu freq driver */
int recalibrate_cpu_khz(void)
{
#ifndef CONFIG_SMP
@@ -546,7 +565,6 @@ int recalibrate_cpu_khz(void)
EXPORT_SYMBOL(recalibrate_cpu_khz);
-#endif /* CONFIG_X86_32 */
/* Accelerators for sched_clock()
* convert from cycles(64bits) => nanoseconds (64bits)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b81125f0bdee..0a303c3ed11f 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -4,6 +4,10 @@
config HAVE_KVM
bool
+config HAVE_KVM_IRQCHIP
+ bool
+ default y
+
menuconfig VIRTUALIZATION
bool "Virtualization"
depends on HAVE_KVM || X86
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 72bd275a9b5c..c13bb92d3157 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -201,6 +201,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
if (!atomic_inc_and_test(&pt->pending))
set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
+ if (!pt->reinject)
+ atomic_set(&pt->pending, 1);
+
if (vcpu0 && waitqueue_active(&vcpu0->wq))
wake_up_interruptible(&vcpu0->wq);
@@ -536,6 +539,16 @@ void kvm_pit_reset(struct kvm_pit *pit)
pit->pit_state.irq_ack = 1;
}
+static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
+{
+ struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
+
+ if (!mask) {
+ atomic_set(&pit->pit_state.pit_timer.pending, 0);
+ pit->pit_state.irq_ack = 1;
+ }
+}
+
struct kvm_pit *kvm_create_pit(struct kvm *kvm)
{
struct kvm_pit *pit;
@@ -545,9 +558,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
if (!pit)
return NULL;
- mutex_lock(&kvm->lock);
pit->irq_source_id = kvm_request_irq_source_id(kvm);
- mutex_unlock(&kvm->lock);
if (pit->irq_source_id < 0) {
kfree(pit);
return NULL;
@@ -580,10 +591,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
pit_state->irq_ack_notifier.gsi = 0;
pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
+ pit_state->pit_timer.reinject = true;
mutex_unlock(&pit->pit_state.lock);
kvm_pit_reset(pit);
+ pit->mask_notifier.func = pit_mask_notifer;
+ kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
+
return pit;
}
@@ -592,6 +607,8 @@ void kvm_free_pit(struct kvm *kvm)
struct hrtimer *timer;
if (kvm->arch.vpit) {
+ kvm_unregister_irq_mask_notifier(kvm, 0,
+ &kvm->arch.vpit->mask_notifier);
mutex_lock(&kvm->arch.vpit->pit_state.lock);
timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
hrtimer_cancel(timer);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 4178022b97aa..6acbe4b505d5 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -9,6 +9,7 @@ struct kvm_kpit_timer {
s64 period; /* unit: ns */
s64 scheduled;
atomic_t pending;
+ bool reinject;
};
struct kvm_kpit_channel_state {
@@ -45,6 +46,7 @@ struct kvm_pit {
struct kvm *kvm;
struct kvm_kpit_state pit_state;
int irq_source_id;
+ struct kvm_irq_mask_notifier mask_notifier;
};
#define KVM_PIT_BASE_ADDRESS 0x40
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 179dcb0103fd..1ccb50c74f18 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -32,11 +32,13 @@
#include <linux/kvm_host.h>
static void pic_lock(struct kvm_pic *s)
+ __acquires(&s->lock)
{
spin_lock(&s->lock);
}
static void pic_unlock(struct kvm_pic *s)
+ __releases(&s->lock)
{
struct kvm *kvm = s->kvm;
unsigned acks = s->pending_acks;
@@ -49,7 +51,8 @@ static void pic_unlock(struct kvm_pic *s)
spin_unlock(&s->lock);
while (acks) {
- kvm_notify_acked_irq(kvm, __ffs(acks));
+ kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)),
+ __ffs(acks));
acks &= acks - 1;
}
@@ -76,12 +79,13 @@ void kvm_pic_clear_isr_ack(struct kvm *kvm)
/*
* set irq level. If an edge is detected, then the IRR is set to 1
*/
-static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
+static inline int pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
{
- int mask;
+ int mask, ret = 1;
mask = 1 << irq;
if (s->elcr & mask) /* level triggered */
if (level) {
+ ret = !(s->irr & mask);
s->irr |= mask;
s->last_irr |= mask;
} else {
@@ -90,11 +94,15 @@ static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
}
else /* edge triggered */
if (level) {
- if ((s->last_irr & mask) == 0)
+ if ((s->last_irr & mask) == 0) {
+ ret = !(s->irr & mask);
s->irr |= mask;
+ }
s->last_irr |= mask;
} else
s->last_irr &= ~mask;
+
+ return (s->imr & mask) ? -1 : ret;
}
/*
@@ -171,16 +179,19 @@ void kvm_pic_update_irq(struct kvm_pic *s)
pic_unlock(s);
}
-void kvm_pic_set_irq(void *opaque, int irq, int level)
+int kvm_pic_set_irq(void *opaque, int irq, int level)
{
struct kvm_pic *s = opaque;
+ int ret = -1;
pic_lock(s);
if (irq >= 0 && irq < PIC_NUM_PINS) {
- pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
+ ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
pic_update_irq(s);
}
pic_unlock(s);
+
+ return ret;
}
/*
@@ -232,7 +243,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
}
pic_update_irq(s);
pic_unlock(s);
- kvm_notify_acked_irq(kvm, irq);
+ kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq);
return intno;
}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 82579ee538d0..9f593188129e 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -32,6 +32,8 @@
#include "lapic.h"
#define PIC_NUM_PINS 16
+#define SELECT_PIC(irq) \
+ ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
struct kvm;
struct kvm_vcpu;
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 8e5ee99551f6..ed66e4c078dc 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -18,7 +18,6 @@ static const u32 host_save_user_msrs[] = {
};
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
-#define NUM_DB_REGS 4
struct kvm_vcpu;
@@ -29,18 +28,23 @@ struct vcpu_svm {
struct svm_cpu_data *svm_data;
uint64_t asid_generation;
- unsigned long db_regs[NUM_DB_REGS];
-
u64 next_rip;
u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
u64 host_gs_base;
unsigned long host_cr2;
- unsigned long host_db_regs[NUM_DB_REGS];
- unsigned long host_dr6;
- unsigned long host_dr7;
u32 *msrpm;
+ struct vmcb *hsave;
+ u64 hsave_msr;
+
+ u64 nested_vmcb;
+
+ /* These are the merged vectors */
+ u32 *nested_msrpm;
+
+ /* gpa pointers to the real vectors */
+ u64 nested_vmcb_msrpm;
};
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2d4477c71473..2a36f7f7c4c7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -145,11 +145,20 @@ struct kvm_rmap_desc {
struct kvm_rmap_desc *more;
};
-struct kvm_shadow_walk {
- int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu,
- u64 addr, u64 *spte, int level);
+struct kvm_shadow_walk_iterator {
+ u64 addr;
+ hpa_t shadow_addr;
+ int level;
+ u64 *sptep;
+ unsigned index;
};
+#define for_each_shadow_entry(_vcpu, _addr, _walker) \
+ for (shadow_walk_init(&(_walker), _vcpu, _addr); \
+ shadow_walk_okay(&(_walker)); \
+ shadow_walk_next(&(_walker)))
+
+
struct kvm_unsync_walk {
int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
};
@@ -343,7 +352,6 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
BUG_ON(!mc->nobjs);
p = mc->objects[--mc->nobjs];
- memset(p, 0, size);
return p;
}
@@ -794,10 +802,8 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&sp->oos_link);
- ASSERT(is_empty_shadow_page(sp->spt));
bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
sp->multimapped = 0;
- sp->global = 1;
sp->parent_pte = parent_pte;
--vcpu->kvm->arch.n_free_mmu_pages;
return sp;
@@ -983,8 +989,8 @@ struct kvm_mmu_pages {
idx < 512; \
idx = find_next_bit(bitmap, 512, idx+1))
-int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
- int idx)
+static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
+ int idx)
{
int i;
@@ -1059,7 +1065,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
hlist_for_each_entry(sp, node, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.metaphysical
+ if (sp->gfn == gfn && !sp->role.direct
&& !sp->role.invalid) {
pgprintk("%s: found role %x\n",
__func__, sp->role.word);
@@ -1115,8 +1121,9 @@ struct mmu_page_path {
i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
i = mmu_pages_next(&pvec, &parents, i))
-int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
- int i)
+static int mmu_pages_next(struct kvm_mmu_pages *pvec,
+ struct mmu_page_path *parents,
+ int i)
{
int n;
@@ -1135,7 +1142,7 @@ int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
return n;
}
-void mmu_pages_clear_parents(struct mmu_page_path *parents)
+static void mmu_pages_clear_parents(struct mmu_page_path *parents)
{
struct kvm_mmu_page *sp;
unsigned int level = 0;
@@ -1193,7 +1200,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gfn_t gfn,
gva_t gaddr,
unsigned level,
- int metaphysical,
+ int direct,
unsigned access,
u64 *parent_pte)
{
@@ -1204,10 +1211,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp;
struct hlist_node *node, *tmp;
- role.word = 0;
- role.glevels = vcpu->arch.mmu.root_level;
+ role = vcpu->arch.mmu.base_role;
role.level = level;
- role.metaphysical = metaphysical;
+ role.direct = direct;
role.access = access;
if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
@@ -1242,8 +1248,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
sp->gfn = gfn;
sp->role = role;
+ sp->global = role.cr4_pge;
hlist_add_head(&sp->hash_link, bucket);
- if (!metaphysical) {
+ if (!direct) {
if (rmap_write_protect(vcpu->kvm, gfn))
kvm_flush_remote_tlbs(vcpu->kvm);
account_shadowed(vcpu->kvm, gfn);
@@ -1255,35 +1262,35 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
return sp;
}
-static int walk_shadow(struct kvm_shadow_walk *walker,
- struct kvm_vcpu *vcpu, u64 addr)
+static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
+ struct kvm_vcpu *vcpu, u64 addr)
{
- hpa_t shadow_addr;
- int level;
- int r;
- u64 *sptep;
- unsigned index;
-
- shadow_addr = vcpu->arch.mmu.root_hpa;
- level = vcpu->arch.mmu.shadow_root_level;
- if (level == PT32E_ROOT_LEVEL) {
- shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
- shadow_addr &= PT64_BASE_ADDR_MASK;
- if (!shadow_addr)
- return 1;
- --level;
+ iterator->addr = addr;
+ iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
+ iterator->level = vcpu->arch.mmu.shadow_root_level;
+ if (iterator->level == PT32E_ROOT_LEVEL) {
+ iterator->shadow_addr
+ = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
+ iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
+ --iterator->level;
+ if (!iterator->shadow_addr)
+ iterator->level = 0;
}
+}
- while (level >= PT_PAGE_TABLE_LEVEL) {
- index = SHADOW_PT_INDEX(addr, level);
- sptep = ((u64 *)__va(shadow_addr)) + index;
- r = walker->entry(walker, vcpu, addr, sptep, level);
- if (r)
- return r;
- shadow_addr = *sptep & PT64_BASE_ADDR_MASK;
- --level;
- }
- return 0;
+static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
+{
+ if (iterator->level < PT_PAGE_TABLE_LEVEL)
+ return false;
+ iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level);
+ iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index;
+ return true;
+}
+
+static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
+{
+ iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK;
+ --iterator->level;
}
static void kvm_mmu_page_unlink_children(struct kvm *kvm,
@@ -1388,7 +1395,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_page_unlink_children(kvm, sp);
kvm_mmu_unlink_parents(kvm, sp);
kvm_flush_remote_tlbs(kvm);
- if (!sp->role.invalid && !sp->role.metaphysical)
+ if (!sp->role.invalid && !sp->role.direct)
unaccount_shadowed(kvm, sp->gfn);
if (sp->unsync)
kvm_unlink_unsync_page(kvm, sp);
@@ -1451,7 +1458,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.metaphysical) {
+ if (sp->gfn == gfn && !sp->role.direct) {
pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
@@ -1463,11 +1470,20 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
{
+ unsigned index;
+ struct hlist_head *bucket;
struct kvm_mmu_page *sp;
+ struct hlist_node *node, *nn;
- while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
- pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word);
- kvm_mmu_zap_page(kvm, sp);
+ index = kvm_page_table_hashfn(gfn);
+ bucket = &kvm->arch.mmu_page_hash[index];
+ hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
+ if (sp->gfn == gfn && !sp->role.direct
+ && !sp->role.invalid) {
+ pgprintk("%s: zap %lx %x\n",
+ __func__, gfn, sp->role.word);
+ kvm_mmu_zap_page(kvm, sp);
+ }
}
}
@@ -1622,7 +1638,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
/* don't unsync if pagetable is shadowed with multiple roles */
hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
- if (s->gfn != sp->gfn || s->role.metaphysical)
+ if (s->gfn != sp->gfn || s->role.direct)
continue;
if (s->role.word != sp->role.word)
return 1;
@@ -1669,8 +1685,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
u64 mt_mask = shadow_mt_mask;
struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
- if (!(vcpu->arch.cr4 & X86_CR4_PGE))
- global = 0;
if (!global && sp->global) {
sp->global = 0;
if (sp->unsync) {
@@ -1777,12 +1791,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
pgprintk("hfn old %lx new %lx\n",
spte_to_pfn(*shadow_pte), pfn);
rmap_remove(vcpu->kvm, shadow_pte);
- } else {
- if (largepage)
- was_rmapped = is_large_pte(*shadow_pte);
- else
- was_rmapped = 1;
- }
+ } else
+ was_rmapped = 1;
}
if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
dirty, largepage, global, gfn, pfn, speculative, true)) {
@@ -1820,67 +1830,42 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}
-struct direct_shadow_walk {
- struct kvm_shadow_walk walker;
- pfn_t pfn;
- int write;
- int largepage;
- int pt_write;
-};
-
-static int direct_map_entry(struct kvm_shadow_walk *_walk,
- struct kvm_vcpu *vcpu,
- u64 addr, u64 *sptep, int level)
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
+ int largepage, gfn_t gfn, pfn_t pfn)
{
- struct direct_shadow_walk *walk =
- container_of(_walk, struct direct_shadow_walk, walker);
+ struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
+ int pt_write = 0;
gfn_t pseudo_gfn;
- gfn_t gfn = addr >> PAGE_SHIFT;
-
- if (level == PT_PAGE_TABLE_LEVEL
- || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
- mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
- 0, walk->write, 1, &walk->pt_write,
- walk->largepage, 0, gfn, walk->pfn, false);
- ++vcpu->stat.pf_fixed;
- return 1;
- }
- if (*sptep == shadow_trap_nonpresent_pte) {
- pseudo_gfn = (addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
- sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1,
- 1, ACC_ALL, sptep);
- if (!sp) {
- pgprintk("nonpaging_map: ENOMEM\n");
- kvm_release_pfn_clean(walk->pfn);
- return -ENOMEM;
+ for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
+ if (iterator.level == PT_PAGE_TABLE_LEVEL
+ || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
+ mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
+ 0, write, 1, &pt_write,
+ largepage, 0, gfn, pfn, false);
+ ++vcpu->stat.pf_fixed;
+ break;
}
- set_shadow_pte(sptep,
- __pa(sp->spt)
- | PT_PRESENT_MASK | PT_WRITABLE_MASK
- | shadow_user_mask | shadow_x_mask);
- }
- return 0;
-}
+ if (*iterator.sptep == shadow_trap_nonpresent_pte) {
+ pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
+ sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
+ iterator.level - 1,
+ 1, ACC_ALL, iterator.sptep);
+ if (!sp) {
+ pgprintk("nonpaging_map: ENOMEM\n");
+ kvm_release_pfn_clean(pfn);
+ return -ENOMEM;
+ }
-static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
- int largepage, gfn_t gfn, pfn_t pfn)
-{
- int r;
- struct direct_shadow_walk walker = {
- .walker = { .entry = direct_map_entry, },
- .pfn = pfn,
- .largepage = largepage,
- .write = write,
- .pt_write = 0,
- };
-
- r = walk_shadow(&walker.walker, vcpu, gfn << PAGE_SHIFT);
- if (r < 0)
- return r;
- return walker.pt_write;
+ set_shadow_pte(iterator.sptep,
+ __pa(sp->spt)
+ | PT_PRESENT_MASK | PT_WRITABLE_MASK
+ | shadow_user_mask | shadow_x_mask);
+ }
+ }
+ return pt_write;
}
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
@@ -1962,7 +1947,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
int i;
gfn_t root_gfn;
struct kvm_mmu_page *sp;
- int metaphysical = 0;
+ int direct = 0;
root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
@@ -1971,18 +1956,18 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
if (tdp_enabled)
- metaphysical = 1;
+ direct = 1;
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- PT64_ROOT_LEVEL, metaphysical,
+ PT64_ROOT_LEVEL, direct,
ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
vcpu->arch.mmu.root_hpa = root;
return;
}
- metaphysical = !is_paging(vcpu);
+ direct = !is_paging(vcpu);
if (tdp_enabled)
- metaphysical = 1;
+ direct = 1;
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
@@ -1996,7 +1981,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
} else if (vcpu->arch.mmu.root_level == 0)
root_gfn = 0;
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
- PT32_ROOT_LEVEL, metaphysical,
+ PT32_ROOT_LEVEL, direct,
ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
@@ -2251,17 +2236,23 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
{
+ int r;
+
ASSERT(vcpu);
ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
if (!is_paging(vcpu))
- return nonpaging_init_context(vcpu);
+ r = nonpaging_init_context(vcpu);
else if (is_long_mode(vcpu))
- return paging64_init_context(vcpu);
+ r = paging64_init_context(vcpu);
else if (is_pae(vcpu))
- return paging32E_init_context(vcpu);
+ r = paging32E_init_context(vcpu);
else
- return paging32_init_context(vcpu);
+ r = paging32_init_context(vcpu);
+
+ vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level;
+
+ return r;
}
static int init_kvm_mmu(struct kvm_vcpu *vcpu)
@@ -2492,7 +2483,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
index = kvm_page_table_hashfn(gfn);
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
- if (sp->gfn != gfn || sp->role.metaphysical || sp->role.invalid)
+ if (sp->gfn != gfn || sp->role.direct || sp->role.invalid)
continue;
pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
@@ -3130,7 +3121,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
gfn_t gfn;
list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
- if (sp->role.metaphysical)
+ if (sp->role.direct)
continue;
gfn = unalias_gfn(vcpu->kvm, sp->gfn);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 258e5d56298e..eaab2145f62b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -54,7 +54,7 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
static inline int is_long_mode(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
- return vcpu->arch.shadow_efer & EFER_LME;
+ return vcpu->arch.shadow_efer & EFER_LMA;
#else
return 0;
#endif
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9fd78b6e17ad..6bd70206c561 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -25,7 +25,6 @@
#if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
- #define shadow_walker shadow_walker64
#define FNAME(name) paging##64_##name
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
@@ -42,7 +41,6 @@
#elif PTTYPE == 32
#define pt_element_t u32
#define guest_walker guest_walker32
- #define shadow_walker shadow_walker32
#define FNAME(name) paging##32_##name
#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
@@ -73,18 +71,6 @@ struct guest_walker {
u32 error_code;
};
-struct shadow_walker {
- struct kvm_shadow_walk walker;
- struct guest_walker *guest_walker;
- int user_fault;
- int write_fault;
- int largepage;
- int *ptwrite;
- pfn_t pfn;
- u64 *sptep;
- gpa_t pte_gpa;
-};
-
static gfn_t gpte_to_gfn(pt_element_t gpte)
{
return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -283,91 +269,79 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
/*
* Fetch a shadow pte for a specific level in the paging hierarchy.
*/
-static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
- struct kvm_vcpu *vcpu, u64 addr,
- u64 *sptep, int level)
+static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+ struct guest_walker *gw,
+ int user_fault, int write_fault, int largepage,
+ int *ptwrite, pfn_t pfn)
{
- struct shadow_walker *sw =
- container_of(_sw, struct shadow_walker, walker);
- struct guest_walker *gw = sw->guest_walker;
unsigned access = gw->pt_access;
struct kvm_mmu_page *shadow_page;
- u64 spte;
- int metaphysical;
+ u64 spte, *sptep;
+ int direct;
gfn_t table_gfn;
int r;
+ int level;
pt_element_t curr_pte;
+ struct kvm_shadow_walk_iterator iterator;
- if (level == PT_PAGE_TABLE_LEVEL
- || (sw->largepage && level == PT_DIRECTORY_LEVEL)) {
- mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
- sw->user_fault, sw->write_fault,
- gw->ptes[gw->level-1] & PT_DIRTY_MASK,
- sw->ptwrite, sw->largepage,
- gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
- gw->gfn, sw->pfn, false);
- sw->sptep = sptep;
- return 1;
- }
+ if (!is_present_pte(gw->ptes[gw->level - 1]))
+ return NULL;
- if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
- return 0;
+ for_each_shadow_entry(vcpu, addr, iterator) {
+ level = iterator.level;
+ sptep = iterator.sptep;
+ if (level == PT_PAGE_TABLE_LEVEL
+ || (largepage && level == PT_DIRECTORY_LEVEL)) {
+ mmu_set_spte(vcpu, sptep, access,
+ gw->pte_access & access,
+ user_fault, write_fault,
+ gw->ptes[gw->level-1] & PT_DIRTY_MASK,
+ ptwrite, largepage,
+ gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
+ gw->gfn, pfn, false);
+ break;
+ }
- if (is_large_pte(*sptep)) {
- set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
- kvm_flush_remote_tlbs(vcpu->kvm);
- rmap_remove(vcpu->kvm, sptep);
- }
+ if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
+ continue;
- if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) {
- metaphysical = 1;
- if (!is_dirty_pte(gw->ptes[level - 1]))
- access &= ~ACC_WRITE_MASK;
- table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
- } else {
- metaphysical = 0;
- table_gfn = gw->table_gfn[level - 2];
- }
- shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1,
- metaphysical, access, sptep);
- if (!metaphysical) {
- r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
- &curr_pte, sizeof(curr_pte));
- if (r || curr_pte != gw->ptes[level - 2]) {
- kvm_mmu_put_page(shadow_page, sptep);
- kvm_release_pfn_clean(sw->pfn);
- sw->sptep = NULL;
- return 1;
+ if (is_large_pte(*sptep)) {
+ rmap_remove(vcpu->kvm, sptep);
+ set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+ kvm_flush_remote_tlbs(vcpu->kvm);
}
- }
- spte = __pa(shadow_page->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK
- | PT_WRITABLE_MASK | PT_USER_MASK;
- *sptep = spte;
- return 0;
-}
-
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
- struct guest_walker *guest_walker,
- int user_fault, int write_fault, int largepage,
- int *ptwrite, pfn_t pfn)
-{
- struct shadow_walker walker = {
- .walker = { .entry = FNAME(shadow_walk_entry), },
- .guest_walker = guest_walker,
- .user_fault = user_fault,
- .write_fault = write_fault,
- .largepage = largepage,
- .ptwrite = ptwrite,
- .pfn = pfn,
- };
-
- if (!is_present_pte(guest_walker->ptes[guest_walker->level - 1]))
- return NULL;
+ if (level == PT_DIRECTORY_LEVEL
+ && gw->level == PT_DIRECTORY_LEVEL) {
+ direct = 1;
+ if (!is_dirty_pte(gw->ptes[level - 1]))
+ access &= ~ACC_WRITE_MASK;
+ table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
+ } else {
+ direct = 0;
+ table_gfn = gw->table_gfn[level - 2];
+ }
+ shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+ direct, access, sptep);
+ if (!direct) {
+ r = kvm_read_guest_atomic(vcpu->kvm,
+ gw->pte_gpa[level - 2],
+ &curr_pte, sizeof(curr_pte));
+ if (r || curr_pte != gw->ptes[level - 2]) {
+ kvm_mmu_put_page(shadow_page, sptep);
+ kvm_release_pfn_clean(pfn);
+ sptep = NULL;
+ break;
+ }
+ }
- walk_shadow(&walker.walker, vcpu, addr);
+ spte = __pa(shadow_page->spt)
+ | PT_PRESENT_MASK | PT_ACCESSED_MASK
+ | PT_WRITABLE_MASK | PT_USER_MASK;
+ *sptep = spte;
+ }
- return walker.sptep;
+ return sptep;
}
/*
@@ -465,54 +439,56 @@ out_unlock:
return 0;
}
-static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
- struct kvm_vcpu *vcpu, u64 addr,
- u64 *sptep, int level)
+static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
- struct shadow_walker *sw =
- container_of(_sw, struct shadow_walker, walker);
-
- /* FIXME: properly handle invlpg on large guest pages */
- if (level == PT_PAGE_TABLE_LEVEL ||
- ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
- struct kvm_mmu_page *sp = page_header(__pa(sptep));
+ struct kvm_shadow_walk_iterator iterator;
+ pt_element_t gpte;
+ gpa_t pte_gpa = -1;
+ int level;
+ u64 *sptep;
+ int need_flush = 0;
- sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
- sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+ spin_lock(&vcpu->kvm->mmu_lock);
- if (is_shadow_present_pte(*sptep)) {
- rmap_remove(vcpu->kvm, sptep);
- if (is_large_pte(*sptep))
- --vcpu->kvm->stat.lpages;
+ for_each_shadow_entry(vcpu, gva, iterator) {
+ level = iterator.level;
+ sptep = iterator.sptep;
+
+ /* FIXME: properly handle invlpg on large guest pages */
+ if (level == PT_PAGE_TABLE_LEVEL ||
+ ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
+ struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+ pte_gpa = (sp->gfn << PAGE_SHIFT);
+ pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+
+ if (is_shadow_present_pte(*sptep)) {
+ rmap_remove(vcpu->kvm, sptep);
+ if (is_large_pte(*sptep))
+ --vcpu->kvm->stat.lpages;
+ need_flush = 1;
+ }
+ set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+ break;
}
- set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
- return 1;
- }
- if (!is_shadow_present_pte(*sptep))
- return 1;
- return 0;
-}
-static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
-{
- pt_element_t gpte;
- struct shadow_walker walker = {
- .walker = { .entry = FNAME(shadow_invlpg_entry), },
- .pte_gpa = -1,
- };
+ if (!is_shadow_present_pte(*sptep))
+ break;
+ }
- spin_lock(&vcpu->kvm->mmu_lock);
- walk_shadow(&walker.walker, vcpu, gva);
+ if (need_flush)
+ kvm_flush_remote_tlbs(vcpu->kvm);
spin_unlock(&vcpu->kvm->mmu_lock);
- if (walker.pte_gpa == -1)
+
+ if (pte_gpa == -1)
return;
- if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
+ if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
sizeof(pt_element_t)))
return;
if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
if (mmu_topup_memory_caches(vcpu))
return;
- kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
+ kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
sizeof(pt_element_t), 0);
}
}
@@ -540,7 +516,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
pt_element_t pt[256 / sizeof(pt_element_t)];
gpa_t pte_gpa;
- if (sp->role.metaphysical
+ if (sp->role.direct
|| (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
nonpaging_prefetch_page(vcpu, sp);
return;
@@ -619,7 +595,6 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
#undef pt_element_t
#undef guest_walker
-#undef shadow_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a9e769e4e251..1821c2078199 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -38,9 +38,6 @@ MODULE_LICENSE("GPL");
#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1
-#define DR7_GD_MASK (1 << 13)
-#define DR6_BD_MASK (1 << 13)
-
#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3
@@ -50,6 +47,15 @@ MODULE_LICENSE("GPL");
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+/* Turn on to get debugging output*/
+/* #define NESTED_DEBUG */
+
+#ifdef NESTED_DEBUG
+#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
+#else
+#define nsvm_printk(fmt, args...) do {} while(0)
+#endif
+
/* enable NPT for AMD64 and X86 with PAE */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static bool npt_enabled = true;
@@ -60,14 +66,29 @@ static int npt = 1;
module_param(npt, int, S_IRUGO);
+static int nested = 0;
+module_param(nested, int, S_IRUGO);
+
static void kvm_reput_irq(struct vcpu_svm *svm);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
+static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
+static int nested_svm_vmexit(struct vcpu_svm *svm);
+static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
+ void *arg2, void *opaque);
+static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+ bool has_error_code, u32 error_code);
+
static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_svm, vcpu);
}
+static inline bool is_nested(struct vcpu_svm *svm)
+{
+ return svm->nested_vmcb;
+}
+
static unsigned long iopm_base;
struct kvm_ldttss_desc {
@@ -157,32 +178,6 @@ static inline void kvm_write_cr2(unsigned long val)
asm volatile ("mov %0, %%cr2" :: "r" (val));
}
-static inline unsigned long read_dr6(void)
-{
- unsigned long dr6;
-
- asm volatile ("mov %%dr6, %0" : "=r" (dr6));
- return dr6;
-}
-
-static inline void write_dr6(unsigned long val)
-{
- asm volatile ("mov %0, %%dr6" :: "r" (val));
-}
-
-static inline unsigned long read_dr7(void)
-{
- unsigned long dr7;
-
- asm volatile ("mov %%dr7, %0" : "=r" (dr7));
- return dr7;
-}
-
-static inline void write_dr7(unsigned long val)
-{
- asm volatile ("mov %0, %%dr7" :: "r" (val));
-}
-
static inline void force_new_asid(struct kvm_vcpu *vcpu)
{
to_svm(vcpu)->asid_generation--;
@@ -198,7 +193,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if (!npt_enabled && !(efer & EFER_LMA))
efer &= ~EFER_LME;
- to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
+ to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
vcpu->arch.shadow_efer = efer;
}
@@ -207,6 +202,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
{
struct vcpu_svm *svm = to_svm(vcpu);
+ /* If we are within a nested VM we'd better #VMEXIT and let the
+ guest handle the exception */
+ if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
+ return;
+
svm->vmcb->control.event_inj = nr
| SVM_EVTINJ_VALID
| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
@@ -242,7 +242,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
kvm_rip_write(vcpu, svm->next_rip);
svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
- vcpu->arch.interrupt_window_open = 1;
+ vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
}
static int has_svm(void)
@@ -250,7 +250,7 @@ static int has_svm(void)
const char *msg;
if (!cpu_has_svm(&msg)) {
- printk(KERN_INFO "has_svn: %s\n", msg);
+ printk(KERN_INFO "has_svm: %s\n", msg);
return 0;
}
@@ -292,7 +292,7 @@ static void svm_hardware_enable(void *garbage)
svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK);
+ wrmsrl(MSR_EFER, efer | EFER_SVME);
wrmsrl(MSR_VM_HSAVE_PA,
page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
@@ -417,6 +417,14 @@ static __init int svm_hardware_setup(void)
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
+ if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
+ kvm_enable_efer_bits(EFER_FFXSR);
+
+ if (nested) {
+ printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
+ kvm_enable_efer_bits(EFER_SVME);
+ }
+
for_each_online_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
@@ -559,7 +567,7 @@ static void init_vmcb(struct vcpu_svm *svm)
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
- save->efer = MSR_EFER_SVME_MASK;
+ save->efer = EFER_SVME;
save->dr6 = 0xffff0ff0;
save->dr7 = 0x400;
save->rflags = 2;
@@ -591,6 +599,9 @@ static void init_vmcb(struct vcpu_svm *svm)
save->cr4 = 0;
}
force_new_asid(&svm->vcpu);
+
+ svm->nested_vmcb = 0;
+ svm->vcpu.arch.hflags = HF_GIF_MASK;
}
static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -615,6 +626,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
struct vcpu_svm *svm;
struct page *page;
struct page *msrpm_pages;
+ struct page *hsave_page;
+ struct page *nested_msrpm_pages;
int err;
svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -637,14 +650,25 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
if (!msrpm_pages)
goto uninit;
+
+ nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+ if (!nested_msrpm_pages)
+ goto uninit;
+
svm->msrpm = page_address(msrpm_pages);
svm_vcpu_init_msrpm(svm->msrpm);
+ hsave_page = alloc_page(GFP_KERNEL);
+ if (!hsave_page)
+ goto uninit;
+ svm->hsave = page_address(hsave_page);
+
+ svm->nested_msrpm = page_address(nested_msrpm_pages);
+
svm->vmcb = page_address(page);
clear_page(svm->vmcb);
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
svm->asid_generation = 0;
- memset(svm->db_regs, 0, sizeof(svm->db_regs));
init_vmcb(svm);
fx_init(&svm->vcpu);
@@ -669,6 +693,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
+ __free_page(virt_to_page(svm->hsave));
+ __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
}
@@ -718,6 +744,16 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
to_svm(vcpu)->vmcb->save.rflags = rflags;
}
+static void svm_set_vintr(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
+}
+
+static void svm_clear_vintr(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+}
+
static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
{
struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
@@ -760,20 +796,37 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
- /*
- * SVM always stores 0 for the 'G' bit in the CS selector in
- * the VMCB on a VMEXIT. This hurts cross-vendor migration:
- * Intel's VMENTRY has a check on the 'G' bit.
- */
- if (seg == VCPU_SREG_CS)
+ switch (seg) {
+ case VCPU_SREG_CS:
+ /*
+ * SVM always stores 0 for the 'G' bit in the CS selector in
+ * the VMCB on a VMEXIT. This hurts cross-vendor migration:
+ * Intel's VMENTRY has a check on the 'G' bit.
+ */
var->g = s->limit > 0xfffff;
-
- /*
- * Work around a bug where the busy flag in the tr selector
- * isn't exposed
- */
- if (seg == VCPU_SREG_TR)
+ break;
+ case VCPU_SREG_TR:
+ /*
+ * Work around a bug where the busy flag in the tr selector
+ * isn't exposed
+ */
var->type |= 0x2;
+ break;
+ case VCPU_SREG_DS:
+ case VCPU_SREG_ES:
+ case VCPU_SREG_FS:
+ case VCPU_SREG_GS:
+ /*
+ * The accessed bit must always be set in the segment
+ * descriptor cache, although it can be cleared in the
+ * descriptor, the cached bit always remains at 1. Since
+ * Intel has a check on this, set it here to support
+ * cross-vendor migration.
+ */
+ if (!var->unusable)
+ var->type |= 0x1;
+ break;
+ }
var->unusable = !var->present;
}
@@ -905,9 +958,37 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
}
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
+static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
{
- return -EOPNOTSUPP;
+ int old_debug = vcpu->guest_debug;
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ vcpu->guest_debug = dbg->control;
+
+ svm->vmcb->control.intercept_exceptions &=
+ ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
+ if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
+ if (vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ svm->vmcb->control.intercept_exceptions |=
+ 1 << DB_VECTOR;
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ svm->vmcb->control.intercept_exceptions |=
+ 1 << BP_VECTOR;
+ } else
+ vcpu->guest_debug = 0;
+
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
+ else
+ svm->vmcb->save.dr7 = vcpu->arch.dr7;
+
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+ else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
+ svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+
+ return 0;
}
static int svm_get_irq(struct kvm_vcpu *vcpu)
@@ -949,7 +1030,29 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
{
- unsigned long val = to_svm(vcpu)->db_regs[dr];
+ struct vcpu_svm *svm = to_svm(vcpu);
+ unsigned long val;
+
+ switch (dr) {
+ case 0 ... 3:
+ val = vcpu->arch.db[dr];
+ break;
+ case 6:
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ val = vcpu->arch.dr6;
+ else
+ val = svm->vmcb->save.dr6;
+ break;
+ case 7:
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ val = vcpu->arch.dr7;
+ else
+ val = svm->vmcb->save.dr7;
+ break;
+ default:
+ val = 0;
+ }
+
KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
return val;
}
@@ -959,33 +1062,40 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
{
struct vcpu_svm *svm = to_svm(vcpu);
- *exception = 0;
+ KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);
- if (svm->vmcb->save.dr7 & DR7_GD_MASK) {
- svm->vmcb->save.dr7 &= ~DR7_GD_MASK;
- svm->vmcb->save.dr6 |= DR6_BD_MASK;
- *exception = DB_VECTOR;
- return;
- }
+ *exception = 0;
switch (dr) {
case 0 ... 3:
- svm->db_regs[dr] = value;
+ vcpu->arch.db[dr] = value;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+ vcpu->arch.eff_db[dr] = value;
return;
case 4 ... 5:
- if (vcpu->arch.cr4 & X86_CR4_DE) {
+ if (vcpu->arch.cr4 & X86_CR4_DE)
*exception = UD_VECTOR;
+ return;
+ case 6:
+ if (value & 0xffffffff00000000ULL) {
+ *exception = GP_VECTOR;
return;
}
- case 7: {
- if (value & ~((1ULL << 32) - 1)) {
+ vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
+ return;
+ case 7:
+ if (value & 0xffffffff00000000ULL) {
*exception = GP_VECTOR;
return;
}
- svm->vmcb->save.dr7 = value;
+ vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+ svm->vmcb->save.dr7 = vcpu->arch.dr7;
+ vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
+ }
return;
- }
default:
+ /* FIXME: Possible case? */
printk(KERN_DEBUG "%s: unexpected dr %u\n",
__func__, dr);
*exception = UD_VECTOR;
@@ -1031,6 +1141,27 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
}
+static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ if (!(svm->vcpu.guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+ kvm_queue_exception(&svm->vcpu, DB_VECTOR);
+ return 1;
+ }
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ return 0;
+}
+
+static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+ kvm_run->debug.arch.exception = BP_VECTOR;
+ return 0;
+}
+
static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
int er;
@@ -1080,7 +1211,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
- int size, down, in, string, rep;
+ int size, in, string;
unsigned port;
++svm->vcpu.stat.io_exits;
@@ -1099,8 +1230,6 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
port = io_info >> 16;
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
- rep = (io_info & SVM_IOIO_REP_MASK) != 0;
- down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
skip_emulated_instruction(&svm->vcpu);
return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
@@ -1139,6 +1268,567 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
+static int nested_svm_check_permissions(struct vcpu_svm *svm)
+{
+ if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
+ || !is_paging(&svm->vcpu)) {
+ kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ if (svm->vmcb->save.cpl) {
+ kvm_inject_gp(&svm->vcpu, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+ bool has_error_code, u32 error_code)
+{
+ if (is_nested(svm)) {
+ svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = error_code;
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+ if (nested_svm_exit_handled(svm, false)) {
+ nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
+
+ nested_svm_vmexit(svm);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static inline int nested_svm_intr(struct vcpu_svm *svm)
+{
+ if (is_nested(svm)) {
+ if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+ return 0;
+
+ if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
+ return 0;
+
+ svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+
+ if (nested_svm_exit_handled(svm, false)) {
+ nsvm_printk("VMexit -> INTR\n");
+ nested_svm_vmexit(svm);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
+{
+ struct page *page;
+
+ down_read(&current->mm->mmap_sem);
+ page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
+ up_read(&current->mm->mmap_sem);
+
+ if (is_error_page(page)) {
+ printk(KERN_INFO "%s: could not find page at 0x%llx\n",
+ __func__, gpa);
+ kvm_release_page_clean(page);
+ kvm_inject_gp(&svm->vcpu, 0);
+ return NULL;
+ }
+ return page;
+}
+
+static int nested_svm_do(struct vcpu_svm *svm,
+ u64 arg1_gpa, u64 arg2_gpa, void *opaque,
+ int (*handler)(struct vcpu_svm *svm,
+ void *arg1,
+ void *arg2,
+ void *opaque))
+{
+ struct page *arg1_page;
+ struct page *arg2_page = NULL;
+ void *arg1;
+ void *arg2 = NULL;
+ int retval;
+
+ arg1_page = nested_svm_get_page(svm, arg1_gpa);
+ if(arg1_page == NULL)
+ return 1;
+
+ if (arg2_gpa) {
+ arg2_page = nested_svm_get_page(svm, arg2_gpa);
+ if(arg2_page == NULL) {
+ kvm_release_page_clean(arg1_page);
+ return 1;
+ }
+ }
+
+ arg1 = kmap_atomic(arg1_page, KM_USER0);
+ if (arg2_gpa)
+ arg2 = kmap_atomic(arg2_page, KM_USER1);
+
+ retval = handler(svm, arg1, arg2, opaque);
+
+ kunmap_atomic(arg1, KM_USER0);
+ if (arg2_gpa)
+ kunmap_atomic(arg2, KM_USER1);
+
+ kvm_release_page_dirty(arg1_page);
+ if (arg2_gpa)
+ kvm_release_page_dirty(arg2_page);
+
+ return retval;
+}
+
+static int nested_svm_exit_handled_real(struct vcpu_svm *svm,
+ void *arg1,
+ void *arg2,
+ void *opaque)
+{
+ struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+ bool kvm_overrides = *(bool *)opaque;
+ u32 exit_code = svm->vmcb->control.exit_code;
+
+ if (kvm_overrides) {
+ switch (exit_code) {
+ case SVM_EXIT_INTR:
+ case SVM_EXIT_NMI:
+ return 0;
+ /* For now we are always handling NPFs when using them */
+ case SVM_EXIT_NPF:
+ if (npt_enabled)
+ return 0;
+ break;
+ /* When we're shadowing, trap PFs */
+ case SVM_EXIT_EXCP_BASE + PF_VECTOR:
+ if (!npt_enabled)
+ return 0;
+ break;
+ default:
+ break;
+ }
+ }
+
+ switch (exit_code) {
+ case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
+ u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
+ if (nested_vmcb->control.intercept_cr_read & cr_bits)
+ return 1;
+ break;
+ }
+ case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
+ u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
+ if (nested_vmcb->control.intercept_cr_write & cr_bits)
+ return 1;
+ break;
+ }
+ case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
+ u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
+ if (nested_vmcb->control.intercept_dr_read & dr_bits)
+ return 1;
+ break;
+ }
+ case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
+ u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
+ if (nested_vmcb->control.intercept_dr_write & dr_bits)
+ return 1;
+ break;
+ }
+ case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
+ u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
+ if (nested_vmcb->control.intercept_exceptions & excp_bits)
+ return 1;
+ break;
+ }
+ default: {
+ u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
+ nsvm_printk("exit code: 0x%x\n", exit_code);
+ if (nested_vmcb->control.intercept & exit_bits)
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
+ void *arg1, void *arg2,
+ void *opaque)
+{
+ struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+ u8 *msrpm = (u8 *)arg2;
+ u32 t0, t1;
+ u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+ u32 param = svm->vmcb->control.exit_info_1 & 1;
+
+ if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+ return 0;
+
+ switch(msr) {
+ case 0 ... 0x1fff:
+ t0 = (msr * 2) % 8;
+ t1 = msr / 8;
+ break;
+ case 0xc0000000 ... 0xc0001fff:
+ t0 = (8192 + msr - 0xc0000000) * 2;
+ t1 = (t0 / 8);
+ t0 %= 8;
+ break;
+ case 0xc0010000 ... 0xc0011fff:
+ t0 = (16384 + msr - 0xc0010000) * 2;
+ t1 = (t0 / 8);
+ t0 %= 8;
+ break;
+ default:
+ return 1;
+ break;
+ }
+ if (msrpm[t1] & ((1 << param) << t0))
+ return 1;
+
+ return 0;
+}
+
+static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
+{
+ bool k = kvm_override;
+
+ switch (svm->vmcb->control.exit_code) {
+ case SVM_EXIT_MSR:
+ return nested_svm_do(svm, svm->nested_vmcb,
+ svm->nested_vmcb_msrpm, NULL,
+ nested_svm_exit_handled_msr);
+ default: break;
+ }
+
+ return nested_svm_do(svm, svm->nested_vmcb, 0, &k,
+ nested_svm_exit_handled_real);
+}
+
+static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
+ void *arg2, void *opaque)
+{
+ struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+ struct vmcb *hsave = svm->hsave;
+ u64 nested_save[] = { nested_vmcb->save.cr0,
+ nested_vmcb->save.cr3,
+ nested_vmcb->save.cr4,
+ nested_vmcb->save.efer,
+ nested_vmcb->control.intercept_cr_read,
+ nested_vmcb->control.intercept_cr_write,
+ nested_vmcb->control.intercept_dr_read,
+ nested_vmcb->control.intercept_dr_write,
+ nested_vmcb->control.intercept_exceptions,
+ nested_vmcb->control.intercept,
+ nested_vmcb->control.msrpm_base_pa,
+ nested_vmcb->control.iopm_base_pa,
+ nested_vmcb->control.tsc_offset };
+
+ /* Give the current vmcb to the guest */
+ memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb));
+ nested_vmcb->save.cr0 = nested_save[0];
+ if (!npt_enabled)
+ nested_vmcb->save.cr3 = nested_save[1];
+ nested_vmcb->save.cr4 = nested_save[2];
+ nested_vmcb->save.efer = nested_save[3];
+ nested_vmcb->control.intercept_cr_read = nested_save[4];
+ nested_vmcb->control.intercept_cr_write = nested_save[5];
+ nested_vmcb->control.intercept_dr_read = nested_save[6];
+ nested_vmcb->control.intercept_dr_write = nested_save[7];
+ nested_vmcb->control.intercept_exceptions = nested_save[8];
+ nested_vmcb->control.intercept = nested_save[9];
+ nested_vmcb->control.msrpm_base_pa = nested_save[10];
+ nested_vmcb->control.iopm_base_pa = nested_save[11];
+ nested_vmcb->control.tsc_offset = nested_save[12];
+
+ /* We always set V_INTR_MASKING and remember the old value in hflags */
+ if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+ nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+ if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
+ (nested_vmcb->control.int_vector)) {
+ nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
+ nested_vmcb->control.int_vector);
+ }
+
+ /* Restore the original control entries */
+ svm->vmcb->control = hsave->control;
+
+ /* Kill any pending exceptions */
+ if (svm->vcpu.arch.exception.pending == true)
+ nsvm_printk("WARNING: Pending Exception\n");
+ svm->vcpu.arch.exception.pending = false;
+
+ /* Restore selected save entries */
+ svm->vmcb->save.es = hsave->save.es;
+ svm->vmcb->save.cs = hsave->save.cs;
+ svm->vmcb->save.ss = hsave->save.ss;
+ svm->vmcb->save.ds = hsave->save.ds;
+ svm->vmcb->save.gdtr = hsave->save.gdtr;
+ svm->vmcb->save.idtr = hsave->save.idtr;
+ svm->vmcb->save.rflags = hsave->save.rflags;
+ svm_set_efer(&svm->vcpu, hsave->save.efer);
+ svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
+ svm_set_cr4(&svm->vcpu, hsave->save.cr4);
+ if (npt_enabled) {
+ svm->vmcb->save.cr3 = hsave->save.cr3;
+ svm->vcpu.arch.cr3 = hsave->save.cr3;
+ } else {
+ kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
+ }
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
+ svm->vmcb->save.dr7 = 0;
+ svm->vmcb->save.cpl = 0;
+ svm->vmcb->control.exit_int_info = 0;
+
+ svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+ /* Exit nested SVM mode */
+ svm->nested_vmcb = 0;
+
+ return 0;
+}
+
+static int nested_svm_vmexit(struct vcpu_svm *svm)
+{
+ nsvm_printk("VMexit\n");
+ if (nested_svm_do(svm, svm->nested_vmcb, 0,
+ NULL, nested_svm_vmexit_real))
+ return 1;
+
+ kvm_mmu_reset_context(&svm->vcpu);
+ kvm_mmu_load(&svm->vcpu);
+
+ return 0;
+}
+
+static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
+ void *arg2, void *opaque)
+{
+ int i;
+ u32 *nested_msrpm = (u32*)arg1;
+ for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
+ svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
+ svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm);
+
+ return 0;
+}
+
+static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
+ void *arg2, void *opaque)
+{
+ struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+ struct vmcb *hsave = svm->hsave;
+
+ /* nested_vmcb is our indicator if nested SVM is activated */
+ svm->nested_vmcb = svm->vmcb->save.rax;
+
+ /* Clear internal status */
+ svm->vcpu.arch.exception.pending = false;
+
+ /* Save the old vmcb, so we don't need to pick what we save, but
+ can restore everything when a VMEXIT occurs */
+ memcpy(hsave, svm->vmcb, sizeof(struct vmcb));
+ /* We need to remember the original CR3 in the SPT case */
+ if (!npt_enabled)
+ hsave->save.cr3 = svm->vcpu.arch.cr3;
+ hsave->save.cr4 = svm->vcpu.arch.cr4;
+ hsave->save.rip = svm->next_rip;
+
+ if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
+ svm->vcpu.arch.hflags |= HF_HIF_MASK;
+ else
+ svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+
+ /* Load the nested guest state */
+ svm->vmcb->save.es = nested_vmcb->save.es;
+ svm->vmcb->save.cs = nested_vmcb->save.cs;
+ svm->vmcb->save.ss = nested_vmcb->save.ss;
+ svm->vmcb->save.ds = nested_vmcb->save.ds;
+ svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
+ svm->vmcb->save.idtr = nested_vmcb->save.idtr;
+ svm->vmcb->save.rflags = nested_vmcb->save.rflags;
+ svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
+ svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
+ svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
+ if (npt_enabled) {
+ svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
+ svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
+ } else {
+ kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
+ kvm_mmu_reset_context(&svm->vcpu);
+ }
+ svm->vmcb->save.cr2 = nested_vmcb->save.cr2;
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
+ /* In case we don't even reach vcpu_run, the fields are not updated */
+ svm->vmcb->save.rax = nested_vmcb->save.rax;
+ svm->vmcb->save.rsp = nested_vmcb->save.rsp;
+ svm->vmcb->save.rip = nested_vmcb->save.rip;
+ svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
+ svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
+ svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+
+ /* We don't want a nested guest to be more powerful than the guest,
+ so all intercepts are ORed */
+ svm->vmcb->control.intercept_cr_read |=
+ nested_vmcb->control.intercept_cr_read;
+ svm->vmcb->control.intercept_cr_write |=
+ nested_vmcb->control.intercept_cr_write;
+ svm->vmcb->control.intercept_dr_read |=
+ nested_vmcb->control.intercept_dr_read;
+ svm->vmcb->control.intercept_dr_write |=
+ nested_vmcb->control.intercept_dr_write;
+ svm->vmcb->control.intercept_exceptions |=
+ nested_vmcb->control.intercept_exceptions;
+
+ svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
+
+ svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
+
+ force_new_asid(&svm->vcpu);
+ svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
+ svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
+ svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
+ if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
+ nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
+ nested_vmcb->control.int_ctl);
+ }
+ if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
+ svm->vcpu.arch.hflags |= HF_VINTR_MASK;
+ else
+ svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
+
+ nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
+ nested_vmcb->control.exit_int_info,
+ nested_vmcb->control.int_state);
+
+ svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
+ svm->vmcb->control.int_state = nested_vmcb->control.int_state;
+ svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
+ if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
+ nsvm_printk("Injecting Event: 0x%x\n",
+ nested_vmcb->control.event_inj);
+ svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
+ svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
+
+ svm->vcpu.arch.hflags |= HF_GIF_MASK;
+
+ return 0;
+}
+
+static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+{
+ to_vmcb->save.fs = from_vmcb->save.fs;
+ to_vmcb->save.gs = from_vmcb->save.gs;
+ to_vmcb->save.tr = from_vmcb->save.tr;
+ to_vmcb->save.ldtr = from_vmcb->save.ldtr;
+ to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
+ to_vmcb->save.star = from_vmcb->save.star;
+ to_vmcb->save.lstar = from_vmcb->save.lstar;
+ to_vmcb->save.cstar = from_vmcb->save.cstar;
+ to_vmcb->save.sfmask = from_vmcb->save.sfmask;
+ to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
+ to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
+ to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
+
+ return 1;
+}
+
+static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb,
+ void *arg2, void *opaque)
+{
+ return nested_svm_vmloadsave((struct vmcb *)nested_vmcb, svm->vmcb);
+}
+
+static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
+ void *arg2, void *opaque)
+{
+ return nested_svm_vmloadsave(svm->vmcb, (struct vmcb *)nested_vmcb);
+}
+
+static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ if (nested_svm_check_permissions(svm))
+ return 1;
+
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload);
+
+ return 1;
+}
+
+static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ if (nested_svm_check_permissions(svm))
+ return 1;
+
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave);
+
+ return 1;
+}
+
+static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ nsvm_printk("VMrun\n");
+ if (nested_svm_check_permissions(svm))
+ return 1;
+
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ if (nested_svm_do(svm, svm->vmcb->save.rax, 0,
+ NULL, nested_svm_vmrun))
+ return 1;
+
+ if (nested_svm_do(svm, svm->nested_vmcb_msrpm, 0,
+ NULL, nested_svm_vmrun_msrpm))
+ return 1;
+
+ return 1;
+}
+
+static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ if (nested_svm_check_permissions(svm))
+ return 1;
+
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ svm->vcpu.arch.hflags |= HF_GIF_MASK;
+
+ return 1;
+}
+
+static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ if (nested_svm_check_permissions(svm))
+ return 1;
+
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+
+ /* After a CLGI no interrupts should come */
+ svm_clear_vintr(svm);
+ svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+
+ return 1;
+}
+
static int invalid_op_interception(struct vcpu_svm *svm,
struct kvm_run *kvm_run)
{
@@ -1250,6 +1940,15 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
case MSR_IA32_LASTINTTOIP:
*data = svm->vmcb->save.last_excp_to;
break;
+ case MSR_VM_HSAVE_PA:
+ *data = svm->hsave_msr;
+ break;
+ case MSR_VM_CR:
+ *data = 0;
+ break;
+ case MSR_IA32_UCODE_REV:
+ *data = 0x01000065;
+ break;
default:
return kvm_get_msr_common(vcpu, ecx, data);
}
@@ -1344,6 +2043,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
+ case MSR_VM_HSAVE_PA:
+ svm->hsave_msr = data;
+ break;
default:
return kvm_set_msr_common(vcpu, ecx, data);
}
@@ -1380,7 +2082,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
{
KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
- svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+ svm_clear_vintr(svm);
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
/*
* If the user space waits to inject interrupts, exit as soon as
@@ -1417,6 +2119,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_WRITE_DR3] = emulate_on_interception,
[SVM_EXIT_WRITE_DR5] = emulate_on_interception,
[SVM_EXIT_WRITE_DR7] = emulate_on_interception,
+ [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
+ [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
[SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
@@ -1436,12 +2140,12 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_MSR] = msr_interception,
[SVM_EXIT_TASK_SWITCH] = task_switch_interception,
[SVM_EXIT_SHUTDOWN] = shutdown_interception,
- [SVM_EXIT_VMRUN] = invalid_op_interception,
+ [SVM_EXIT_VMRUN] = vmrun_interception,
[SVM_EXIT_VMMCALL] = vmmcall_interception,
- [SVM_EXIT_VMLOAD] = invalid_op_interception,
- [SVM_EXIT_VMSAVE] = invalid_op_interception,
- [SVM_EXIT_STGI] = invalid_op_interception,
- [SVM_EXIT_CLGI] = invalid_op_interception,
+ [SVM_EXIT_VMLOAD] = vmload_interception,
+ [SVM_EXIT_VMSAVE] = vmsave_interception,
+ [SVM_EXIT_STGI] = stgi_interception,
+ [SVM_EXIT_CLGI] = clgi_interception,
[SVM_EXIT_SKINIT] = invalid_op_interception,
[SVM_EXIT_WBINVD] = emulate_on_interception,
[SVM_EXIT_MONITOR] = invalid_op_interception,
@@ -1457,6 +2161,17 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
(u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
+ if (is_nested(svm)) {
+ nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
+ exit_code, svm->vmcb->control.exit_info_1,
+ svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
+ if (nested_svm_exit_handled(svm, true)) {
+ nested_svm_vmexit(svm);
+ nsvm_printk("-> #VMEXIT\n");
+ return 1;
+ }
+ }
+
if (npt_enabled) {
int mmu_reload = 0;
if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
@@ -1544,6 +2259,8 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ nested_svm_intr(svm);
+
svm_inject_irq(svm, irq);
}
@@ -1589,11 +2306,17 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
if (!kvm_cpu_has_interrupt(vcpu))
goto out;
+ if (nested_svm_intr(svm))
+ goto out;
+
+ if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
+ goto out;
+
if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
(vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
/* unable to deliver irq, set pending irq */
- vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
+ svm_set_vintr(svm);
svm_inject_irq(svm, 0x0);
goto out;
}
@@ -1615,7 +2338,8 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
}
svm->vcpu.arch.interrupt_window_open =
- !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
+ !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+ (svm->vcpu.arch.hflags & HF_GIF_MASK);
}
static void svm_do_inject_vector(struct vcpu_svm *svm)
@@ -1637,9 +2361,13 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
+ if (nested_svm_intr(svm))
+ return;
+
svm->vcpu.arch.interrupt_window_open =
(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
- (svm->vmcb->save.rflags & X86_EFLAGS_IF));
+ (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
+ (svm->vcpu.arch.hflags & HF_GIF_MASK));
if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
/*
@@ -1652,9 +2380,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
*/
if (!svm->vcpu.arch.interrupt_window_open &&
(svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
- control->intercept |= 1ULL << INTERCEPT_VINTR;
- else
- control->intercept &= ~(1ULL << INTERCEPT_VINTR);
+ svm_set_vintr(svm);
+ else
+ svm_clear_vintr(svm);
}
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -1662,22 +2390,6 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
-static void save_db_regs(unsigned long *db_regs)
-{
- asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
- asm volatile ("mov %%dr1, %0" : "=r"(db_regs[1]));
- asm volatile ("mov %%dr2, %0" : "=r"(db_regs[2]));
- asm volatile ("mov %%dr3, %0" : "=r"(db_regs[3]));
-}
-
-static void load_db_regs(unsigned long *db_regs)
-{
- asm volatile ("mov %0, %%dr0" : : "r"(db_regs[0]));
- asm volatile ("mov %0, %%dr1" : : "r"(db_regs[1]));
- asm volatile ("mov %0, %%dr2" : : "r"(db_regs[2]));
- asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3]));
-}
-
static void svm_flush_tlb(struct kvm_vcpu *vcpu)
{
force_new_asid(vcpu);
@@ -1736,19 +2448,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
gs_selector = kvm_read_gs();
ldt_selector = kvm_read_ldt();
svm->host_cr2 = kvm_read_cr2();
- svm->host_dr6 = read_dr6();
- svm->host_dr7 = read_dr7();
- svm->vmcb->save.cr2 = vcpu->arch.cr2;
+ if (!is_nested(svm))
+ svm->vmcb->save.cr2 = vcpu->arch.cr2;
/* required for live migration with NPT */
if (npt_enabled)
svm->vmcb->save.cr3 = vcpu->arch.cr3;
- if (svm->vmcb->save.dr7 & 0xff) {
- write_dr7(0);
- save_db_regs(svm->host_db_regs);
- load_db_regs(svm->db_regs);
- }
-
clgi();
local_irq_enable();
@@ -1824,16 +2529,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
#endif
);
- if ((svm->vmcb->save.dr7 & 0xff))
- load_db_regs(svm->host_db_regs);
-
vcpu->arch.cr2 = svm->vmcb->save.cr2;
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
- write_dr6(svm->host_dr6);
- write_dr7(svm->host_dr7);
kvm_write_cr2(svm->host_cr2);
kvm_load_fs(fs_selector);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7611af576829..bb481330716f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -91,6 +91,7 @@ struct vcpu_vmx {
} rmode;
int vpid;
bool emulation_required;
+ enum emulation_result invalid_state_emulation_result;
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
@@ -189,21 +190,21 @@ static inline int is_page_fault(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_no_device(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_invalid_opcode(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_external_interrupt(u32 intr_info)
@@ -480,8 +481,13 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
if (!vcpu->fpu_active)
eb |= 1u << NM_VECTOR;
- if (vcpu->guest_debug.enabled)
- eb |= 1u << DB_VECTOR;
+ if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
+ if (vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ eb |= 1u << DB_VECTOR;
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ eb |= 1u << BP_VECTOR;
+ }
if (vcpu->arch.rmode.active)
eb = ~0;
if (vm_need_ept())
@@ -747,29 +753,33 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u32 intr_info = nr | INTR_INFO_VALID_MASK;
- if (has_error_code)
+ if (has_error_code) {
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+ intr_info |= INTR_INFO_DELIVER_CODE_MASK;
+ }
if (vcpu->arch.rmode.active) {
vmx->rmode.irq.pending = true;
vmx->rmode.irq.vector = nr;
vmx->rmode.irq.rip = kvm_rip_read(vcpu);
- if (nr == BP_VECTOR)
+ if (nr == BP_VECTOR || nr == OF_VECTOR)
vmx->rmode.irq.rip++;
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- nr | INTR_TYPE_SOFT_INTR
- | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
- | INTR_INFO_VALID_MASK);
+ intr_info |= INTR_TYPE_SOFT_INTR;
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
return;
}
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- nr | INTR_TYPE_EXCEPTION
- | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
- | INTR_INFO_VALID_MASK);
+ if (nr == BP_VECTOR || nr == OF_VECTOR) {
+ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+ intr_info |= INTR_TYPE_SOFT_EXCEPTION;
+ } else
+ intr_info |= INTR_TYPE_HARD_EXCEPTION;
+
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
}
static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
@@ -856,11 +866,8 @@ static u64 guest_read_tsc(void)
* writes 'guest_tsc' into guest's timestamp counter "register"
* guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
*/
-static void guest_write_tsc(u64 guest_tsc)
+static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
{
- u64 host_tsc;
-
- rdtscll(host_tsc);
vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
}
@@ -925,14 +932,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_msr_entry *msr;
+ u64 host_tsc;
int ret = 0;
switch (msr_index) {
-#ifdef CONFIG_X86_64
case MSR_EFER:
vmx_load_host_state(vmx);
ret = kvm_set_msr_common(vcpu, msr_index, data);
break;
+#ifdef CONFIG_X86_64
case MSR_FS_BASE:
vmcs_writel(GUEST_FS_BASE, data);
break;
@@ -950,7 +958,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_TIME_STAMP_COUNTER:
- guest_write_tsc(data);
+ rdtscll(host_tsc);
+ guest_write_tsc(data, host_tsc);
break;
case MSR_P6_PERFCTR0:
case MSR_P6_PERFCTR1:
@@ -999,40 +1008,28 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
}
}
-static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
+static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
{
- unsigned long dr7 = 0x400;
- int old_singlestep;
-
- old_singlestep = vcpu->guest_debug.singlestep;
-
- vcpu->guest_debug.enabled = dbg->enabled;
- if (vcpu->guest_debug.enabled) {
- int i;
+ int old_debug = vcpu->guest_debug;
+ unsigned long flags;
- dr7 |= 0x200; /* exact */
- for (i = 0; i < 4; ++i) {
- if (!dbg->breakpoints[i].enabled)
- continue;
- vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
- dr7 |= 2 << (i*2); /* global enable */
- dr7 |= 0 << (i*4+16); /* execution breakpoint */
- }
+ vcpu->guest_debug = dbg->control;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
+ vcpu->guest_debug = 0;
- vcpu->guest_debug.singlestep = dbg->singlestep;
- } else
- vcpu->guest_debug.singlestep = 0;
-
- if (old_singlestep && !vcpu->guest_debug.singlestep) {
- unsigned long flags;
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
+ else
+ vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
- flags = vmcs_readl(GUEST_RFLAGS);
+ flags = vmcs_readl(GUEST_RFLAGS);
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+ else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
- vmcs_writel(GUEST_RFLAGS, flags);
- }
+ vmcs_writel(GUEST_RFLAGS, flags);
update_exception_bitmap(vcpu);
- vmcs_writel(GUEST_DR7, dr7);
return 0;
}
@@ -1433,6 +1430,29 @@ continue_rmode:
init_rmode(vcpu->kvm);
}
+static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
+
+ vcpu->arch.shadow_efer = efer;
+ if (!msr)
+ return;
+ if (efer & EFER_LMA) {
+ vmcs_write32(VM_ENTRY_CONTROLS,
+ vmcs_read32(VM_ENTRY_CONTROLS) |
+ VM_ENTRY_IA32E_MODE);
+ msr->data = efer;
+ } else {
+ vmcs_write32(VM_ENTRY_CONTROLS,
+ vmcs_read32(VM_ENTRY_CONTROLS) &
+ ~VM_ENTRY_IA32E_MODE);
+
+ msr->data = efer & ~EFER_LME;
+ }
+ setup_msrs(vmx);
+}
+
#ifdef CONFIG_X86_64
static void enter_lmode(struct kvm_vcpu *vcpu)
@@ -1447,13 +1467,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
(guest_tr_ar & ~AR_TYPE_MASK)
| AR_TYPE_BUSY_64_TSS);
}
-
vcpu->arch.shadow_efer |= EFER_LMA;
-
- find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME;
- vmcs_write32(VM_ENTRY_CONTROLS,
- vmcs_read32(VM_ENTRY_CONTROLS)
- | VM_ENTRY_IA32E_MODE);
+ vmx_set_efer(vcpu, vcpu->arch.shadow_efer);
}
static void exit_lmode(struct kvm_vcpu *vcpu)
@@ -1612,30 +1627,6 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vmcs_writel(GUEST_CR4, hw_cr4);
}
-static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
-
- vcpu->arch.shadow_efer = efer;
- if (!msr)
- return;
- if (efer & EFER_LMA) {
- vmcs_write32(VM_ENTRY_CONTROLS,
- vmcs_read32(VM_ENTRY_CONTROLS) |
- VM_ENTRY_IA32E_MODE);
- msr->data = efer;
-
- } else {
- vmcs_write32(VM_ENTRY_CONTROLS,
- vmcs_read32(VM_ENTRY_CONTROLS) &
- ~VM_ENTRY_IA32E_MODE);
-
- msr->data = efer & ~EFER_LME;
- }
- setup_msrs(vmx);
-}
-
static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1653,7 +1644,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
var->limit = vmcs_read32(sf->limit);
var->selector = vmcs_read16(sf->selector);
ar = vmcs_read32(sf->ar_bytes);
- if (ar & AR_UNUSABLE_MASK)
+ if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
ar = 0;
var->type = ar & 15;
var->s = (ar >> 4) & 1;
@@ -1788,14 +1779,16 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
cs_rpl = cs.selector & SELECTOR_RPL_MASK;
+ if (cs.unusable)
+ return false;
if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
return false;
if (!cs.s)
return false;
- if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) {
+ if (cs.type & AR_TYPE_WRITEABLE_MASK) {
if (cs.dpl > cs_rpl)
return false;
- } else if (cs.type & AR_TYPE_CODE_MASK) {
+ } else {
if (cs.dpl != cs_rpl)
return false;
}
@@ -1814,7 +1807,9 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu)
vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
ss_rpl = ss.selector & SELECTOR_RPL_MASK;
- if ((ss.type != 3) || (ss.type != 7))
+ if (ss.unusable)
+ return true;
+ if (ss.type != 3 && ss.type != 7)
return false;
if (!ss.s)
return false;
@@ -1834,6 +1829,8 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
vmx_get_segment(vcpu, &var, seg);
rpl = var.selector & SELECTOR_RPL_MASK;
+ if (var.unusable)
+ return true;
if (!var.s)
return false;
if (!var.present)
@@ -1855,9 +1852,11 @@ static bool tr_valid(struct kvm_vcpu *vcpu)
vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
+ if (tr.unusable)
+ return false;
if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */
return false;
- if ((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */
+ if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
return false;
if (!tr.present)
return false;
@@ -1871,6 +1870,8 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu)
vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
+ if (ldtr.unusable)
+ return true;
if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */
return false;
if (ldtr.type != 2)
@@ -2112,7 +2113,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
{
u32 host_sysenter_cs, msr_low, msr_high;
u32 junk;
- u64 host_pat;
+ u64 host_pat, tsc_this, tsc_base;
unsigned long a;
struct descriptor_table dt;
int i;
@@ -2240,6 +2241,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
+ tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
+ rdtscll(tsc_this);
+ if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc)
+ tsc_base = tsc_this;
+
+ guest_write_tsc(0, tsc_base);
return 0;
}
@@ -2319,7 +2326,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
kvm_rip_write(vcpu, 0);
kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
- /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
vmcs_writel(GUEST_DR7, 0x400);
vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -2332,8 +2338,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
- guest_write_tsc(0);
-
/* Special registers */
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
@@ -2486,6 +2490,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
{
vmx_update_window_states(vcpu);
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_STI |
+ GUEST_INTR_STATE_MOV_SS);
+
if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
if (vcpu->arch.interrupt.pending) {
enable_nmi_window(vcpu);
@@ -2536,24 +2545,6 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
-static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
-{
- struct kvm_guest_debug *dbg = &vcpu->guest_debug;
-
- set_debugreg(dbg->bp[0], 0);
- set_debugreg(dbg->bp[1], 1);
- set_debugreg(dbg->bp[2], 2);
- set_debugreg(dbg->bp[3], 3);
-
- if (dbg->singlestep) {
- unsigned long flags;
-
- flags = vmcs_readl(GUEST_RFLAGS);
- flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
- vmcs_writel(GUEST_RFLAGS, flags);
- }
-}
-
static int handle_rmode_exception(struct kvm_vcpu *vcpu,
int vec, u32 err_code)
{
@@ -2570,9 +2561,17 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
* the required debugging infrastructure rework.
*/
switch (vec) {
- case DE_VECTOR:
case DB_VECTOR:
+ if (vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ return 0;
+ kvm_queue_exception(vcpu, vec);
+ return 1;
case BP_VECTOR:
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ return 0;
+ /* fall through */
+ case DE_VECTOR:
case OF_VECTOR:
case BR_VECTOR:
case UD_VECTOR:
@@ -2589,8 +2588,8 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 intr_info, error_code;
- unsigned long cr2, rip;
+ u32 intr_info, ex_no, error_code;
+ unsigned long cr2, rip, dr6;
u32 vect_info;
enum emulation_result er;
@@ -2649,14 +2648,30 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
- if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) ==
- (INTR_TYPE_EXCEPTION | 1)) {
+ ex_no = intr_info & INTR_INFO_VECTOR_MASK;
+ switch (ex_no) {
+ case DB_VECTOR:
+ dr6 = vmcs_readl(EXIT_QUALIFICATION);
+ if (!(vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+ vcpu->arch.dr6 = dr6 | DR6_FIXED_1;
+ kvm_queue_exception(vcpu, DB_VECTOR);
+ return 1;
+ }
+ kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
+ kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
+ /* fall through */
+ case BP_VECTOR:
kvm_run->exit_reason = KVM_EXIT_DEBUG;
- return 0;
+ kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
+ kvm_run->debug.arch.exception = ex_no;
+ break;
+ default:
+ kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
+ kvm_run->ex.exception = ex_no;
+ kvm_run->ex.error_code = error_code;
+ break;
}
- kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
- kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
- kvm_run->ex.error_code = error_code;
return 0;
}
@@ -2677,7 +2692,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
unsigned long exit_qualification;
- int size, down, in, string, rep;
+ int size, in, string;
unsigned port;
++vcpu->stat.io_exits;
@@ -2693,8 +2708,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
size = (exit_qualification & 7) + 1;
in = (exit_qualification & 8) != 0;
- down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
- rep = (exit_qualification & 32) != 0;
port = exit_qualification >> 16;
skip_emulated_instruction(vcpu);
@@ -2795,21 +2808,44 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
unsigned long val;
int dr, reg;
- /*
- * FIXME: this code assumes the host is debugging the guest.
- * need to deal with guest debugging itself too.
- */
+ dr = vmcs_readl(GUEST_DR7);
+ if (dr & DR7_GD) {
+ /*
+ * As the vm-exit takes precedence over the debug trap, we
+ * need to emulate the latter, either for the host or the
+ * guest debugging itself.
+ */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
+ kvm_run->debug.arch.dr6 = vcpu->arch.dr6;
+ kvm_run->debug.arch.dr7 = dr;
+ kvm_run->debug.arch.pc =
+ vmcs_readl(GUEST_CS_BASE) +
+ vmcs_readl(GUEST_RIP);
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ return 0;
+ } else {
+ vcpu->arch.dr7 &= ~DR7_GD;
+ vcpu->arch.dr6 |= DR6_BD;
+ vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+ kvm_queue_exception(vcpu, DB_VECTOR);
+ return 1;
+ }
+ }
+
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- dr = exit_qualification & 7;
- reg = (exit_qualification >> 8) & 15;
- if (exit_qualification & 16) {
- /* mov from dr */
+ dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+ reg = DEBUG_REG_ACCESS_REG(exit_qualification);
+ if (exit_qualification & TYPE_MOV_FROM_DR) {
switch (dr) {
+ case 0 ... 3:
+ val = vcpu->arch.db[dr];
+ break;
case 6:
- val = 0xffff0ff0;
+ val = vcpu->arch.dr6;
break;
case 7:
- val = 0x400;
+ val = vcpu->arch.dr7;
break;
default:
val = 0;
@@ -2817,7 +2853,38 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_register_write(vcpu, reg, val);
KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
} else {
- /* mov to dr */
+ val = vcpu->arch.regs[reg];
+ switch (dr) {
+ case 0 ... 3:
+ vcpu->arch.db[dr] = val;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+ vcpu->arch.eff_db[dr] = val;
+ break;
+ case 4 ... 5:
+ if (vcpu->arch.cr4 & X86_CR4_DE)
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ break;
+ case 6:
+ if (val & 0xffffffff00000000ULL) {
+ kvm_queue_exception(vcpu, GP_VECTOR);
+ break;
+ }
+ vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
+ break;
+ case 7:
+ if (val & 0xffffffff00000000ULL) {
+ kvm_queue_exception(vcpu, GP_VECTOR);
+ break;
+ }
+ vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+ vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+ vcpu->arch.switch_db_regs =
+ (val & DR7_BP_EN_MASK);
+ }
+ break;
+ }
+ KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
}
skip_emulated_instruction(vcpu);
return 1;
@@ -2968,17 +3035,25 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
tss_selector = exit_qualification;
- return kvm_task_switch(vcpu, tss_selector, reason);
+ if (!kvm_task_switch(vcpu, tss_selector, reason))
+ return 0;
+
+ /* clear all local breakpoint enable flags */
+ vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
+
+ /*
+ * TODO: What about debug traps on tss switch?
+ * Are we supposed to inject them and update dr6?
+ */
+
+ return 1;
}
static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
u64 exit_qualification;
- enum emulation_result er;
gpa_t gpa;
- unsigned long hva;
int gla_validity;
- int r;
exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
@@ -3001,32 +3076,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
- hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
- if (!kvm_is_error_hva(hva)) {
- r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
- if (r < 0) {
- printk(KERN_ERR "EPT: Not enough memory!\n");
- return -ENOMEM;
- }
- return 1;
- } else {
- /* must be MMIO */
- er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
-
- if (er == EMULATE_FAIL) {
- printk(KERN_ERR
- "EPT: Fail to handle EPT violation vmexit!er is %d\n",
- er);
- printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
- (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
- (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
- printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
- (long unsigned int)exit_qualification);
- return -ENOTSUPP;
- } else if (er == EMULATE_DO_MMIO)
- return 0;
- }
- return 1;
+ return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
}
static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3046,7 +3096,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- int err;
+ enum emulation_result err = EMULATE_DONE;
preempt_enable();
local_irq_enable();
@@ -3071,10 +3121,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
local_irq_disable();
preempt_disable();
- /* Guest state should be valid now except if we need to
- * emulate an MMIO */
- if (guest_state_valid(vcpu))
- vmx->emulation_required = 0;
+ vmx->invalid_state_emulation_result = err;
}
/*
@@ -3123,8 +3170,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
/* If we need to emulate an MMIO from handle_invalid_guest_state
* we just return 0 */
- if (vmx->emulation_required && emulate_invalid_guest_state)
- return 0;
+ if (vmx->emulation_required && emulate_invalid_guest_state) {
+ if (guest_state_valid(vcpu))
+ vmx->emulation_required = 0;
+ return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO;
+ }
/* Access CR3 don't cause VMExit in paging mode, so we need
* to sync with guest real CR3. */
@@ -3238,7 +3288,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
vmx->vcpu.arch.nmi_injected = false;
}
kvm_clear_exception_queue(&vmx->vcpu);
- if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) {
+ if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION ||
+ type == INTR_TYPE_SOFT_EXCEPTION)) {
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
error = vmcs_read32(IDT_VECTORING_ERROR_CODE);
kvm_queue_exception_e(&vmx->vcpu, vector, error);
@@ -3259,6 +3310,11 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
vmx_update_window_states(vcpu);
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_STI |
+ GUEST_INTR_STATE_MOV_SS);
+
if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
if (vcpu->arch.interrupt.pending) {
enable_nmi_window(vcpu);
@@ -3347,6 +3403,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
*/
vmcs_writel(HOST_CR0, read_cr0());
+ set_debugreg(vcpu->arch.dr6, 6);
+
asm(
/* Store host registers */
"push %%"R"dx; push %%"R"bp;"
@@ -3441,6 +3499,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
vcpu->arch.regs_dirty = 0;
+ get_debugreg(vcpu->arch.dr6, 6);
+
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
if (vmx->rmode.irq.pending)
fixup_rmode_irq(vmx);
@@ -3595,7 +3655,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
.vcpu_put = vmx_vcpu_put,
.set_guest_debug = set_guest_debug,
- .guest_debug_pre = kvm_guest_debug_pre,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
.get_segment_base = vmx_get_segment_base,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 758b7a155ae9..8ca100a9ecac 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -36,6 +36,7 @@
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
+#include <linux/cpufreq.h>
#include <asm/uaccess.h>
#include <asm/msr.h>
@@ -69,6 +70,8 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries);
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+ u32 function, u32 index);
struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -173,6 +176,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
u32 error_code)
{
++vcpu->stat.pf_guest;
+
if (vcpu->arch.exception.pending) {
if (vcpu->arch.exception.nr == PF_VECTOR) {
printk(KERN_DEBUG "kvm: inject_page_fault:"
@@ -361,6 +365,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
}
kvm_x86_ops->set_cr4(vcpu, cr4);
vcpu->arch.cr4 = cr4;
+ vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
}
@@ -442,6 +447,11 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);
+static inline u32 bit(int bitno)
+{
+ return 1 << (bitno & 31);
+}
+
/*
* List of msr numbers which we expose to userspace through KVM_GET_MSRS
* and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -456,7 +466,7 @@ static u32 msrs_to_save[] = {
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
- MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT
+ MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
};
static unsigned num_msrs_to_save;
@@ -481,6 +491,28 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
return;
}
+ if (efer & EFER_FFXSR) {
+ struct kvm_cpuid_entry2 *feat;
+
+ feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+ if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
+ printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
+ kvm_inject_gp(vcpu, 0);
+ return;
+ }
+ }
+
+ if (efer & EFER_SVME) {
+ struct kvm_cpuid_entry2 *feat;
+
+ feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+ if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
+ printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
+ kvm_inject_gp(vcpu, 0);
+ return;
+ }
+ }
+
kvm_x86_ops->set_efer(vcpu, efer);
efer &= ~EFER_LMA;
@@ -586,6 +618,8 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
hv_clock->tsc_to_system_mul);
}
+static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
+
static void kvm_write_guest_time(struct kvm_vcpu *v)
{
struct timespec ts;
@@ -596,9 +630,9 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
if ((!vcpu->time_page))
return;
- if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
- kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
- vcpu->hv_clock_tsc_khz = tsc_khz;
+ if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
+ kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
+ vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
}
/* Keep irq disabled to prevent changes to the clock */
@@ -629,6 +663,16 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
}
+static int kvm_request_guest_time_update(struct kvm_vcpu *v)
+{
+ struct kvm_vcpu_arch *vcpu = &v->arch;
+
+ if (!vcpu->time_page)
+ return 0;
+ set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
+ return 1;
+}
+
static bool msr_mtrr_valid(unsigned msr)
{
switch (msr) {
@@ -722,6 +766,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
break;
case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE:
+ case MSR_VM_HSAVE_PA:
break;
case 0x200 ... 0x2ff:
return set_msr_mtrr(vcpu, msr, data);
@@ -758,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
vcpu->arch.time_page = NULL;
}
- kvm_write_guest_time(vcpu);
+ kvm_request_guest_time_update(vcpu);
break;
}
default:
@@ -843,6 +888,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_LASTBRANCHTOIP:
case MSR_IA32_LASTINTFROMIP:
case MSR_IA32_LASTINTTOIP:
+ case MSR_VM_HSAVE_PA:
data = 0;
break;
case MSR_MTRRcap:
@@ -967,10 +1013,13 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
+ case KVM_CAP_CLOCKSOURCE:
case KVM_CAP_PIT:
case KVM_CAP_NOP_IO_DELAY:
case KVM_CAP_MP_STATE:
case KVM_CAP_SYNC_MMU:
+ case KVM_CAP_REINJECT_CONTROL:
+ case KVM_CAP_IRQ_INJECT_STATUS:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -991,9 +1040,6 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_IOMMU:
r = iommu_found();
break;
- case KVM_CAP_CLOCKSOURCE:
- r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
- break;
default:
r = 0;
break;
@@ -1044,7 +1090,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
- cpuid_arg->entries);
+ cpuid_arg->entries);
if (r)
goto out;
@@ -1064,7 +1110,7 @@ out:
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvm_x86_ops->vcpu_load(vcpu, cpu);
- kvm_write_guest_time(vcpu);
+ kvm_request_guest_time_update(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1142,8 +1188,8 @@ out:
}
static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
- struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+ struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 __user *entries)
{
int r;
@@ -1162,8 +1208,8 @@ out:
}
static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
- struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+ struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 __user *entries)
{
int r;
@@ -1172,7 +1218,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
goto out;
r = -EFAULT;
if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
+ vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
goto out;
return 0;
@@ -1181,18 +1227,13 @@ out:
return r;
}
-static inline u32 bit(int bitno)
-{
- return 1 << (bitno & 31);
-}
-
static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
- u32 index)
+ u32 index)
{
entry->function = function;
entry->index = index;
cpuid_count(entry->function, entry->index,
- &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
+ &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
entry->flags = 0;
}
@@ -1222,15 +1263,17 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
#ifdef CONFIG_X86_64
bit(X86_FEATURE_LM) |
#endif
+ bit(X86_FEATURE_FXSR_OPT) |
bit(X86_FEATURE_MMXEXT) |
bit(X86_FEATURE_3DNOWEXT) |
bit(X86_FEATURE_3DNOW);
const u32 kvm_supported_word3_x86_features =
bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
const u32 kvm_supported_word6_x86_features =
- bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);
+ bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
+ bit(X86_FEATURE_SVM);
- /* all func 2 cpuid_count() should be called on the same cpu */
+ /* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
do_cpuid_1_ent(entry, function, index);
++*nent;
@@ -1304,7 +1347,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+ struct kvm_cpuid_entry2 __user *entries)
{
struct kvm_cpuid_entry2 *cpuid_entries;
int limit, nent = 0, r = -E2BIG;
@@ -1321,7 +1364,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
limit = cpuid_entries[0].eax;
for (func = 1; func <= limit && nent < cpuid->nent; ++func)
do_cpuid_ent(&cpuid_entries[nent], func, 0,
- &nent, cpuid->nent);
+ &nent, cpuid->nent);
r = -E2BIG;
if (nent >= cpuid->nent)
goto out_free;
@@ -1330,10 +1373,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
limit = cpuid_entries[nent - 1].eax;
for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
do_cpuid_ent(&cpuid_entries[nent], func, 0,
- &nent, cpuid->nent);
+ &nent, cpuid->nent);
r = -EFAULT;
if (copy_to_user(entries, cpuid_entries,
- nent * sizeof(struct kvm_cpuid_entry2)))
+ nent * sizeof(struct kvm_cpuid_entry2)))
goto out_free;
cpuid->nent = nent;
r = 0;
@@ -1477,7 +1520,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
- cpuid_arg->entries);
+ cpuid_arg->entries);
if (r)
goto out;
break;
@@ -1490,7 +1533,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
- cpuid_arg->entries);
+ cpuid_arg->entries);
if (r)
goto out;
r = -EFAULT;
@@ -1710,6 +1753,15 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
return r;
}
+static int kvm_vm_ioctl_reinject(struct kvm *kvm,
+ struct kvm_reinject_control *control)
+{
+ if (!kvm->arch.vpit)
+ return -ENXIO;
+ kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+ return 0;
+}
+
/*
* Get (and clear) the dirty memory log for a memory slot.
*/
@@ -1807,13 +1859,26 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
} else
goto out;
+ r = kvm_setup_default_irq_routing(kvm);
+ if (r) {
+ kfree(kvm->arch.vpic);
+ kfree(kvm->arch.vioapic);
+ goto out;
+ }
break;
case KVM_CREATE_PIT:
+ mutex_lock(&kvm->lock);
+ r = -EEXIST;
+ if (kvm->arch.vpit)
+ goto create_pit_unlock;
r = -ENOMEM;
kvm->arch.vpit = kvm_create_pit(kvm);
if (kvm->arch.vpit)
r = 0;
+ create_pit_unlock:
+ mutex_unlock(&kvm->lock);
break;
+ case KVM_IRQ_LINE_STATUS:
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -1821,10 +1886,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (copy_from_user(&irq_event, argp, sizeof irq_event))
goto out;
if (irqchip_in_kernel(kvm)) {
+ __s32 status;
mutex_lock(&kvm->lock);
- kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
- irq_event.irq, irq_event.level);
+ status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+ irq_event.irq, irq_event.level);
mutex_unlock(&kvm->lock);
+ if (ioctl == KVM_IRQ_LINE_STATUS) {
+ irq_event.status = status;
+ if (copy_to_user(argp, &irq_event,
+ sizeof irq_event))
+ goto out;
+ }
r = 0;
}
break;
@@ -1907,6 +1979,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_REINJECT_CONTROL: {
+ struct kvm_reinject_control control;
+ r = -EFAULT;
+ if (copy_from_user(&control, argp, sizeof(control)))
+ goto out;
+ r = kvm_vm_ioctl_reinject(kvm, &control);
+ if (r)
+ goto out;
+ r = 0;
+ break;
+ }
default:
;
}
@@ -1960,10 +2043,38 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
return dev;
}
-int emulator_read_std(unsigned long addr,
- void *val,
- unsigned int bytes,
- struct kvm_vcpu *vcpu)
+static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu)
+{
+ void *data = val;
+ int r = X86EMUL_CONTINUE;
+
+ while (bytes) {
+ gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+ unsigned offset = addr & (PAGE_SIZE-1);
+ unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
+ int ret;
+
+ if (gpa == UNMAPPED_GVA) {
+ r = X86EMUL_PROPAGATE_FAULT;
+ goto out;
+ }
+ ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
+ if (ret < 0) {
+ r = X86EMUL_UNHANDLEABLE;
+ goto out;
+ }
+
+ bytes -= toread;
+ data += toread;
+ addr += toread;
+ }
+out:
+ return r;
+}
+
+static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu)
{
void *data = val;
int r = X86EMUL_CONTINUE;
@@ -1971,27 +2082,27 @@ int emulator_read_std(unsigned long addr,
while (bytes) {
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
unsigned offset = addr & (PAGE_SIZE-1);
- unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
+ unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
if (gpa == UNMAPPED_GVA) {
r = X86EMUL_PROPAGATE_FAULT;
goto out;
}
- ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy);
+ ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
if (ret < 0) {
r = X86EMUL_UNHANDLEABLE;
goto out;
}
- bytes -= tocopy;
- data += tocopy;
- addr += tocopy;
+ bytes -= towrite;
+ data += towrite;
+ addr += towrite;
}
out:
return r;
}
-EXPORT_SYMBOL_GPL(emulator_read_std);
+
static int emulator_read_emulated(unsigned long addr,
void *val,
@@ -2013,8 +2124,8 @@ static int emulator_read_emulated(unsigned long addr,
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto mmio;
- if (emulator_read_std(addr, val, bytes, vcpu)
- == X86EMUL_CONTINUE)
+ if (kvm_read_guest_virt(addr, val, bytes, vcpu)
+ == X86EMUL_CONTINUE)
return X86EMUL_CONTINUE;
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
@@ -2217,7 +2328,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
- emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
+ kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -2225,7 +2336,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
static struct x86_emulate_ops emulate_ops = {
- .read_std = emulator_read_std,
+ .read_std = kvm_read_guest_virt,
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
.cmpxchg_emulated = emulator_cmpxchg_emulated,
@@ -2327,40 +2438,19 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(emulate_instruction);
-static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i)
- if (vcpu->arch.pio.guest_pages[i]) {
- kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]);
- vcpu->arch.pio.guest_pages[i] = NULL;
- }
-}
-
static int pio_copy_data(struct kvm_vcpu *vcpu)
{
void *p = vcpu->arch.pio_data;
- void *q;
+ gva_t q = vcpu->arch.pio.guest_gva;
unsigned bytes;
- int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1;
+ int ret;
- q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
- PAGE_KERNEL);
- if (!q) {
- free_pio_guest_pages(vcpu);
- return -ENOMEM;
- }
- q += vcpu->arch.pio.guest_page_offset;
bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
if (vcpu->arch.pio.in)
- memcpy(q, p, bytes);
+ ret = kvm_write_guest_virt(q, p, bytes, vcpu);
else
- memcpy(p, q, bytes);
- q -= vcpu->arch.pio.guest_page_offset;
- vunmap(q);
- free_pio_guest_pages(vcpu);
- return 0;
+ ret = kvm_read_guest_virt(q, p, bytes, vcpu);
+ return ret;
}
int complete_pio(struct kvm_vcpu *vcpu)
@@ -2471,7 +2561,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
vcpu->arch.pio.in = in;
vcpu->arch.pio.string = 0;
vcpu->arch.pio.down = 0;
- vcpu->arch.pio.guest_page_offset = 0;
vcpu->arch.pio.rep = 0;
if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2499,9 +2588,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
gva_t address, int rep, unsigned port)
{
unsigned now, in_page;
- int i, ret = 0;
- int nr_pages = 1;
- struct page *page;
+ int ret = 0;
struct kvm_io_device *pio_dev;
vcpu->run->exit_reason = KVM_EXIT_IO;
@@ -2513,7 +2600,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
vcpu->arch.pio.in = in;
vcpu->arch.pio.string = 1;
vcpu->arch.pio.down = down;
- vcpu->arch.pio.guest_page_offset = offset_in_page(address);
vcpu->arch.pio.rep = rep;
if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2533,15 +2619,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
else
in_page = offset_in_page(address) + size;
now = min(count, (unsigned long)in_page / size);
- if (!now) {
- /*
- * String I/O straddles page boundary. Pin two guest pages
- * so that we satisfy atomicity constraints. Do just one
- * transaction to avoid complexity.
- */
- nr_pages = 2;
+ if (!now)
now = 1;
- }
if (down) {
/*
* String I/O in reverse. Yuck. Kill the guest, fix later.
@@ -2556,15 +2635,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
kvm_x86_ops->skip_emulated_instruction(vcpu);
- for (i = 0; i < nr_pages; ++i) {
- page = gva_to_page(vcpu, address + i * PAGE_SIZE);
- vcpu->arch.pio.guest_pages[i] = page;
- if (!page) {
- kvm_inject_gp(vcpu, 0);
- free_pio_guest_pages(vcpu);
- return 1;
- }
- }
+ vcpu->arch.pio.guest_gva = address;
pio_dev = vcpu_find_pio_dev(vcpu, port,
vcpu->arch.pio.cur_count,
@@ -2572,7 +2643,11 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
if (!vcpu->arch.pio.in) {
/* string PIO write */
ret = pio_copy_data(vcpu);
- if (ret >= 0 && pio_dev) {
+ if (ret == X86EMUL_PROPAGATE_FAULT) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+ if (ret == 0 && pio_dev) {
pio_string_write(pio_dev, vcpu);
complete_pio(vcpu);
if (vcpu->arch.pio.count == 0)
@@ -2587,9 +2662,72 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
+static void bounce_off(void *info)
+{
+ /* nothing */
+}
+
+static unsigned int ref_freq;
+static unsigned long tsc_khz_ref;
+
+static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ struct kvm *kvm;
+ struct kvm_vcpu *vcpu;
+ int i, send_ipi = 0;
+
+ if (!ref_freq)
+ ref_freq = freq->old;
+
+ if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
+ return 0;
+ if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
+ return 0;
+ per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = kvm->vcpus[i];
+ if (!vcpu)
+ continue;
+ if (vcpu->cpu != freq->cpu)
+ continue;
+ if (!kvm_request_guest_time_update(vcpu))
+ continue;
+ if (vcpu->cpu != smp_processor_id())
+ send_ipi++;
+ }
+ }
+ spin_unlock(&kvm_lock);
+
+ if (freq->old < freq->new && send_ipi) {
+ /*
+ * We upscale the frequency. Must make the guest
+ * doesn't see old kvmclock values while running with
+ * the new frequency, otherwise we risk the guest sees
+ * time go backwards.
+ *
+ * In case we update the frequency for another cpu
+ * (which might be in guest context) send an interrupt
+ * to kick the cpu out of guest context. Next time
+ * guest context is entered kvmclock will be updated,
+ * so the guest will not see stale values.
+ */
+ smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+ }
+ return 0;
+}
+
+static struct notifier_block kvmclock_cpufreq_notifier_block = {
+ .notifier_call = kvmclock_cpufreq_notifier
+};
+
int kvm_arch_init(void *opaque)
{
- int r;
+ int r, cpu;
struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
if (kvm_x86_ops) {
@@ -2620,6 +2758,15 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
+
+ for_each_possible_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ tsc_khz_ref = tsc_khz;
+ cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ }
+
return 0;
out:
@@ -2827,25 +2974,20 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
return 0;
if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
- !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
+ !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
return 0;
return 1;
}
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+ u32 function, u32 index)
{
int i;
- u32 function, index;
- struct kvm_cpuid_entry2 *e, *best;
+ struct kvm_cpuid_entry2 *best = NULL;
- function = kvm_register_read(vcpu, VCPU_REGS_RAX);
- index = kvm_register_read(vcpu, VCPU_REGS_RCX);
- kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
- best = NULL;
for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+ struct kvm_cpuid_entry2 *e;
+
e = &vcpu->arch.cpuid_entries[i];
if (is_matching_cpuid_entry(e, function, index)) {
if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
@@ -2860,6 +3002,21 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
if (!best || e->function > best->function)
best = e;
}
+ return best;
+}
+
+void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+{
+ u32 function, index;
+ struct kvm_cpuid_entry2 *best;
+
+ function = kvm_register_read(vcpu, VCPU_REGS_RAX);
+ index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
+ best = kvm_find_cpuid_entry(vcpu, function, index);
if (best) {
kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
@@ -2945,6 +3102,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (vcpu->requests) {
if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
__kvm_migrate_timers(vcpu);
+ if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
+ kvm_write_guest_time(vcpu);
if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
kvm_mmu_sync_roots(vcpu);
if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
@@ -2979,9 +3138,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
goto out;
}
- if (vcpu->guest_debug.enabled)
- kvm_x86_ops->guest_debug_pre(vcpu);
-
vcpu->guest_mode = 1;
/*
* Make sure that guest_mode assignment won't happen after
@@ -3002,10 +3158,34 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_guest_enter();
+ get_debugreg(vcpu->arch.host_dr6, 6);
+ get_debugreg(vcpu->arch.host_dr7, 7);
+ if (unlikely(vcpu->arch.switch_db_regs)) {
+ get_debugreg(vcpu->arch.host_db[0], 0);
+ get_debugreg(vcpu->arch.host_db[1], 1);
+ get_debugreg(vcpu->arch.host_db[2], 2);
+ get_debugreg(vcpu->arch.host_db[3], 3);
+
+ set_debugreg(0, 7);
+ set_debugreg(vcpu->arch.eff_db[0], 0);
+ set_debugreg(vcpu->arch.eff_db[1], 1);
+ set_debugreg(vcpu->arch.eff_db[2], 2);
+ set_debugreg(vcpu->arch.eff_db[3], 3);
+ }
KVMTRACE_0D(VMENTRY, vcpu, entryexit);
kvm_x86_ops->run(vcpu, kvm_run);
+ if (unlikely(vcpu->arch.switch_db_regs)) {
+ set_debugreg(0, 7);
+ set_debugreg(vcpu->arch.host_db[0], 0);
+ set_debugreg(vcpu->arch.host_db[1], 1);
+ set_debugreg(vcpu->arch.host_db[2], 2);
+ set_debugreg(vcpu->arch.host_db[3], 3);
+ }
+ set_debugreg(vcpu->arch.host_dr6, 6);
+ set_debugreg(vcpu->arch.host_dr7, 7);
+
vcpu->guest_mode = 0;
local_irq_enable();
@@ -3192,7 +3372,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
/*
* Don't leak debug flags in case they were set for guest debugging
*/
- if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
vcpu_put(vcpu);
@@ -3811,15 +3991,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
}
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
{
- int r;
+ int i, r;
vcpu_load(vcpu);
+ if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
+ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
+ for (i = 0; i < KVM_NR_DB_REGS; ++i)
+ vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
+ vcpu->arch.switch_db_regs =
+ (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
+ } else {
+ for (i = 0; i < KVM_NR_DB_REGS; i++)
+ vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+ vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
+ }
+
r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
+ if (dbg->control & KVM_GUESTDBG_INJECT_DB)
+ kvm_queue_exception(vcpu, DB_VECTOR);
+ else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
+ kvm_queue_exception(vcpu, BP_VECTOR);
+
vcpu_put(vcpu);
return r;
@@ -4007,6 +4204,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = false;
+ vcpu->arch.switch_db_regs = 0;
+ memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+ vcpu->arch.dr6 = DR6_FIXED_1;
+ vcpu->arch.dr7 = DR7_FIXED_1;
+
return kvm_x86_ops->vcpu_reset(vcpu);
}
@@ -4100,6 +4302,8 @@ struct kvm *kvm_arch_create_vm(void)
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+ rdtscll(kvm->arch.vm_init_tsc);
+
return kvm;
}
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d174db7a3370..ca91749d2083 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -178,7 +178,7 @@ static u32 opcode_table[256] = {
0, ImplicitOps | Stack, 0, 0,
ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
/* 0xC8 - 0xCF */
- 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0,
/* 0xD0 - 0xD7 */
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@@ -1136,18 +1136,19 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
}
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
+ struct x86_emulate_ops *ops,
+ void *dest, int len)
{
struct decode_cache *c = &ctxt->decode;
int rc;
rc = ops->read_emulated(register_address(c, ss_base(ctxt),
c->regs[VCPU_REGS_RSP]),
- &c->src.val, c->src.bytes, ctxt->vcpu);
+ dest, len, ctxt->vcpu);
if (rc != 0)
return rc;
- register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
return rc;
}
@@ -1157,11 +1158,9 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
struct decode_cache *c = &ctxt->decode;
int rc;
- c->src.bytes = c->dst.bytes;
- rc = emulate_pop(ctxt, ops);
+ rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
if (rc != 0)
return rc;
- c->dst.val = c->src.val;
return 0;
}
@@ -1279,6 +1278,25 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
return 0;
}
+static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int rc;
+ unsigned long cs;
+
+ rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
+ if (rc)
+ return rc;
+ if (c->op_bytes == 4)
+ c->eip = (u32)c->eip;
+ rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
+ if (rc)
+ return rc;
+ rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS);
+ return rc;
+}
+
static inline int writeback(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
@@ -1467,11 +1485,9 @@ special_insn:
break;
case 0x58 ... 0x5f: /* pop reg */
pop_instruction:
- c->src.bytes = c->op_bytes;
- rc = emulate_pop(ctxt, ops);
+ rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
if (rc != 0)
goto done;
- c->dst.val = c->src.val;
break;
case 0x63: /* movsxd */
if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -1738,6 +1754,11 @@ special_insn:
mov:
c->dst.val = c->src.val;
break;
+ case 0xcb: /* ret far */
+ rc = emulate_ret_far(ctxt, ops);
+ if (rc)
+ goto done;
+ break;
case 0xd0 ... 0xd1: /* Grp2 */
c->src.val = 1;
emulate_grp2(ctxt);
@@ -1908,11 +1929,16 @@ twobyte_insn:
c->dst.type = OP_NONE;
break;
case 3: /* lidt/vmmcall */
- if (c->modrm_mod == 3 && c->modrm_rm == 1) {
- rc = kvm_fix_hypercall(ctxt->vcpu);
- if (rc)
- goto done;
- kvm_emulate_hypercall(ctxt->vcpu);
+ if (c->modrm_mod == 3) {
+ switch (c->modrm_rm) {
+ case 1:
+ rc = kvm_fix_hypercall(ctxt->vcpu);
+ if (rc)
+ goto done;
+ break;
+ default:
+ goto cannot_emulate;
+ }
} else {
rc = read_descriptor(ctxt, ops, c->src.ptr,
&size, &address,
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index f3a5305b8adf..9fe4ddaa8f6f 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -348,6 +348,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
* flush_tlb_user() for both user and kernel mappings unless
* the Page Global Enable (PGE) feature bit is set. */
*dx |= 0x00002000;
+ /* We also lie, and say we're family id 5. 6 or greater
+ * leads to a rdmsr in early_init_intel which we can't handle.
+ * Family ID is returned as bits 8-12 in ax. */
+ *ax &= 0xFFFFF0FF;
+ *ax |= 0x00000500;
break;
case 0x80000000:
/* Futureproof this a little: if they ask how much extended
@@ -594,19 +599,21 @@ static void __init lguest_init_IRQ(void)
/* Some systems map "vectors" to interrupts weirdly. Lguest has
* a straightforward 1 to 1 mapping, so force that here. */
__get_cpu_var(vector_irq)[vector] = i;
- if (vector != SYSCALL_VECTOR) {
- set_intr_gate(vector,
- interrupt[vector-FIRST_EXTERNAL_VECTOR]);
- set_irq_chip_and_handler_name(i, &lguest_irq_controller,
- handle_level_irq,
- "level");
- }
+ if (vector != SYSCALL_VECTOR)
+ set_intr_gate(vector, interrupt[i]);
}
/* This call is required to set up for 4k stacks, where we have
* separate stacks for hard and soft interrupts. */
irq_ctx_init(smp_processor_id());
}
+void lguest_setup_irq(unsigned int irq)
+{
+ irq_to_desc_alloc_cpu(irq, 0);
+ set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
+ handle_level_irq, "level");
+}
+
/*
* Time.
*
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c
index 491e737ce547..aa0987088774 100644
--- a/arch/x86/math-emu/fpu_aux.c
+++ b/arch/x86/math-emu/fpu_aux.c
@@ -30,20 +30,29 @@ static void fclex(void)
}
/* Needs to be externally visible */
-void finit(void)
+void finit_task(struct task_struct *tsk)
{
- control_word = 0x037f;
- partial_status = 0;
- top = 0; /* We don't keep top in the status word internally. */
- fpu_tag_word = 0xffff;
+ struct i387_soft_struct *soft = &tsk->thread.xstate->soft;
+ struct address *oaddr, *iaddr;
+ soft->cwd = 0x037f;
+ soft->swd = 0;
+ soft->ftop = 0; /* We don't keep top in the status word internally. */
+ soft->twd = 0xffff;
/* The behaviour is different from that detailed in
Section 15.1.6 of the Intel manual */
- operand_address.offset = 0;
- operand_address.selector = 0;
- instruction_address.offset = 0;
- instruction_address.selector = 0;
- instruction_address.opcode = 0;
- no_ip_update = 1;
+ oaddr = (struct address *)&soft->foo;
+ oaddr->offset = 0;
+ oaddr->selector = 0;
+ iaddr = (struct address *)&soft->fip;
+ iaddr->offset = 0;
+ iaddr->selector = 0;
+ iaddr->opcode = 0;
+ soft->no_update = 1;
+}
+
+void finit(void)
+{
+ finit_task(current);
}
/*
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 93d82038af4b..6a518dd08a36 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -32,11 +32,14 @@ struct kmmio_fault_page {
struct list_head list;
struct kmmio_fault_page *release_next;
unsigned long page; /* location of the fault page */
+ bool old_presence; /* page presence prior to arming */
+ bool armed;
/*
* Number of times this page has been registered as a part
* of a probe. If zero, page is disarmed and this may be freed.
- * Used only by writers (RCU).
+ * Used only by writers (RCU) and post_kmmio_handler().
+ * Protected by kmmio_lock, when linked into kmmio_page_table.
*/
int count;
};
@@ -105,57 +108,85 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
return NULL;
}
-static void set_page_present(unsigned long addr, bool present,
- unsigned int *pglevel)
+static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
+{
+ pmdval_t v = pmd_val(*pmd);
+ *old = !!(v & _PAGE_PRESENT);
+ v &= ~_PAGE_PRESENT;
+ if (present)
+ v |= _PAGE_PRESENT;
+ set_pmd(pmd, __pmd(v));
+}
+
+static void set_pte_presence(pte_t *pte, bool present, bool *old)
+{
+ pteval_t v = pte_val(*pte);
+ *old = !!(v & _PAGE_PRESENT);
+ v &= ~_PAGE_PRESENT;
+ if (present)
+ v |= _PAGE_PRESENT;
+ set_pte_atomic(pte, __pte(v));
+}
+
+static int set_page_presence(unsigned long addr, bool present, bool *old)
{
- pteval_t pteval;
- pmdval_t pmdval;
unsigned int level;
- pmd_t *pmd;
pte_t *pte = lookup_address(addr, &level);
if (!pte) {
pr_err("kmmio: no pte for page 0x%08lx\n", addr);
- return;
+ return -1;
}
- if (pglevel)
- *pglevel = level;
-
switch (level) {
case PG_LEVEL_2M:
- pmd = (pmd_t *)pte;
- pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
- if (present)
- pmdval |= _PAGE_PRESENT;
- set_pmd(pmd, __pmd(pmdval));
+ set_pmd_presence((pmd_t *)pte, present, old);
break;
-
case PG_LEVEL_4K:
- pteval = pte_val(*pte) & ~_PAGE_PRESENT;
- if (present)
- pteval |= _PAGE_PRESENT;
- set_pte_atomic(pte, __pte(pteval));
+ set_pte_presence(pte, present, old);
break;
-
default:
pr_err("kmmio: unexpected page level 0x%x.\n", level);
- return;
+ return -1;
}
__flush_tlb_one(addr);
+ return 0;
}
-/** Mark the given page as not present. Access to it will trigger a fault. */
-static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
+/*
+ * Mark the given page as not present. Access to it will trigger a fault.
+ *
+ * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
+ * protection is ignored here. RCU read lock is assumed held, so the struct
+ * will not disappear unexpectedly. Furthermore, the caller must guarantee,
+ * that double arming the same virtual address (page) cannot occur.
+ *
+ * Double disarming on the other hand is allowed, and may occur when a fault
+ * and mmiotrace shutdown happen simultaneously.
+ */
+static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
{
- set_page_present(page & PAGE_MASK, false, pglevel);
+ int ret;
+ WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
+ if (f->armed) {
+ pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
+ f->page, f->count, f->old_presence);
+ }
+ ret = set_page_presence(f->page, false, &f->old_presence);
+ WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
+ f->armed = true;
+ return ret;
}
-/** Mark the given page as present. */
-static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
+/** Restore the given page to saved presence state. */
+static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
{
- set_page_present(page & PAGE_MASK, true, pglevel);
+ bool tmp;
+ int ret = set_page_presence(f->page, f->old_presence, &tmp);
+ WARN_ONCE(ret < 0,
+ KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
+ f->armed = false;
}
/*
@@ -202,28 +233,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
ctx = &get_cpu_var(kmmio_ctx);
if (ctx->active) {
- disarm_kmmio_fault_page(faultpage->page, NULL);
if (addr == ctx->addr) {
/*
- * On SMP we sometimes get recursive probe hits on the
- * same address. Context is already saved, fall out.
+ * A second fault on the same page means some other
+ * condition needs handling by do_page_fault(), the
+ * page really not being present is the most common.
*/
- pr_debug("kmmio: duplicate probe hit on CPU %d, for "
- "address 0x%08lx.\n",
- smp_processor_id(), addr);
- ret = 1;
- goto no_kmmio_ctx;
- }
- /*
- * Prevent overwriting already in-flight context.
- * This should not happen, let's hope disarming at least
- * prevents a panic.
- */
- pr_emerg("kmmio: recursive probe hit on CPU %d, "
+ pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n",
+ addr, smp_processor_id());
+
+ if (!faultpage->old_presence)
+ pr_info("kmmio: unexpected secondary hit for "
+ "address 0x%08lx on CPU %d.\n", addr,
+ smp_processor_id());
+ } else {
+ /*
+ * Prevent overwriting already in-flight context.
+ * This should not happen, let's hope disarming at
+ * least prevents a panic.
+ */
+ pr_emerg("kmmio: recursive probe hit on CPU %d, "
"for address 0x%08lx. Ignoring.\n",
smp_processor_id(), addr);
- pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
- ctx->addr);
+ pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
+ ctx->addr);
+ disarm_kmmio_fault_page(faultpage);
+ }
goto no_kmmio_ctx;
}
ctx->active++;
@@ -244,7 +279,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
regs->flags &= ~X86_EFLAGS_IF;
/* Now we set present bit in PTE and single step. */
- disarm_kmmio_fault_page(ctx->fpage->page, NULL);
+ disarm_kmmio_fault_page(ctx->fpage);
/*
* If another cpu accesses the same page while we are stepping,
@@ -275,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
if (!ctx->active) {
- pr_debug("kmmio: spurious debug trap on CPU %d.\n",
+ pr_warning("kmmio: spurious debug trap on CPU %d.\n",
smp_processor_id());
goto out;
}
@@ -283,7 +318,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
if (ctx->probe && ctx->probe->post_handler)
ctx->probe->post_handler(ctx->probe, condition, regs);
- arm_kmmio_fault_page(ctx->fpage->page, NULL);
+ /* Prevent racing against release_kmmio_fault_page(). */
+ spin_lock(&kmmio_lock);
+ if (ctx->fpage->count)
+ arm_kmmio_fault_page(ctx->fpage);
+ spin_unlock(&kmmio_lock);
regs->flags &= ~X86_EFLAGS_TF;
regs->flags |= ctx->saved_flags;
@@ -315,20 +354,24 @@ static int add_kmmio_fault_page(unsigned long page)
f = get_kmmio_fault_page(page);
if (f) {
if (!f->count)
- arm_kmmio_fault_page(f->page, NULL);
+ arm_kmmio_fault_page(f);
f->count++;
return 0;
}
- f = kmalloc(sizeof(*f), GFP_ATOMIC);
+ f = kzalloc(sizeof(*f), GFP_ATOMIC);
if (!f)
return -1;
f->count = 1;
f->page = page;
- list_add_rcu(&f->list, kmmio_page_list(f->page));
- arm_kmmio_fault_page(f->page, NULL);
+ if (arm_kmmio_fault_page(f)) {
+ kfree(f);
+ return -1;
+ }
+
+ list_add_rcu(&f->list, kmmio_page_list(f->page));
return 0;
}
@@ -347,7 +390,7 @@ static void release_kmmio_fault_page(unsigned long page,
f->count--;
BUG_ON(f->count < 0);
if (!f->count) {
- disarm_kmmio_fault_page(f->page, NULL);
+ disarm_kmmio_fault_page(f);
f->release_next = *release_list;
*release_list = f;
}
@@ -408,23 +451,24 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
static void remove_kmmio_fault_pages(struct rcu_head *head)
{
- struct kmmio_delayed_release *dr = container_of(
- head,
- struct kmmio_delayed_release,
- rcu);
+ struct kmmio_delayed_release *dr =
+ container_of(head, struct kmmio_delayed_release, rcu);
struct kmmio_fault_page *p = dr->release_list;
struct kmmio_fault_page **prevp = &dr->release_list;
unsigned long flags;
+
spin_lock_irqsave(&kmmio_lock, flags);
while (p) {
- if (!p->count)
+ if (!p->count) {
list_del_rcu(&p->list);
- else
+ prevp = &p->release_next;
+ } else {
*prevp = p->release_next;
- prevp = &p->release_next;
+ }
p = p->release_next;
}
spin_unlock_irqrestore(&kmmio_lock, flags);
+
/* This is the real RCU destroy call. */
call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8253bc97587e..9c4294986af7 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -522,6 +522,17 @@ static int split_large_page(pte_t *kpte, unsigned long address)
* primary protection behavior:
*/
__set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
+
+ /*
+ * Intel Atom errata AAH41 workaround.
+ *
+ * The real fix should be in hw or in a microcode update, but
+ * we also probabilistically try to reduce the window of having
+ * a large TLB mixed with 4K TLBs while instruction fetches are
+ * going on.
+ */
+ __flush_tlb_all();
+
base = NULL;
out_unlock:
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index ab50a8d7402c..427fd1b56df5 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -1,5 +1,5 @@
/*
- * Written by Pekka Paalanen, 2008 <pq@iki.fi>
+ * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi>
*/
#include <linux/module.h>
#include <linux/io.h>
@@ -9,35 +9,74 @@
static unsigned long mmio_address;
module_param(mmio_address, ulong, 0);
-MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
+MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
+ "(or 8 MB if read_far is non-zero).");
+
+static unsigned long read_far = 0x400100;
+module_param(read_far, ulong, 0);
+MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB "
+ "(default: 0x400100).");
+
+static unsigned v16(unsigned i)
+{
+ return i * 12 + 7;
+}
+
+static unsigned v32(unsigned i)
+{
+ return i * 212371 + 13;
+}
static void do_write_test(void __iomem *p)
{
unsigned int i;
+ pr_info(MODULE_NAME ": write test.\n");
mmiotrace_printk("Write test.\n");
+
for (i = 0; i < 256; i++)
iowrite8(i, p + i);
+
for (i = 1024; i < (5 * 1024); i += 2)
- iowrite16(i * 12 + 7, p + i);
+ iowrite16(v16(i), p + i);
+
for (i = (5 * 1024); i < (16 * 1024); i += 4)
- iowrite32(i * 212371 + 13, p + i);
+ iowrite32(v32(i), p + i);
}
static void do_read_test(void __iomem *p)
{
unsigned int i;
+ unsigned errs[3] = { 0 };
+ pr_info(MODULE_NAME ": read test.\n");
mmiotrace_printk("Read test.\n");
+
for (i = 0; i < 256; i++)
- ioread8(p + i);
+ if (ioread8(p + i) != i)
+ ++errs[0];
+
for (i = 1024; i < (5 * 1024); i += 2)
- ioread16(p + i);
+ if (ioread16(p + i) != v16(i))
+ ++errs[1];
+
for (i = (5 * 1024); i < (16 * 1024); i += 4)
- ioread32(p + i);
+ if (ioread32(p + i) != v32(i))
+ ++errs[2];
+
+ mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n",
+ errs[0], errs[1], errs[2]);
}
-static void do_test(void)
+static void do_read_far_test(void __iomem *p)
{
- void __iomem *p = ioremap_nocache(mmio_address, 0x4000);
+ pr_info(MODULE_NAME ": read far test.\n");
+ mmiotrace_printk("Read far test.\n");
+
+ ioread32(p + read_far);
+}
+
+static void do_test(unsigned long size)
+{
+ void __iomem *p = ioremap_nocache(mmio_address, size);
if (!p) {
pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
return;
@@ -45,11 +84,15 @@ static void do_test(void)
mmiotrace_printk("ioremap returned %p.\n", p);
do_write_test(p);
do_read_test(p);
+ if (read_far && read_far < size - 4)
+ do_read_far_test(p);
iounmap(p);
}
static int __init init(void)
{
+ unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
+
if (mmio_address == 0) {
pr_err(MODULE_NAME ": you have to use the module argument "
"mmio_address.\n");
@@ -58,10 +101,11 @@ static int __init init(void)
return -ENXIO;
}
- pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
- "in PCI address space, and writing "
- "rubbish in there.\n", mmio_address);
- do_test();
+ pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI "
+ "address space, and writing 16 kB of rubbish in there.\n",
+ size >> 10, mmio_address);
+ do_test(size);
+ pr_info(MODULE_NAME ": All done.\n");
return 0;
}
OpenPOWER on IntegriCloud