summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/Makefile8
-rw-r--r--arch/x86/crypto/Makefile4
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c94
-rw-r--r--arch/x86/crypto/chacha20_glue.c2
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c40
-rw-r--r--arch/x86/crypto/sha1-mb/Makefile (renamed from arch/x86/crypto/sha-mb/Makefile)0
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb.c (renamed from arch/x86/crypto/sha-mb/sha1_mb.c)288
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_ctx.h (renamed from arch/x86/crypto/sha-mb/sha_mb_ctx.h)2
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr.h (renamed from arch/x86/crypto/sha-mb/sha_mb_mgr.h)0
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S (renamed from arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S)0
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S (renamed from arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S)0
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c (renamed from arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c)2
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S (renamed from arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S)0
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_x8_avx2.S (renamed from arch/x86/crypto/sha-mb/sha1_x8_avx2.S)0
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c6
-rw-r--r--arch/x86/crypto/sha256-mb/Makefile11
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb.c1030
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_ctx.h136
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr.h108
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S304
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S304
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c65
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S215
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_x8_avx2.S593
-rw-r--r--arch/x86/crypto/sha256_ssse3_glue.c10
-rw-r--r--arch/x86/crypto/sha512-mb/Makefile11
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb.c1046
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_ctx.h130
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr.h104
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S281
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S291
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c67
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S222
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_x4_avx2.S529
-rw-r--r--arch/x86/crypto/sha512_ssse3_glue.c6
-rw-r--r--arch/x86/entry/common.c106
-rw-r--r--arch/x86/entry/vdso/vma.c20
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c2
-rw-r--r--arch/x86/events/amd/core.c6
-rw-r--r--arch/x86/events/amd/ibs.c72
-rw-r--r--arch/x86/events/amd/power.c60
-rw-r--r--arch/x86/events/amd/uncore.c122
-rw-r--r--arch/x86/events/core.c103
-rw-r--r--arch/x86/events/intel/core.c4
-rw-r--r--arch/x86/events/intel/cqm.c49
-rw-r--r--arch/x86/events/intel/cstate.c51
-rw-r--r--arch/x86/events/intel/rapl.c84
-rw-r--r--arch/x86/events/intel/uncore.c133
-rw-r--r--arch/x86/include/asm/acpi.h3
-rw-r--r--arch/x86/include/asm/cpu.h1
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/pgalloc.h12
-rw-r--r--arch/x86/include/asm/pmem.h77
-rw-r--r--arch/x86/include/asm/ptrace.h6
-rw-r--r--arch/x86/include/asm/smp.h3
-rw-r--r--arch/x86/include/asm/special_insns.h46
-rw-r--r--arch/x86/include/asm/vmx.h1
-rw-r--r--arch/x86/include/asm/xen/cpuid.h5
-rw-r--r--arch/x86/include/uapi/asm/vmx.h4
-rw-r--r--arch/x86/kernel/acpi/boot.c16
-rw-r--r--arch/x86/kernel/apb_timer.c29
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c80
-rw-r--r--arch/x86/kernel/hpet.c69
-rw-r--r--arch/x86/kernel/setup.c9
-rw-r--r--arch/x86/kernel/setup_percpu.c3
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/kernel/tboot.c25
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/mtrr.c1
-rw-r--r--arch/x86/kvm/vmx.c108
-rw-r--r--arch/x86/kvm/x86.c35
-rw-r--r--arch/x86/lib/x86-opcode-map.txt2
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/numa.c2
-rw-r--r--arch/x86/mm/pgtable.c10
-rw-r--r--arch/x86/mm/srat.c116
-rw-r--r--arch/x86/power/cpu.c30
-rw-r--r--arch/x86/power/hibernate_asm_64.S4
-rw-r--r--arch/x86/xen/efi.c111
-rw-r--r--arch/x86/xen/enlighten.c49
-rw-r--r--arch/x86/xen/grant-table.c57
-rw-r--r--arch/x86/xen/irq.c3
-rw-r--r--arch/x86/xen/pmu.c2
-rw-r--r--arch/x86/xen/smp.c18
-rw-r--r--arch/x86/xen/time.c63
-rw-r--r--arch/x86/xen/xen-ops.h1
90 files changed, 6392 insertions, 1350 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5977fea2c8b1..2fa55851d2a9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,6 +22,7 @@ config X86
select ANON_INODES
select ARCH_CLOCKSOURCE_DATA
select ARCH_DISCARD_MEMBLOCK
+ select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
select ARCH_HAS_DEVMEM_IS_ALLOWED
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 6fce7f096b88..830ed391e7ef 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -126,14 +126,6 @@ else
KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args)
endif
-# Make sure compiler does not have buggy stack-protector support.
-ifdef CONFIG_CC_STACKPROTECTOR
- cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
- ifneq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y)
- $(warning stack-protector enabled but compiler support broken)
- endif
-endif
-
ifdef CONFIG_X86_X32
x32_ld_ok := $(call try-run,\
/bin/echo -e '1: .quad 1b' | \
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index b9b912a44d61..34b3fa2889d1 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -49,7 +49,9 @@ endif
ifeq ($(avx2_supported),yes)
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
- obj-$(CONFIG_CRYPTO_SHA1_MB) += sha-mb/
+ obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/
+ obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/
+ obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/
endif
aes-i586-y := aes-i586-asm_32.o aes_glue.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 5b7fa1471007..0ab5ee1c26af 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -59,17 +59,6 @@ struct aesni_rfc4106_gcm_ctx {
u8 nonce[4];
};
-struct aesni_gcm_set_hash_subkey_result {
- int err;
- struct completion completion;
-};
-
-struct aesni_hash_subkey_req_data {
- u8 iv[16];
- struct aesni_gcm_set_hash_subkey_result result;
- struct scatterlist sg;
-};
-
struct aesni_lrw_ctx {
struct lrw_table_ctx lrw_table;
u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
@@ -809,71 +798,28 @@ static void rfc4106_exit(struct crypto_aead *aead)
cryptd_free_aead(*ctx);
}
-static void
-rfc4106_set_hash_subkey_done(struct crypto_async_request *req, int err)
-{
- struct aesni_gcm_set_hash_subkey_result *result = req->data;
-
- if (err == -EINPROGRESS)
- return;
- result->err = err;
- complete(&result->completion);
-}
-
static int
rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
{
- struct crypto_ablkcipher *ctr_tfm;
- struct ablkcipher_request *req;
- int ret = -EINVAL;
- struct aesni_hash_subkey_req_data *req_data;
+ struct crypto_cipher *tfm;
+ int ret;
- ctr_tfm = crypto_alloc_ablkcipher("ctr(aes)", 0, 0);
- if (IS_ERR(ctr_tfm))
- return PTR_ERR(ctr_tfm);
+ tfm = crypto_alloc_cipher("aes", 0, 0);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
- ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len);
+ ret = crypto_cipher_setkey(tfm, key, key_len);
if (ret)
- goto out_free_ablkcipher;
-
- ret = -ENOMEM;
- req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL);
- if (!req)
- goto out_free_ablkcipher;
-
- req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
- if (!req_data)
- goto out_free_request;
-
- memset(req_data->iv, 0, sizeof(req_data->iv));
+ goto out_free_cipher;
/* Clear the data in the hash sub key container to zero.*/
/* We want to cipher all zeros to create the hash sub key. */
memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE);
- init_completion(&req_data->result.completion);
- sg_init_one(&req_data->sg, hash_subkey, RFC4106_HASH_SUBKEY_SIZE);
- ablkcipher_request_set_tfm(req, ctr_tfm);
- ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
- CRYPTO_TFM_REQ_MAY_BACKLOG,
- rfc4106_set_hash_subkey_done,
- &req_data->result);
-
- ablkcipher_request_set_crypt(req, &req_data->sg,
- &req_data->sg, RFC4106_HASH_SUBKEY_SIZE, req_data->iv);
-
- ret = crypto_ablkcipher_encrypt(req);
- if (ret == -EINPROGRESS || ret == -EBUSY) {
- ret = wait_for_completion_interruptible
- (&req_data->result.completion);
- if (!ret)
- ret = req_data->result.err;
- }
- kfree(req_data);
-out_free_request:
- ablkcipher_request_free(req);
-out_free_ablkcipher:
- crypto_free_ablkcipher(ctr_tfm);
+ crypto_cipher_encrypt_one(tfm, hash_subkey, hash_subkey);
+
+out_free_cipher:
+ crypto_free_cipher(tfm);
return ret;
}
@@ -1098,9 +1044,12 @@ static int rfc4106_encrypt(struct aead_request *req)
struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
struct cryptd_aead *cryptd_tfm = *ctx;
- aead_request_set_tfm(req, irq_fpu_usable() ?
- cryptd_aead_child(cryptd_tfm) :
- &cryptd_tfm->base);
+ tfm = &cryptd_tfm->base;
+ if (irq_fpu_usable() && (!in_atomic() ||
+ !cryptd_aead_queued(cryptd_tfm)))
+ tfm = cryptd_aead_child(cryptd_tfm);
+
+ aead_request_set_tfm(req, tfm);
return crypto_aead_encrypt(req);
}
@@ -1111,9 +1060,12 @@ static int rfc4106_decrypt(struct aead_request *req)
struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
struct cryptd_aead *cryptd_tfm = *ctx;
- aead_request_set_tfm(req, irq_fpu_usable() ?
- cryptd_aead_child(cryptd_tfm) :
- &cryptd_tfm->base);
+ tfm = &cryptd_tfm->base;
+ if (irq_fpu_usable() && (!in_atomic() ||
+ !cryptd_aead_queued(cryptd_tfm)))
+ tfm = cryptd_aead_child(cryptd_tfm);
+
+ aead_request_set_tfm(req, tfm);
return crypto_aead_decrypt(req);
}
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index 2d5c2e0bd939..f910d1d449f0 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -70,7 +70,7 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
struct blkcipher_walk walk;
int err;
- if (!may_use_simd())
+ if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd())
return crypto_chacha20_crypt(desc, dst, src, nbytes);
state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN);
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index a69321a77783..0420bab19efb 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -168,30 +168,23 @@ static int ghash_async_init(struct ahash_request *req)
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct ahash_request *cryptd_req = ahash_request_ctx(req);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+ struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+ struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
- if (!irq_fpu_usable()) {
- memcpy(cryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
- return crypto_ahash_init(cryptd_req);
- } else {
- struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
- struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
-
- desc->tfm = child;
- desc->flags = req->base.flags;
- return crypto_shash_init(desc);
- }
+ desc->tfm = child;
+ desc->flags = req->base.flags;
+ return crypto_shash_init(desc);
}
static int ghash_async_update(struct ahash_request *req)
{
struct ahash_request *cryptd_req = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
- if (!irq_fpu_usable()) {
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
- struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
-
+ if (!irq_fpu_usable() ||
+ (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
return crypto_ahash_update(cryptd_req);
@@ -204,12 +197,12 @@ static int ghash_async_update(struct ahash_request *req)
static int ghash_async_final(struct ahash_request *req)
{
struct ahash_request *cryptd_req = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
- if (!irq_fpu_usable()) {
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
- struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
-
+ if (!irq_fpu_usable() ||
+ (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
return crypto_ahash_final(cryptd_req);
@@ -249,7 +242,8 @@ static int ghash_async_digest(struct ahash_request *req)
struct ahash_request *cryptd_req = ahash_request_ctx(req);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
- if (!irq_fpu_usable()) {
+ if (!irq_fpu_usable() ||
+ (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
return crypto_ahash_digest(cryptd_req);
diff --git a/arch/x86/crypto/sha-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile
index 2f8756375df5..2f8756375df5 100644
--- a/arch/x86/crypto/sha-mb/Makefile
+++ b/arch/x86/crypto/sha1-mb/Makefile
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c
index 9c5af331a956..9e5b67127a09 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha1-mb/sha1_mb.c
@@ -67,7 +67,7 @@
#include <asm/byteorder.h>
#include <linux/hardirq.h>
#include <asm/fpu/api.h>
-#include "sha_mb_ctx.h"
+#include "sha1_mb_ctx.h"
#define FLUSH_INTERVAL 1000 /* in usec */
@@ -77,30 +77,34 @@ struct sha1_mb_ctx {
struct mcryptd_ahash *mcryptd_tfm;
};
-static inline struct mcryptd_hash_request_ctx *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx)
+static inline struct mcryptd_hash_request_ctx
+ *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx)
{
- struct shash_desc *desc;
+ struct ahash_request *areq;
- desc = container_of((void *) hash_ctx, struct shash_desc, __ctx);
- return container_of(desc, struct mcryptd_hash_request_ctx, desc);
+ areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
+ return container_of(areq, struct mcryptd_hash_request_ctx, areq);
}
-static inline struct ahash_request *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
+static inline struct ahash_request
+ *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
{
return container_of((void *) ctx, struct ahash_request, __ctx);
}
static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
- struct shash_desc *desc)
+ struct ahash_request *areq)
{
rctx->flag = HASH_UPDATE;
}
static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)(struct sha1_mb_mgr *state,
- struct job_sha1 *job);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)(struct sha1_mb_mgr *state);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)(struct sha1_mb_mgr *state);
+static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)
+ (struct sha1_mb_mgr *state, struct job_sha1 *job);
+static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)
+ (struct sha1_mb_mgr *state);
+static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)
+ (struct sha1_mb_mgr *state);
static inline void sha1_init_digest(uint32_t *digest)
{
@@ -131,7 +135,8 @@ static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
return i >> SHA1_LOG2_BLOCK_SIZE;
}
-static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, struct sha1_hash_ctx *ctx)
+static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr,
+ struct sha1_hash_ctx *ctx)
{
while (ctx) {
if (ctx->status & HASH_CTX_STS_COMPLETE) {
@@ -177,8 +182,8 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, str
ctx->job.buffer = (uint8_t *) buffer;
ctx->job.len = len;
- ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr,
- &ctx->job);
+ ctx = (struct sha1_hash_ctx *)sha1_job_mgr_submit(&mgr->mgr,
+ &ctx->job);
continue;
}
}
@@ -191,13 +196,15 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, str
if (ctx->status & HASH_CTX_STS_LAST) {
uint8_t *buf = ctx->partial_block_buffer;
- uint32_t n_extra_blocks = sha1_pad(buf, ctx->total_length);
+ uint32_t n_extra_blocks =
+ sha1_pad(buf, ctx->total_length);
ctx->status = (HASH_CTX_STS_PROCESSING |
HASH_CTX_STS_COMPLETE);
ctx->job.buffer = buf;
ctx->job.len = (uint32_t) n_extra_blocks;
- ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
+ ctx = (struct sha1_hash_ctx *)
+ sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
continue;
}
@@ -208,14 +215,17 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, str
return NULL;
}
-static struct sha1_hash_ctx *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr)
+static struct sha1_hash_ctx
+ *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr)
{
/*
* If get_comp_job returns NULL, there are no jobs complete.
- * If get_comp_job returns a job, verify that it is safe to return to the user.
+ * If get_comp_job returns a job, verify that it is safe to return to
+ * the user.
* If it is not ready, resubmit the job to finish processing.
* If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned.
- * Otherwise, all jobs currently being managed by the hash_ctx_mgr still need processing.
+ * Otherwise, all jobs currently being managed by the hash_ctx_mgr
+ * still need processing.
*/
struct sha1_hash_ctx *ctx;
@@ -235,7 +245,10 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
int flags)
{
if (flags & (~HASH_ENTIRE)) {
- /* User should not pass anything other than FIRST, UPDATE, or LAST */
+ /*
+ * User should not pass anything other than FIRST, UPDATE, or
+ * LAST
+ */
ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
return ctx;
}
@@ -264,14 +277,20 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
ctx->partial_block_buffer_length = 0;
}
- /* If we made it here, there were no errors during this call to submit */
+ /*
+ * If we made it here, there were no errors during this call to
+ * submit
+ */
ctx->error = HASH_CTX_ERROR_NONE;
/* Store buffer ptr info from user */
ctx->incoming_buffer = buffer;
ctx->incoming_buffer_length = len;
- /* Store the user's request flags and mark this ctx as currently being processed. */
+ /*
+ * Store the user's request flags and mark this ctx as currently
+ * being processed.
+ */
ctx->status = (flags & HASH_LAST) ?
(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
HASH_CTX_STS_PROCESSING;
@@ -285,9 +304,13 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
* Or if the user's buffer contains less than a whole block,
* append as much as possible to the extra block.
*/
- if ((ctx->partial_block_buffer_length) | (len < SHA1_BLOCK_SIZE)) {
- /* Compute how many bytes to copy from user buffer into extra block */
- uint32_t copy_len = SHA1_BLOCK_SIZE - ctx->partial_block_buffer_length;
+ if (ctx->partial_block_buffer_length || len < SHA1_BLOCK_SIZE) {
+ /*
+ * Compute how many bytes to copy from user buffer into
+ * extra block
+ */
+ uint32_t copy_len = SHA1_BLOCK_SIZE -
+ ctx->partial_block_buffer_length;
if (len < copy_len)
copy_len = len;
@@ -297,20 +320,28 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
buffer, copy_len);
ctx->partial_block_buffer_length += copy_len;
- ctx->incoming_buffer = (const void *)((const char *)buffer + copy_len);
+ ctx->incoming_buffer = (const void *)
+ ((const char *)buffer + copy_len);
ctx->incoming_buffer_length = len - copy_len;
}
- /* The extra block should never contain more than 1 block here */
+ /*
+ * The extra block should never contain more than 1 block
+ * here
+ */
assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE);
- /* If the extra block buffer contains exactly 1 block, it can be hashed. */
+ /*
+ * If the extra block buffer contains exactly 1 block, it can
+ * be hashed.
+ */
if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) {
ctx->partial_block_buffer_length = 0;
ctx->job.buffer = ctx->partial_block_buffer;
ctx->job.len = 1;
- ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
+ ctx = (struct sha1_hash_ctx *)
+ sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
}
}
@@ -329,23 +360,24 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr)
return NULL;
/*
- * If flush returned a job, resubmit the job to finish processing.
+ * If flush returned a job, resubmit the job to finish
+ * processing.
*/
ctx = sha1_ctx_mgr_resubmit(mgr, ctx);
/*
- * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned.
- * Otherwise, all jobs currently being managed by the sha1_ctx_mgr
- * still need processing. Loop.
+ * If sha1_ctx_mgr_resubmit returned a job, it is ready to be
+ * returned. Otherwise, all jobs currently being managed by the
+ * sha1_ctx_mgr still need processing. Loop.
*/
if (ctx)
return ctx;
}
}
-static int sha1_mb_init(struct shash_desc *desc)
+static int sha1_mb_init(struct ahash_request *areq)
{
- struct sha1_hash_ctx *sctx = shash_desc_ctx(desc);
+ struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
hash_ctx_init(sctx);
sctx->job.result_digest[0] = SHA1_H0;
@@ -363,7 +395,7 @@ static int sha1_mb_init(struct shash_desc *desc)
static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
{
int i;
- struct sha1_hash_ctx *sctx = shash_desc_ctx(&rctx->desc);
+ struct sha1_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
__be32 *dst = (__be32 *) rctx->out;
for (i = 0; i < 5; ++i)
@@ -394,9 +426,11 @@ static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
flag |= HASH_LAST;
}
- sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(&rctx->desc);
+ sha_ctx = (struct sha1_hash_ctx *)
+ ahash_request_ctx(&rctx->areq);
kernel_fpu_begin();
- sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, flag);
+ sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx,
+ rctx->walk.data, nbytes, flag);
if (!sha_ctx) {
if (flush)
sha_ctx = sha1_ctx_mgr_flush(cstate->mgr);
@@ -485,11 +519,10 @@ static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
mcryptd_arm_flusher(cstate, delay);
}
-static int sha1_mb_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static int sha1_mb_update(struct ahash_request *areq)
{
struct mcryptd_hash_request_ctx *rctx =
- container_of(desc, struct mcryptd_hash_request_ctx, desc);
+ container_of(areq, struct mcryptd_hash_request_ctx, areq);
struct mcryptd_alg_cstate *cstate =
this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
@@ -505,7 +538,7 @@ static int sha1_mb_update(struct shash_desc *desc, const u8 *data,
}
/* need to init context */
- req_ctx_init(rctx, desc);
+ req_ctx_init(rctx, areq);
nbytes = crypto_ahash_walk_first(req, &rctx->walk);
@@ -518,10 +551,11 @@ static int sha1_mb_update(struct shash_desc *desc, const u8 *data,
rctx->flag |= HASH_DONE;
/* submit */
- sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc);
+ sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
sha1_mb_add_list(rctx, cstate);
kernel_fpu_begin();
- sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, HASH_UPDATE);
+ sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
+ nbytes, HASH_UPDATE);
kernel_fpu_end();
/* check if anything is returned */
@@ -544,11 +578,10 @@ done:
return ret;
}
-static int sha1_mb_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
+static int sha1_mb_finup(struct ahash_request *areq)
{
struct mcryptd_hash_request_ctx *rctx =
- container_of(desc, struct mcryptd_hash_request_ctx, desc);
+ container_of(areq, struct mcryptd_hash_request_ctx, areq);
struct mcryptd_alg_cstate *cstate =
this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
@@ -563,7 +596,7 @@ static int sha1_mb_finup(struct shash_desc *desc, const u8 *data,
}
/* need to init context */
- req_ctx_init(rctx, desc);
+ req_ctx_init(rctx, areq);
nbytes = crypto_ahash_walk_first(req, &rctx->walk);
@@ -576,15 +609,15 @@ static int sha1_mb_finup(struct shash_desc *desc, const u8 *data,
rctx->flag |= HASH_DONE;
flag = HASH_LAST;
}
- rctx->out = out;
/* submit */
rctx->flag |= HASH_FINAL;
- sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc);
+ sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
sha1_mb_add_list(rctx, cstate);
kernel_fpu_begin();
- sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, flag);
+ sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
+ nbytes, flag);
kernel_fpu_end();
/* check if anything is returned */
@@ -605,10 +638,10 @@ done:
return ret;
}
-static int sha1_mb_final(struct shash_desc *desc, u8 *out)
+static int sha1_mb_final(struct ahash_request *areq)
{
struct mcryptd_hash_request_ctx *rctx =
- container_of(desc, struct mcryptd_hash_request_ctx, desc);
+ container_of(areq, struct mcryptd_hash_request_ctx, areq);
struct mcryptd_alg_cstate *cstate =
this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
@@ -623,16 +656,16 @@ static int sha1_mb_final(struct shash_desc *desc, u8 *out)
}
/* need to init context */
- req_ctx_init(rctx, desc);
+ req_ctx_init(rctx, areq);
- rctx->out = out;
rctx->flag |= HASH_DONE | HASH_FINAL;
- sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc);
+ sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
/* flag HASH_FINAL and 0 data size */
sha1_mb_add_list(rctx, cstate);
kernel_fpu_begin();
- sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, HASH_LAST);
+ sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
+ HASH_LAST);
kernel_fpu_end();
/* check if anything is returned */
@@ -654,48 +687,98 @@ done:
return ret;
}
-static int sha1_mb_export(struct shash_desc *desc, void *out)
+static int sha1_mb_export(struct ahash_request *areq, void *out)
{
- struct sha1_hash_ctx *sctx = shash_desc_ctx(desc);
+ struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
-static int sha1_mb_import(struct shash_desc *desc, const void *in)
+static int sha1_mb_import(struct ahash_request *areq, const void *in)
{
- struct sha1_hash_ctx *sctx = shash_desc_ctx(desc);
+ struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
+static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
+{
+ struct mcryptd_ahash *mcryptd_tfm;
+ struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct mcryptd_hash_ctx *mctx;
-static struct shash_alg sha1_mb_shash_alg = {
- .digestsize = SHA1_DIGEST_SIZE,
+ mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
+ CRYPTO_ALG_INTERNAL,
+ CRYPTO_ALG_INTERNAL);
+ if (IS_ERR(mcryptd_tfm))
+ return PTR_ERR(mcryptd_tfm);
+ mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
+ mctx->alg_state = &sha1_mb_alg_state;
+ ctx->mcryptd_tfm = mcryptd_tfm;
+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+ sizeof(struct ahash_request) +
+ crypto_ahash_reqsize(&mcryptd_tfm->base));
+
+ return 0;
+}
+
+static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ mcryptd_free_ahash(ctx->mcryptd_tfm);
+}
+
+static int sha1_mb_areq_init_tfm(struct crypto_tfm *tfm)
+{
+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+ sizeof(struct ahash_request) +
+ sizeof(struct sha1_hash_ctx));
+
+ return 0;
+}
+
+static void sha1_mb_areq_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ mcryptd_free_ahash(ctx->mcryptd_tfm);
+}
+
+static struct ahash_alg sha1_mb_areq_alg = {
.init = sha1_mb_init,
.update = sha1_mb_update,
.final = sha1_mb_final,
.finup = sha1_mb_finup,
.export = sha1_mb_export,
.import = sha1_mb_import,
- .descsize = sizeof(struct sha1_hash_ctx),
- .statesize = sizeof(struct sha1_hash_ctx),
- .base = {
- .cra_name = "__sha1-mb",
- .cra_driver_name = "__intel_sha1-mb",
- .cra_priority = 100,
- /*
- * use ASYNC flag as some buffers in multi-buffer
- * algo may not have completed before hashing thread sleep
- */
- .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SHA1_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list),
+ .halg = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .statesize = sizeof(struct sha1_hash_ctx),
+ .base = {
+ .cra_name = "__sha1-mb",
+ .cra_driver_name = "__intel_sha1-mb",
+ .cra_priority = 100,
+ /*
+ * use ASYNC flag as some buffers in multi-buffer
+ * algo may not have completed before hashing thread
+ * sleep
+ */
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH |
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT
+ (sha1_mb_areq_alg.halg.base.cra_list),
+ .cra_init = sha1_mb_areq_init_tfm,
+ .cra_exit = sha1_mb_areq_exit_tfm,
+ .cra_ctxsize = sizeof(struct sha1_hash_ctx),
+ }
}
};
@@ -780,46 +863,20 @@ static int sha1_mb_async_import(struct ahash_request *req, const void *in)
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
- struct crypto_shash *child = mcryptd_ahash_child(mcryptd_tfm);
+ struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
struct mcryptd_hash_request_ctx *rctx;
- struct shash_desc *desc;
+ struct ahash_request *areq;
memcpy(mcryptd_req, req, sizeof(*req));
ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
rctx = ahash_request_ctx(mcryptd_req);
- desc = &rctx->desc;
- desc->tfm = child;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
-
- return crypto_ahash_import(mcryptd_req, in);
-}
-
-static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
- struct mcryptd_ahash *mcryptd_tfm;
- struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
- struct mcryptd_hash_ctx *mctx;
+ areq = &rctx->areq;
- mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
- CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(mcryptd_tfm))
- return PTR_ERR(mcryptd_tfm);
- mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
- mctx->alg_state = &sha1_mb_alg_state;
- ctx->mcryptd_tfm = mcryptd_tfm;
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- crypto_ahash_reqsize(&mcryptd_tfm->base));
+ ahash_request_set_tfm(areq, child);
+ ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
+ rctx->complete, req);
- return 0;
-}
-
-static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
+ return crypto_ahash_import(mcryptd_req, in);
}
static struct ahash_alg sha1_mb_async_alg = {
@@ -866,7 +923,8 @@ static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
if (time_before(cur_time, rctx->tag.expire))
break;
kernel_fpu_begin();
- sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr);
+ sha_ctx = (struct sha1_hash_ctx *)
+ sha1_ctx_mgr_flush(cstate->mgr);
kernel_fpu_end();
if (!sha_ctx) {
pr_err("sha1_mb error: nothing got flushed for non-empty list\n");
@@ -927,7 +985,7 @@ static int __init sha1_mb_mod_init(void)
}
sha1_mb_alg_state.flusher = &sha1_mb_flusher;
- err = crypto_register_shash(&sha1_mb_shash_alg);
+ err = crypto_register_ahash(&sha1_mb_areq_alg);
if (err)
goto err2;
err = crypto_register_ahash(&sha1_mb_async_alg);
@@ -937,7 +995,7 @@ static int __init sha1_mb_mod_init(void)
return 0;
err1:
- crypto_unregister_shash(&sha1_mb_shash_alg);
+ crypto_unregister_ahash(&sha1_mb_areq_alg);
err2:
for_each_possible_cpu(cpu) {
cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
@@ -953,7 +1011,7 @@ static void __exit sha1_mb_mod_fini(void)
struct mcryptd_alg_cstate *cpu_state;
crypto_unregister_ahash(&sha1_mb_async_alg);
- crypto_unregister_shash(&sha1_mb_shash_alg);
+ crypto_unregister_ahash(&sha1_mb_areq_alg);
for_each_possible_cpu(cpu) {
cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
kfree(cpu_state->mgr);
diff --git a/arch/x86/crypto/sha-mb/sha_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
index e36069d0c1bd..98a35bcc6f4a 100644
--- a/arch/x86/crypto/sha-mb/sha_mb_ctx.h
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
@@ -54,7 +54,7 @@
#ifndef _SHA_MB_CTX_INTERNAL_H
#define _SHA_MB_CTX_INTERNAL_H
-#include "sha_mb_mgr.h"
+#include "sha1_mb_mgr.h"
#define HASH_UPDATE 0x00
#define HASH_FIRST 0x01
diff --git a/arch/x86/crypto/sha-mb/sha_mb_mgr.h b/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
index 08ad1a9acfd7..08ad1a9acfd7 100644
--- a/arch/x86/crypto/sha-mb/sha_mb_mgr.h
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
index 86688c6e7a25..86688c6e7a25 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
index 96df6a39d7e2..96df6a39d7e2 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
index 822acb5b464c..d2add0d35f43 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
@@ -51,7 +51,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include "sha_mb_mgr.h"
+#include "sha1_mb_mgr.h"
void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
{
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
index 63a0d9c8e31f..63a0d9c8e31f 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
diff --git a/arch/x86/crypto/sha-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
index c9dae1cd2919..c9dae1cd2919 100644
--- a/arch/x86/crypto/sha-mb/sha1_x8_avx2.S
+++ b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 1024e378a358..fc61739150e7 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -374,3 +374,9 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS_CRYPTO("sha1");
+MODULE_ALIAS_CRYPTO("sha1-ssse3");
+MODULE_ALIAS_CRYPTO("sha1-avx");
+MODULE_ALIAS_CRYPTO("sha1-avx2");
+#ifdef CONFIG_AS_SHA1_NI
+MODULE_ALIAS_CRYPTO("sha1-ni");
+#endif
diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile
new file mode 100644
index 000000000000..41089e7c400c
--- /dev/null
+++ b/arch/x86/crypto/sha256-mb/Makefile
@@ -0,0 +1,11 @@
+#
+# Arch-specific CryptoAPI modules.
+#
+
+avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
+ $(comma)4)$(comma)%ymm2,yes,no)
+ifeq ($(avx2_supported),yes)
+ obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o
+ sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \
+ sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o
+endif
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c
new file mode 100644
index 000000000000..89fa85e8b10c
--- /dev/null
+++ b/arch/x86/crypto/sha256-mb/sha256_mb.c
@@ -0,0 +1,1030 @@
+/*
+ * Multi buffer SHA256 algorithm Glue Code
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sha.h>
+#include <crypto/mcryptd.h>
+#include <crypto/crypto_wq.h>
+#include <asm/byteorder.h>
+#include <linux/hardirq.h>
+#include <asm/fpu/api.h>
+#include "sha256_mb_ctx.h"
+
+#define FLUSH_INTERVAL 1000 /* in usec */
+
+static struct mcryptd_alg_state sha256_mb_alg_state;
+
+struct sha256_mb_ctx {
+ struct mcryptd_ahash *mcryptd_tfm;
+};
+
+static inline struct mcryptd_hash_request_ctx
+ *cast_hash_to_mcryptd_ctx(struct sha256_hash_ctx *hash_ctx)
+{
+ struct ahash_request *areq;
+
+ areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
+ return container_of(areq, struct mcryptd_hash_request_ctx, areq);
+}
+
+static inline struct ahash_request
+ *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
+{
+ return container_of((void *) ctx, struct ahash_request, __ctx);
+}
+
+static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
+ struct ahash_request *areq)
+{
+ rctx->flag = HASH_UPDATE;
+}
+
+static asmlinkage void (*sha256_job_mgr_init)(struct sha256_mb_mgr *state);
+static asmlinkage struct job_sha256* (*sha256_job_mgr_submit)
+ (struct sha256_mb_mgr *state, struct job_sha256 *job);
+static asmlinkage struct job_sha256* (*sha256_job_mgr_flush)
+ (struct sha256_mb_mgr *state);
+static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job)
+ (struct sha256_mb_mgr *state);
+
+inline void sha256_init_digest(uint32_t *digest)
+{
+ static const uint32_t initial_digest[SHA256_DIGEST_LENGTH] = {
+ SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
+ SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7};
+ memcpy(digest, initial_digest, sizeof(initial_digest));
+}
+
+inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
+ uint32_t total_len)
+{
+ uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1);
+
+ memset(&padblock[i], 0, SHA256_BLOCK_SIZE);
+ padblock[i] = 0x80;
+
+ i += ((SHA256_BLOCK_SIZE - 1) &
+ (0 - (total_len + SHA256_PADLENGTHFIELD_SIZE + 1)))
+ + 1 + SHA256_PADLENGTHFIELD_SIZE;
+
+#if SHA256_PADLENGTHFIELD_SIZE == 16
+ *((uint64_t *) &padblock[i - 16]) = 0;
+#endif
+
+ *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
+
+ /* Number of extra blocks to hash */
+ return i >> SHA256_LOG2_BLOCK_SIZE;
+}
+
+static struct sha256_hash_ctx
+ *sha256_ctx_mgr_resubmit(struct sha256_ctx_mgr *mgr,
+ struct sha256_hash_ctx *ctx)
+{
+ while (ctx) {
+ if (ctx->status & HASH_CTX_STS_COMPLETE) {
+ /* Clear PROCESSING bit */
+ ctx->status = HASH_CTX_STS_COMPLETE;
+ return ctx;
+ }
+
+ /*
+ * If the extra blocks are empty, begin hashing what remains
+ * in the user's buffer.
+ */
+ if (ctx->partial_block_buffer_length == 0 &&
+ ctx->incoming_buffer_length) {
+
+ const void *buffer = ctx->incoming_buffer;
+ uint32_t len = ctx->incoming_buffer_length;
+ uint32_t copy_len;
+
+ /*
+ * Only entire blocks can be hashed.
+ * Copy remainder to extra blocks buffer.
+ */
+ copy_len = len & (SHA256_BLOCK_SIZE-1);
+
+ if (copy_len) {
+ len -= copy_len;
+ memcpy(ctx->partial_block_buffer,
+ ((const char *) buffer + len),
+ copy_len);
+ ctx->partial_block_buffer_length = copy_len;
+ }
+
+ ctx->incoming_buffer_length = 0;
+
+ /* len should be a multiple of the block size now */
+ assert((len % SHA256_BLOCK_SIZE) == 0);
+
+ /* Set len to the number of blocks to be hashed */
+ len >>= SHA256_LOG2_BLOCK_SIZE;
+
+ if (len) {
+
+ ctx->job.buffer = (uint8_t *) buffer;
+ ctx->job.len = len;
+ ctx = (struct sha256_hash_ctx *)
+ sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
+ continue;
+ }
+ }
+
+ /*
+ * If the extra blocks are not empty, then we are
+ * either on the last block(s) or we need more
+ * user input before continuing.
+ */
+ if (ctx->status & HASH_CTX_STS_LAST) {
+
+ uint8_t *buf = ctx->partial_block_buffer;
+ uint32_t n_extra_blocks =
+ sha256_pad(buf, ctx->total_length);
+
+ ctx->status = (HASH_CTX_STS_PROCESSING |
+ HASH_CTX_STS_COMPLETE);
+ ctx->job.buffer = buf;
+ ctx->job.len = (uint32_t) n_extra_blocks;
+ ctx = (struct sha256_hash_ctx *)
+ sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
+ continue;
+ }
+
+ ctx->status = HASH_CTX_STS_IDLE;
+ return ctx;
+ }
+
+ return NULL;
+}
+
+static struct sha256_hash_ctx
+ *sha256_ctx_mgr_get_comp_ctx(struct sha256_ctx_mgr *mgr)
+{
+ /*
+ * If get_comp_job returns NULL, there are no jobs complete.
+ * If get_comp_job returns a job, verify that it is safe to return to
+ * the user. If it is not ready, resubmit the job to finish processing.
+ * If sha256_ctx_mgr_resubmit returned a job, it is ready to be
+ * returned. Otherwise, all jobs currently being managed by the
+ * hash_ctx_mgr still need processing.
+ */
+ struct sha256_hash_ctx *ctx;
+
+ ctx = (struct sha256_hash_ctx *) sha256_job_mgr_get_comp_job(&mgr->mgr);
+ return sha256_ctx_mgr_resubmit(mgr, ctx);
+}
+
+static void sha256_ctx_mgr_init(struct sha256_ctx_mgr *mgr)
+{
+ sha256_job_mgr_init(&mgr->mgr);
+}
+
+static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
+ struct sha256_hash_ctx *ctx,
+ const void *buffer,
+ uint32_t len,
+ int flags)
+{
+ if (flags & (~HASH_ENTIRE)) {
+ /* User should not pass anything other than FIRST, UPDATE
+ * or LAST
+ */
+ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
+ return ctx;
+ }
+
+ if (ctx->status & HASH_CTX_STS_PROCESSING) {
+ /* Cannot submit to a currently processing job. */
+ ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
+ return ctx;
+ }
+
+ if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+ /* Cannot update a finished job. */
+ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
+ return ctx;
+ }
+
+ if (flags & HASH_FIRST) {
+ /* Init digest */
+ sha256_init_digest(ctx->job.result_digest);
+
+ /* Reset byte counter */
+ ctx->total_length = 0;
+
+ /* Clear extra blocks */
+ ctx->partial_block_buffer_length = 0;
+ }
+
+ /* If we made it here, there was no error during this call to submit */
+ ctx->error = HASH_CTX_ERROR_NONE;
+
+ /* Store buffer ptr info from user */
+ ctx->incoming_buffer = buffer;
+ ctx->incoming_buffer_length = len;
+
+ /*
+ * Store the user's request flags and mark this ctx as currently
+ * being processed.
+ */
+ ctx->status = (flags & HASH_LAST) ?
+ (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
+ HASH_CTX_STS_PROCESSING;
+
+ /* Advance byte counter */
+ ctx->total_length += len;
+
+ /*
+ * If there is anything currently buffered in the extra blocks,
+ * append to it until it contains a whole block.
+ * Or if the user's buffer contains less than a whole block,
+ * append as much as possible to the extra block.
+ */
+ if (ctx->partial_block_buffer_length || len < SHA256_BLOCK_SIZE) {
+ /*
+ * Compute how many bytes to copy from user buffer into
+ * extra block
+ */
+ uint32_t copy_len = SHA256_BLOCK_SIZE -
+ ctx->partial_block_buffer_length;
+ if (len < copy_len)
+ copy_len = len;
+
+ if (copy_len) {
+ /* Copy and update relevant pointers and counters */
+ memcpy(
+ &ctx->partial_block_buffer[ctx->partial_block_buffer_length],
+ buffer, copy_len);
+
+ ctx->partial_block_buffer_length += copy_len;
+ ctx->incoming_buffer = (const void *)
+ ((const char *)buffer + copy_len);
+ ctx->incoming_buffer_length = len - copy_len;
+ }
+
+ /* The extra block should never contain more than 1 block */
+ assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE);
+
+ /*
+ * If the extra block buffer contains exactly 1 block,
+ * it can be hashed.
+ */
+ if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) {
+ ctx->partial_block_buffer_length = 0;
+
+ ctx->job.buffer = ctx->partial_block_buffer;
+ ctx->job.len = 1;
+ ctx = (struct sha256_hash_ctx *)
+ sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
+ }
+ }
+
+ return sha256_ctx_mgr_resubmit(mgr, ctx);
+}
+
+static struct sha256_hash_ctx *sha256_ctx_mgr_flush(struct sha256_ctx_mgr *mgr)
+{
+ struct sha256_hash_ctx *ctx;
+
+ while (1) {
+ ctx = (struct sha256_hash_ctx *)
+ sha256_job_mgr_flush(&mgr->mgr);
+
+ /* If flush returned 0, there are no more jobs in flight. */
+ if (!ctx)
+ return NULL;
+
+ /*
+ * If flush returned a job, resubmit the job to finish
+ * processing.
+ */
+ ctx = sha256_ctx_mgr_resubmit(mgr, ctx);
+
+ /*
+ * If sha256_ctx_mgr_resubmit returned a job, it is ready to
+ * be returned. Otherwise, all jobs currently being managed by
+ * the sha256_ctx_mgr still need processing. Loop.
+ */
+ if (ctx)
+ return ctx;
+ }
+}
+
+static int sha256_mb_init(struct ahash_request *areq)
+{
+ struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
+
+ hash_ctx_init(sctx);
+ sctx->job.result_digest[0] = SHA256_H0;
+ sctx->job.result_digest[1] = SHA256_H1;
+ sctx->job.result_digest[2] = SHA256_H2;
+ sctx->job.result_digest[3] = SHA256_H3;
+ sctx->job.result_digest[4] = SHA256_H4;
+ sctx->job.result_digest[5] = SHA256_H5;
+ sctx->job.result_digest[6] = SHA256_H6;
+ sctx->job.result_digest[7] = SHA256_H7;
+ sctx->total_length = 0;
+ sctx->partial_block_buffer_length = 0;
+ sctx->status = HASH_CTX_STS_IDLE;
+
+ return 0;
+}
+
+static int sha256_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
+{
+ int i;
+ struct sha256_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
+ __be32 *dst = (__be32 *) rctx->out;
+
+ for (i = 0; i < 8; ++i)
+ dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
+
+ return 0;
+}
+
+static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
+ struct mcryptd_alg_cstate *cstate, bool flush)
+{
+ int flag = HASH_UPDATE;
+ int nbytes, err = 0;
+ struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
+ struct sha256_hash_ctx *sha_ctx;
+
+ /* more work ? */
+ while (!(rctx->flag & HASH_DONE)) {
+ nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
+ if (nbytes < 0) {
+ err = nbytes;
+ goto out;
+ }
+ /* check if the walk is done */
+ if (crypto_ahash_walk_last(&rctx->walk)) {
+ rctx->flag |= HASH_DONE;
+ if (rctx->flag & HASH_FINAL)
+ flag |= HASH_LAST;
+
+ }
+ sha_ctx = (struct sha256_hash_ctx *)
+ ahash_request_ctx(&rctx->areq);
+ kernel_fpu_begin();
+ sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx,
+ rctx->walk.data, nbytes, flag);
+ if (!sha_ctx) {
+ if (flush)
+ sha_ctx = sha256_ctx_mgr_flush(cstate->mgr);
+ }
+ kernel_fpu_end();
+ if (sha_ctx)
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ else {
+ rctx = NULL;
+ goto out;
+ }
+ }
+
+ /* copy the results */
+ if (rctx->flag & HASH_FINAL)
+ sha256_mb_set_results(rctx);
+
+out:
+ *ret_rctx = rctx;
+ return err;
+}
+
+static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
+ struct mcryptd_alg_cstate *cstate,
+ int err)
+{
+ struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
+ struct sha256_hash_ctx *sha_ctx;
+ struct mcryptd_hash_request_ctx *req_ctx;
+ int ret;
+
+ /* remove from work list */
+ spin_lock(&cstate->work_lock);
+ list_del(&rctx->waiter);
+ spin_unlock(&cstate->work_lock);
+
+ if (irqs_disabled())
+ rctx->complete(&req->base, err);
+ else {
+ local_bh_disable();
+ rctx->complete(&req->base, err);
+ local_bh_enable();
+ }
+
+ /* check to see if there are other jobs that are done */
+ sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
+ while (sha_ctx) {
+ req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ ret = sha_finish_walk(&req_ctx, cstate, false);
+ if (req_ctx) {
+ spin_lock(&cstate->work_lock);
+ list_del(&req_ctx->waiter);
+ spin_unlock(&cstate->work_lock);
+
+ req = cast_mcryptd_ctx_to_req(req_ctx);
+ if (irqs_disabled())
+ rctx->complete(&req->base, ret);
+ else {
+ local_bh_disable();
+ rctx->complete(&req->base, ret);
+ local_bh_enable();
+ }
+ }
+ sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
+ }
+
+ return 0;
+}
+
+static void sha256_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
+ struct mcryptd_alg_cstate *cstate)
+{
+ unsigned long next_flush;
+ unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
+
+ /* initialize tag */
+ rctx->tag.arrival = jiffies; /* tag the arrival time */
+ rctx->tag.seq_num = cstate->next_seq_num++;
+ next_flush = rctx->tag.arrival + delay;
+ rctx->tag.expire = next_flush;
+
+ spin_lock(&cstate->work_lock);
+ list_add_tail(&rctx->waiter, &cstate->work_list);
+ spin_unlock(&cstate->work_lock);
+
+ mcryptd_arm_flusher(cstate, delay);
+}
+
+static int sha256_mb_update(struct ahash_request *areq)
+{
+ struct mcryptd_hash_request_ctx *rctx =
+ container_of(areq, struct mcryptd_hash_request_ctx, areq);
+ struct mcryptd_alg_cstate *cstate =
+ this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
+
+ struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
+ struct sha256_hash_ctx *sha_ctx;
+ int ret = 0, nbytes;
+
+ /* sanity check */
+ if (rctx->tag.cpu != smp_processor_id()) {
+ pr_err("mcryptd error: cpu clash\n");
+ goto done;
+ }
+
+ /* need to init context */
+ req_ctx_init(rctx, areq);
+
+ nbytes = crypto_ahash_walk_first(req, &rctx->walk);
+
+ if (nbytes < 0) {
+ ret = nbytes;
+ goto done;
+ }
+
+ if (crypto_ahash_walk_last(&rctx->walk))
+ rctx->flag |= HASH_DONE;
+
+ /* submit */
+ sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
+ sha256_mb_add_list(rctx, cstate);
+ kernel_fpu_begin();
+ sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
+ nbytes, HASH_UPDATE);
+ kernel_fpu_end();
+
+ /* check if anything is returned */
+ if (!sha_ctx)
+ return -EINPROGRESS;
+
+ if (sha_ctx->error) {
+ ret = sha_ctx->error;
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ goto done;
+ }
+
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ ret = sha_finish_walk(&rctx, cstate, false);
+
+ if (!rctx)
+ return -EINPROGRESS;
+done:
+ sha_complete_job(rctx, cstate, ret);
+ return ret;
+}
+
+static int sha256_mb_finup(struct ahash_request *areq)
+{
+ struct mcryptd_hash_request_ctx *rctx =
+ container_of(areq, struct mcryptd_hash_request_ctx, areq);
+ struct mcryptd_alg_cstate *cstate =
+ this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
+
+ struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
+ struct sha256_hash_ctx *sha_ctx;
+ int ret = 0, flag = HASH_UPDATE, nbytes;
+
+ /* sanity check */
+ if (rctx->tag.cpu != smp_processor_id()) {
+ pr_err("mcryptd error: cpu clash\n");
+ goto done;
+ }
+
+ /* need to init context */
+ req_ctx_init(rctx, areq);
+
+ nbytes = crypto_ahash_walk_first(req, &rctx->walk);
+
+ if (nbytes < 0) {
+ ret = nbytes;
+ goto done;
+ }
+
+ if (crypto_ahash_walk_last(&rctx->walk)) {
+ rctx->flag |= HASH_DONE;
+ flag = HASH_LAST;
+ }
+
+ /* submit */
+ rctx->flag |= HASH_FINAL;
+ sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
+ sha256_mb_add_list(rctx, cstate);
+
+ kernel_fpu_begin();
+ sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
+ nbytes, flag);
+ kernel_fpu_end();
+
+ /* check if anything is returned */
+ if (!sha_ctx)
+ return -EINPROGRESS;
+
+ if (sha_ctx->error) {
+ ret = sha_ctx->error;
+ goto done;
+ }
+
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ ret = sha_finish_walk(&rctx, cstate, false);
+ if (!rctx)
+ return -EINPROGRESS;
+done:
+ sha_complete_job(rctx, cstate, ret);
+ return ret;
+}
+
+static int sha256_mb_final(struct ahash_request *areq)
+{
+ struct mcryptd_hash_request_ctx *rctx =
+ container_of(areq, struct mcryptd_hash_request_ctx,
+ areq);
+ struct mcryptd_alg_cstate *cstate =
+ this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
+
+ struct sha256_hash_ctx *sha_ctx;
+ int ret = 0;
+ u8 data;
+
+ /* sanity check */
+ if (rctx->tag.cpu != smp_processor_id()) {
+ pr_err("mcryptd error: cpu clash\n");
+ goto done;
+ }
+
+ /* need to init context */
+ req_ctx_init(rctx, areq);
+
+ rctx->flag |= HASH_DONE | HASH_FINAL;
+
+ sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
+ /* flag HASH_FINAL and 0 data size */
+ sha256_mb_add_list(rctx, cstate);
+ kernel_fpu_begin();
+ sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
+ HASH_LAST);
+ kernel_fpu_end();
+
+ /* check if anything is returned */
+ if (!sha_ctx)
+ return -EINPROGRESS;
+
+ if (sha_ctx->error) {
+ ret = sha_ctx->error;
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ goto done;
+ }
+
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ ret = sha_finish_walk(&rctx, cstate, false);
+ if (!rctx)
+ return -EINPROGRESS;
+done:
+ sha_complete_job(rctx, cstate, ret);
+ return ret;
+}
+
+static int sha256_mb_export(struct ahash_request *areq, void *out)
+{
+ struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
+
+ memcpy(out, sctx, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha256_mb_import(struct ahash_request *areq, const void *in)
+{
+ struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
+
+ memcpy(sctx, in, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha256_mb_async_init_tfm(struct crypto_tfm *tfm)
+{
+ struct mcryptd_ahash *mcryptd_tfm;
+ struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct mcryptd_hash_ctx *mctx;
+
+ mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha256-mb",
+ CRYPTO_ALG_INTERNAL,
+ CRYPTO_ALG_INTERNAL);
+ if (IS_ERR(mcryptd_tfm))
+ return PTR_ERR(mcryptd_tfm);
+ mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
+ mctx->alg_state = &sha256_mb_alg_state;
+ ctx->mcryptd_tfm = mcryptd_tfm;
+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+ sizeof(struct ahash_request) +
+ crypto_ahash_reqsize(&mcryptd_tfm->base));
+
+ return 0;
+}
+
+static void sha256_mb_async_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ mcryptd_free_ahash(ctx->mcryptd_tfm);
+}
+
+static int sha256_mb_areq_init_tfm(struct crypto_tfm *tfm)
+{
+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+ sizeof(struct ahash_request) +
+ sizeof(struct sha256_hash_ctx));
+
+ return 0;
+}
+
+static void sha256_mb_areq_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ mcryptd_free_ahash(ctx->mcryptd_tfm);
+}
+
+static struct ahash_alg sha256_mb_areq_alg = {
+ .init = sha256_mb_init,
+ .update = sha256_mb_update,
+ .final = sha256_mb_final,
+ .finup = sha256_mb_finup,
+ .export = sha256_mb_export,
+ .import = sha256_mb_import,
+ .halg = {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .statesize = sizeof(struct sha256_hash_ctx),
+ .base = {
+ .cra_name = "__sha256-mb",
+ .cra_driver_name = "__intel_sha256-mb",
+ .cra_priority = 100,
+ /*
+ * use ASYNC flag as some buffers in multi-buffer
+ * algo may not have completed before hashing thread
+ * sleep
+ */
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH |
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT
+ (sha256_mb_areq_alg.halg.base.cra_list),
+ .cra_init = sha256_mb_areq_init_tfm,
+ .cra_exit = sha256_mb_areq_exit_tfm,
+ .cra_ctxsize = sizeof(struct sha256_hash_ctx),
+ }
+ }
+};
+
+static int sha256_mb_async_init(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_init(mcryptd_req);
+}
+
+static int sha256_mb_async_update(struct ahash_request *req)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_update(mcryptd_req);
+}
+
+static int sha256_mb_async_finup(struct ahash_request *req)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_finup(mcryptd_req);
+}
+
+static int sha256_mb_async_final(struct ahash_request *req)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_final(mcryptd_req);
+}
+
+static int sha256_mb_async_digest(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_digest(mcryptd_req);
+}
+
+static int sha256_mb_async_export(struct ahash_request *req, void *out)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_export(mcryptd_req, out);
+}
+
+static int sha256_mb_async_import(struct ahash_request *req, const void *in)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+ struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
+ struct mcryptd_hash_request_ctx *rctx;
+ struct ahash_request *areq;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ rctx = ahash_request_ctx(mcryptd_req);
+ areq = &rctx->areq;
+
+ ahash_request_set_tfm(areq, child);
+ ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
+ rctx->complete, req);
+
+ return crypto_ahash_import(mcryptd_req, in);
+}
+
+static struct ahash_alg sha256_mb_async_alg = {
+ .init = sha256_mb_async_init,
+ .update = sha256_mb_async_update,
+ .final = sha256_mb_async_final,
+ .finup = sha256_mb_async_finup,
+ .export = sha256_mb_async_export,
+ .import = sha256_mb_async_import,
+ .digest = sha256_mb_async_digest,
+ .halg = {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .statesize = sizeof(struct sha256_hash_ctx),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256_mb",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH |
+ CRYPTO_ALG_ASYNC,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_type = &crypto_ahash_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT
+ (sha256_mb_async_alg.halg.base.cra_list),
+ .cra_init = sha256_mb_async_init_tfm,
+ .cra_exit = sha256_mb_async_exit_tfm,
+ .cra_ctxsize = sizeof(struct sha256_mb_ctx),
+ .cra_alignmask = 0,
+ },
+ },
+};
+
+static unsigned long sha256_mb_flusher(struct mcryptd_alg_cstate *cstate)
+{
+ struct mcryptd_hash_request_ctx *rctx;
+ unsigned long cur_time;
+ unsigned long next_flush = 0;
+ struct sha256_hash_ctx *sha_ctx;
+
+
+ cur_time = jiffies;
+
+ while (!list_empty(&cstate->work_list)) {
+ rctx = list_entry(cstate->work_list.next,
+ struct mcryptd_hash_request_ctx, waiter);
+ if (time_before(cur_time, rctx->tag.expire))
+ break;
+ kernel_fpu_begin();
+ sha_ctx = (struct sha256_hash_ctx *)
+ sha256_ctx_mgr_flush(cstate->mgr);
+ kernel_fpu_end();
+ if (!sha_ctx) {
+ pr_err("sha256_mb error: nothing got"
+ " flushed for non-empty list\n");
+ break;
+ }
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ sha_finish_walk(&rctx, cstate, true);
+ sha_complete_job(rctx, cstate, 0);
+ }
+
+ if (!list_empty(&cstate->work_list)) {
+ rctx = list_entry(cstate->work_list.next,
+ struct mcryptd_hash_request_ctx, waiter);
+ /* get the hash context and then flush time */
+ next_flush = rctx->tag.expire;
+ mcryptd_arm_flusher(cstate, get_delay(next_flush));
+ }
+ return next_flush;
+}
+
+static int __init sha256_mb_mod_init(void)
+{
+
+ int cpu;
+ int err;
+ struct mcryptd_alg_cstate *cpu_state;
+
+ /* check for dependent cpu features */
+ if (!boot_cpu_has(X86_FEATURE_AVX2) ||
+ !boot_cpu_has(X86_FEATURE_BMI2))
+ return -ENODEV;
+
+ /* initialize multibuffer structures */
+ sha256_mb_alg_state.alg_cstate = alloc_percpu
+ (struct mcryptd_alg_cstate);
+
+ sha256_job_mgr_init = sha256_mb_mgr_init_avx2;
+ sha256_job_mgr_submit = sha256_mb_mgr_submit_avx2;
+ sha256_job_mgr_flush = sha256_mb_mgr_flush_avx2;
+ sha256_job_mgr_get_comp_job = sha256_mb_mgr_get_comp_job_avx2;
+
+ if (!sha256_mb_alg_state.alg_cstate)
+ return -ENOMEM;
+ for_each_possible_cpu(cpu) {
+ cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
+ cpu_state->next_flush = 0;
+ cpu_state->next_seq_num = 0;
+ cpu_state->flusher_engaged = false;
+ INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
+ cpu_state->cpu = cpu;
+ cpu_state->alg_state = &sha256_mb_alg_state;
+ cpu_state->mgr = kzalloc(sizeof(struct sha256_ctx_mgr),
+ GFP_KERNEL);
+ if (!cpu_state->mgr)
+ goto err2;
+ sha256_ctx_mgr_init(cpu_state->mgr);
+ INIT_LIST_HEAD(&cpu_state->work_list);
+ spin_lock_init(&cpu_state->work_lock);
+ }
+ sha256_mb_alg_state.flusher = &sha256_mb_flusher;
+
+ err = crypto_register_ahash(&sha256_mb_areq_alg);
+ if (err)
+ goto err2;
+ err = crypto_register_ahash(&sha256_mb_async_alg);
+ if (err)
+ goto err1;
+
+
+ return 0;
+err1:
+ crypto_unregister_ahash(&sha256_mb_areq_alg);
+err2:
+ for_each_possible_cpu(cpu) {
+ cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
+ kfree(cpu_state->mgr);
+ }
+ free_percpu(sha256_mb_alg_state.alg_cstate);
+ return -ENODEV;
+}
+
+static void __exit sha256_mb_mod_fini(void)
+{
+ int cpu;
+ struct mcryptd_alg_cstate *cpu_state;
+
+ crypto_unregister_ahash(&sha256_mb_async_alg);
+ crypto_unregister_ahash(&sha256_mb_areq_alg);
+ for_each_possible_cpu(cpu) {
+ cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
+ kfree(cpu_state->mgr);
+ }
+ free_percpu(sha256_mb_alg_state.alg_cstate);
+}
+
+module_init(sha256_mb_mod_init);
+module_exit(sha256_mb_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, multi buffer accelerated");
+
+MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
new file mode 100644
index 000000000000..edd252b73206
--- /dev/null
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
@@ -0,0 +1,136 @@
+/*
+ * Header file for multi buffer SHA256 context
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SHA_MB_CTX_INTERNAL_H
+#define _SHA_MB_CTX_INTERNAL_H
+
+#include "sha256_mb_mgr.h"
+
+#define HASH_UPDATE 0x00
+#define HASH_FIRST 0x01
+#define HASH_LAST 0x02
+#define HASH_ENTIRE 0x03
+#define HASH_DONE 0x04
+#define HASH_FINAL 0x08
+
+#define HASH_CTX_STS_IDLE 0x00
+#define HASH_CTX_STS_PROCESSING 0x01
+#define HASH_CTX_STS_LAST 0x02
+#define HASH_CTX_STS_COMPLETE 0x04
+
+enum hash_ctx_error {
+ HASH_CTX_ERROR_NONE = 0,
+ HASH_CTX_ERROR_INVALID_FLAGS = -1,
+ HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
+ HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
+
+#ifdef HASH_CTX_DEBUG
+ HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
+#endif
+};
+
+
+#define hash_ctx_user_data(ctx) ((ctx)->user_data)
+#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
+#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
+#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
+#define hash_ctx_status(ctx) ((ctx)->status)
+#define hash_ctx_error(ctx) ((ctx)->error)
+#define hash_ctx_init(ctx) \
+ do { \
+ (ctx)->error = HASH_CTX_ERROR_NONE; \
+ (ctx)->status = HASH_CTX_STS_COMPLETE; \
+ } while (0)
+
+
+/* Hash Constants and Typedefs */
+#define SHA256_DIGEST_LENGTH 8
+#define SHA256_LOG2_BLOCK_SIZE 6
+
+#define SHA256_PADLENGTHFIELD_SIZE 8
+
+#ifdef SHA_MB_DEBUG
+#define assert(expr) \
+do { \
+ if (unlikely(!(expr))) { \
+ printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
+ #expr, __FILE__, __func__, __LINE__); \
+ } \
+} while (0)
+#else
+#define assert(expr) do {} while (0)
+#endif
+
+struct sha256_ctx_mgr {
+ struct sha256_mb_mgr mgr;
+};
+
+/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */
+
+struct sha256_hash_ctx {
+ /* Must be at struct offset 0 */
+ struct job_sha256 job;
+ /* status flag */
+ int status;
+ /* error flag */
+ int error;
+
+ uint32_t total_length;
+ const void *incoming_buffer;
+ uint32_t incoming_buffer_length;
+ uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2];
+ uint32_t partial_block_buffer_length;
+ void *user_data;
+};
+
+#endif
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
new file mode 100644
index 000000000000..b01ae408c56d
--- /dev/null
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
@@ -0,0 +1,108 @@
+/*
+ * Header file for multi buffer SHA256 algorithm manager
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef __SHA_MB_MGR_H
+#define __SHA_MB_MGR_H
+
+#include <linux/types.h>
+
+#define NUM_SHA256_DIGEST_WORDS 8
+
+enum job_sts { STS_UNKNOWN = 0,
+ STS_BEING_PROCESSED = 1,
+ STS_COMPLETED = 2,
+ STS_INTERNAL_ERROR = 3,
+ STS_ERROR = 4
+};
+
+struct job_sha256 {
+ u8 *buffer;
+ u32 len;
+ u32 result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32);
+ enum job_sts status;
+ void *user_data;
+};
+
+/* SHA256 out-of-order scheduler */
+
+/* typedef uint32_t sha8_digest_array[8][8]; */
+
+struct sha256_args_x8 {
+ uint32_t digest[8][8];
+ uint8_t *data_ptr[8];
+};
+
+struct sha256_lane_data {
+ struct job_sha256 *job_in_lane;
+};
+
+struct sha256_mb_mgr {
+ struct sha256_args_x8 args;
+
+ uint32_t lens[8];
+
+ /* each byte is index (0...7) of unused lanes */
+ uint64_t unused_lanes;
+ /* byte 4 is set to FF as a flag */
+ struct sha256_lane_data ldata[8];
+};
+
+
+#define SHA256_MB_MGR_NUM_LANES_AVX2 8
+
+void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state);
+struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state,
+ struct job_sha256 *job);
+struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state);
+struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state);
+
+#endif
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
new file mode 100644
index 000000000000..5c377bac21d0
--- /dev/null
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
@@ -0,0 +1,304 @@
+/*
+ * Header file for multi buffer SHA256 algorithm data structure
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+# Macros for defining data structures
+
+# Usage example
+
+#START_FIELDS # JOB_AES
+### name size align
+#FIELD _plaintext, 8, 8 # pointer to plaintext
+#FIELD _ciphertext, 8, 8 # pointer to ciphertext
+#FIELD _IV, 16, 8 # IV
+#FIELD _keys, 8, 8 # pointer to keys
+#FIELD _len, 4, 4 # length in bytes
+#FIELD _status, 4, 4 # status enumeration
+#FIELD _user_data, 8, 8 # pointer to user data
+#UNION _union, size1, align1, \
+# size2, align2, \
+# size3, align3, \
+# ...
+#END_FIELDS
+#%assign _JOB_AES_size _FIELD_OFFSET
+#%assign _JOB_AES_align _STRUCT_ALIGN
+
+#########################################################################
+
+# Alternate "struc-like" syntax:
+# STRUCT job_aes2
+# RES_Q .plaintext, 1
+# RES_Q .ciphertext, 1
+# RES_DQ .IV, 1
+# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
+# RES_U .union, size1, align1, \
+# size2, align2, \
+# ...
+# ENDSTRUCT
+# # Following only needed if nesting
+# %assign job_aes2_size _FIELD_OFFSET
+# %assign job_aes2_align _STRUCT_ALIGN
+#
+# RES_* macros take a name, a count and an optional alignment.
+# The count in in terms of the base size of the macro, and the
+# default alignment is the base size.
+# The macros are:
+# Macro Base size
+# RES_B 1
+# RES_W 2
+# RES_D 4
+# RES_Q 8
+# RES_DQ 16
+# RES_Y 32
+# RES_Z 64
+#
+# RES_U defines a union. It's arguments are a name and two or more
+# pairs of "size, alignment"
+#
+# The two assigns are only needed if this structure is being nested
+# within another. Even if the assigns are not done, one can still use
+# STRUCT_NAME_size as the size of the structure.
+#
+# Note that for nesting, you still need to assign to STRUCT_NAME_size.
+#
+# The differences between this and using "struc" directly are that each
+# type is implicitly aligned to its natural length (although this can be
+# over-ridden with an explicit third parameter), and that the structure
+# is padded at the end to its overall alignment.
+#
+
+#########################################################################
+
+#ifndef _DATASTRUCT_ASM_
+#define _DATASTRUCT_ASM_
+
+#define SZ8 8*SHA256_DIGEST_WORD_SIZE
+#define ROUNDS 64*SZ8
+#define PTR_SZ 8
+#define SHA256_DIGEST_WORD_SIZE 4
+#define MAX_SHA256_LANES 8
+#define SHA256_DIGEST_WORDS 8
+#define SHA256_DIGEST_ROW_SIZE (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE)
+#define SHA256_DIGEST_SIZE (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS)
+#define SHA256_BLK_SZ 64
+
+# START_FIELDS
+.macro START_FIELDS
+ _FIELD_OFFSET = 0
+ _STRUCT_ALIGN = 0
+.endm
+
+# FIELD name size align
+.macro FIELD name size align
+ _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
+ \name = _FIELD_OFFSET
+ _FIELD_OFFSET = _FIELD_OFFSET + (\size)
+.if (\align > _STRUCT_ALIGN)
+ _STRUCT_ALIGN = \align
+.endif
+.endm
+
+# END_FIELDS
+.macro END_FIELDS
+ _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
+.endm
+
+########################################################################
+
+.macro STRUCT p1
+START_FIELDS
+.struc \p1
+.endm
+
+.macro ENDSTRUCT
+ tmp = _FIELD_OFFSET
+ END_FIELDS
+ tmp = (_FIELD_OFFSET - %%tmp)
+.if (tmp > 0)
+ .lcomm tmp
+.endif
+.endstruc
+.endm
+
+## RES_int name size align
+.macro RES_int p1 p2 p3
+ name = \p1
+ size = \p2
+ align = .\p3
+
+ _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
+.align align
+.lcomm name size
+ _FIELD_OFFSET = _FIELD_OFFSET + (size)
+.if (align > _STRUCT_ALIGN)
+ _STRUCT_ALIGN = align
+.endif
+.endm
+
+# macro RES_B name, size [, align]
+.macro RES_B _name, _size, _align=1
+RES_int _name _size _align
+.endm
+
+# macro RES_W name, size [, align]
+.macro RES_W _name, _size, _align=2
+RES_int _name 2*(_size) _align
+.endm
+
+# macro RES_D name, size [, align]
+.macro RES_D _name, _size, _align=4
+RES_int _name 4*(_size) _align
+.endm
+
+# macro RES_Q name, size [, align]
+.macro RES_Q _name, _size, _align=8
+RES_int _name 8*(_size) _align
+.endm
+
+# macro RES_DQ name, size [, align]
+.macro RES_DQ _name, _size, _align=16
+RES_int _name 16*(_size) _align
+.endm
+
+# macro RES_Y name, size [, align]
+.macro RES_Y _name, _size, _align=32
+RES_int _name 32*(_size) _align
+.endm
+
+# macro RES_Z name, size [, align]
+.macro RES_Z _name, _size, _align=64
+RES_int _name 64*(_size) _align
+.endm
+
+#endif
+
+
+########################################################################
+#### Define SHA256 Out Of Order Data Structures
+########################################################################
+
+START_FIELDS # LANE_DATA
+### name size align
+FIELD _job_in_lane, 8, 8 # pointer to job object
+END_FIELDS
+
+ _LANE_DATA_size = _FIELD_OFFSET
+ _LANE_DATA_align = _STRUCT_ALIGN
+
+########################################################################
+
+START_FIELDS # SHA256_ARGS_X4
+### name size align
+FIELD _digest, 4*8*8, 4 # transposed digest
+FIELD _data_ptr, 8*8, 8 # array of pointers to data
+END_FIELDS
+
+ _SHA256_ARGS_X4_size = _FIELD_OFFSET
+ _SHA256_ARGS_X4_align = _STRUCT_ALIGN
+ _SHA256_ARGS_X8_size = _FIELD_OFFSET
+ _SHA256_ARGS_X8_align = _STRUCT_ALIGN
+
+#######################################################################
+
+START_FIELDS # MB_MGR
+### name size align
+FIELD _args, _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align
+FIELD _lens, 4*8, 8
+FIELD _unused_lanes, 8, 8
+FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align
+END_FIELDS
+
+ _MB_MGR_size = _FIELD_OFFSET
+ _MB_MGR_align = _STRUCT_ALIGN
+
+_args_digest = _args + _digest
+_args_data_ptr = _args + _data_ptr
+
+#######################################################################
+
+START_FIELDS #STACK_FRAME
+### name size align
+FIELD _data, 16*SZ8, 1 # transposed digest
+FIELD _digest, 8*SZ8, 1 # array of pointers to data
+FIELD _ytmp, 4*SZ8, 1
+FIELD _rsp, 8, 1
+END_FIELDS
+
+ _STACK_FRAME_size = _FIELD_OFFSET
+ _STACK_FRAME_align = _STRUCT_ALIGN
+
+#######################################################################
+
+########################################################################
+#### Define constants
+########################################################################
+
+#define STS_UNKNOWN 0
+#define STS_BEING_PROCESSED 1
+#define STS_COMPLETED 2
+
+########################################################################
+#### Define JOB_SHA256 structure
+########################################################################
+
+START_FIELDS # JOB_SHA256
+
+### name size align
+FIELD _buffer, 8, 8 # pointer to buffer
+FIELD _len, 8, 8 # length in bytes
+FIELD _result_digest, 8*4, 32 # Digest (output)
+FIELD _status, 4, 4
+FIELD _user_data, 8, 8
+END_FIELDS
+
+ _JOB_SHA256_size = _FIELD_OFFSET
+ _JOB_SHA256_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
new file mode 100644
index 000000000000..b691da981cd9
--- /dev/null
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
@@ -0,0 +1,304 @@
+/*
+ * Flush routine for SHA256 multibuffer
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <linux/linkage.h>
+#include <asm/frame.h>
+#include "sha256_mb_mgr_datastruct.S"
+
+.extern sha256_x8_avx2
+
+#LINUX register definitions
+#define arg1 %rdi
+#define arg2 %rsi
+
+# Common register definitions
+#define state arg1
+#define job arg2
+#define len2 arg2
+
+# idx must be a register not clobberred by sha1_mult
+#define idx %r8
+#define DWORD_idx %r8d
+
+#define unused_lanes %rbx
+#define lane_data %rbx
+#define tmp2 %rbx
+#define tmp2_w %ebx
+
+#define job_rax %rax
+#define tmp1 %rax
+#define size_offset %rax
+#define tmp %rax
+#define start_offset %rax
+
+#define tmp3 %arg1
+
+#define extra_blocks %arg2
+#define p %arg2
+
+.macro LABEL prefix n
+\prefix\n\():
+.endm
+
+.macro JNE_SKIP i
+jne skip_\i
+.endm
+
+.altmacro
+.macro SET_OFFSET _offset
+offset = \_offset
+.endm
+.noaltmacro
+
+# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state)
+# arg 1 : rcx : state
+ENTRY(sha256_mb_mgr_flush_avx2)
+ FRAME_BEGIN
+ push %rbx
+
+ # If bit (32+3) is set, then all lanes are empty
+ mov _unused_lanes(state), unused_lanes
+ bt $32+3, unused_lanes
+ jc return_null
+
+ # find a lane with a non-null job
+ xor idx, idx
+ offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne one(%rip), idx
+ offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne two(%rip), idx
+ offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne three(%rip), idx
+ offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne four(%rip), idx
+ offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne five(%rip), idx
+ offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne six(%rip), idx
+ offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne seven(%rip), idx
+
+ # copy idx to empty lanes
+copy_lane_data:
+ offset = (_args + _data_ptr)
+ mov offset(state,idx,8), tmp
+
+ I = 0
+.rep 8
+ offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+.altmacro
+ JNE_SKIP %I
+ offset = (_args + _data_ptr + 8*I)
+ mov tmp, offset(state)
+ offset = (_lens + 4*I)
+ movl $0xFFFFFFFF, offset(state)
+LABEL skip_ %I
+ I = (I+1)
+.noaltmacro
+.endr
+
+ # Find min length
+ vmovdqa _lens+0*16(state), %xmm0
+ vmovdqa _lens+1*16(state), %xmm1
+
+ vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
+ vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
+ vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
+ vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
+ vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
+
+ vmovd %xmm2, DWORD_idx
+ mov idx, len2
+ and $0xF, idx
+ shr $4, len2
+ jz len_is_0
+
+ vpand clear_low_nibble(%rip), %xmm2, %xmm2
+ vpshufd $0, %xmm2, %xmm2
+
+ vpsubd %xmm2, %xmm0, %xmm0
+ vpsubd %xmm2, %xmm1, %xmm1
+
+ vmovdqa %xmm0, _lens+0*16(state)
+ vmovdqa %xmm1, _lens+1*16(state)
+
+ # "state" and "args" are the same address, arg1
+ # len is arg2
+ call sha256_x8_avx2
+ # state and idx are intact
+
+len_is_0:
+ # process completed job "idx"
+ imul $_LANE_DATA_size, idx, lane_data
+ lea _ldata(state, lane_data), lane_data
+
+ mov _job_in_lane(lane_data), job_rax
+ movq $0, _job_in_lane(lane_data)
+ movl $STS_COMPLETED, _status(job_rax)
+ mov _unused_lanes(state), unused_lanes
+ shl $4, unused_lanes
+ or idx, unused_lanes
+
+ mov unused_lanes, _unused_lanes(state)
+ movl $0xFFFFFFFF, _lens(state,idx,4)
+
+ vmovd _args_digest(state , idx, 4) , %xmm0
+ vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
+ vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
+ vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
+ vmovd _args_digest+4*32(state, idx, 4), %xmm1
+ vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
+ vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
+ vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
+
+ vmovdqu %xmm0, _result_digest(job_rax)
+ offset = (_result_digest + 1*16)
+ vmovdqu %xmm1, offset(job_rax)
+
+return:
+ pop %rbx
+ FRAME_END
+ ret
+
+return_null:
+ xor job_rax, job_rax
+ jmp return
+ENDPROC(sha256_mb_mgr_flush_avx2)
+
+##############################################################################
+
+.align 16
+ENTRY(sha256_mb_mgr_get_comp_job_avx2)
+ push %rbx
+
+ ## if bit 32+3 is set, then all lanes are empty
+ mov _unused_lanes(state), unused_lanes
+ bt $(32+3), unused_lanes
+ jc .return_null
+
+ # Find min length
+ vmovdqa _lens(state), %xmm0
+ vmovdqa _lens+1*16(state), %xmm1
+
+ vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
+ vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
+ vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
+ vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
+ vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
+
+ vmovd %xmm2, DWORD_idx
+ test $~0xF, idx
+ jnz .return_null
+
+ # process completed job "idx"
+ imul $_LANE_DATA_size, idx, lane_data
+ lea _ldata(state, lane_data), lane_data
+
+ mov _job_in_lane(lane_data), job_rax
+ movq $0, _job_in_lane(lane_data)
+ movl $STS_COMPLETED, _status(job_rax)
+ mov _unused_lanes(state), unused_lanes
+ shl $4, unused_lanes
+ or idx, unused_lanes
+ mov unused_lanes, _unused_lanes(state)
+
+ movl $0xFFFFFFFF, _lens(state, idx, 4)
+
+ vmovd _args_digest(state, idx, 4), %xmm0
+ vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
+ vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
+ vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
+ movl _args_digest+4*32(state, idx, 4), tmp2_w
+ vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
+ vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
+ vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
+
+ vmovdqu %xmm0, _result_digest(job_rax)
+ movl tmp2_w, _result_digest+1*16(job_rax)
+
+ pop %rbx
+
+ ret
+
+.return_null:
+ xor job_rax, job_rax
+ pop %rbx
+ ret
+ENDPROC(sha256_mb_mgr_get_comp_job_avx2)
+
+.data
+
+.align 16
+clear_low_nibble:
+.octa 0x000000000000000000000000FFFFFFF0
+one:
+.quad 1
+two:
+.quad 2
+three:
+.quad 3
+four:
+.quad 4
+five:
+.quad 5
+six:
+.quad 6
+seven:
+.quad 7
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
new file mode 100644
index 000000000000..b0c498371e67
--- /dev/null
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
@@ -0,0 +1,65 @@
+/*
+ * Initialization code for multi buffer SHA256 algorithm for AVX2
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "sha256_mb_mgr.h"
+
+void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state)
+{
+ unsigned int j;
+
+ state->unused_lanes = 0xF76543210ULL;
+ for (j = 0; j < 8; j++) {
+ state->lens[j] = 0xFFFFFFFF;
+ state->ldata[j].job_in_lane = NULL;
+ }
+}
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
new file mode 100644
index 000000000000..7ea670e25acc
--- /dev/null
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
@@ -0,0 +1,215 @@
+/*
+ * Buffer submit code for multi buffer SHA256 algorithm
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+#include "sha256_mb_mgr_datastruct.S"
+
+.extern sha256_x8_avx2
+
+# LINUX register definitions
+arg1 = %rdi
+arg2 = %rsi
+size_offset = %rcx
+tmp2 = %rcx
+extra_blocks = %rdx
+
+# Common definitions
+#define state arg1
+#define job %rsi
+#define len2 arg2
+#define p2 arg2
+
+# idx must be a register not clobberred by sha1_x8_avx2
+idx = %r8
+DWORD_idx = %r8d
+last_len = %r8
+
+p = %r11
+start_offset = %r11
+
+unused_lanes = %rbx
+BYTE_unused_lanes = %bl
+
+job_rax = %rax
+len = %rax
+DWORD_len = %eax
+
+lane = %r12
+tmp3 = %r12
+
+tmp = %r9
+DWORD_tmp = %r9d
+
+lane_data = %r10
+
+# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job)
+# arg 1 : rcx : state
+# arg 2 : rdx : job
+ENTRY(sha256_mb_mgr_submit_avx2)
+ FRAME_BEGIN
+ push %rbx
+ push %r12
+
+ mov _unused_lanes(state), unused_lanes
+ mov unused_lanes, lane
+ and $0xF, lane
+ shr $4, unused_lanes
+ imul $_LANE_DATA_size, lane, lane_data
+ movl $STS_BEING_PROCESSED, _status(job)
+ lea _ldata(state, lane_data), lane_data
+ mov unused_lanes, _unused_lanes(state)
+ movl _len(job), DWORD_len
+
+ mov job, _job_in_lane(lane_data)
+ shl $4, len
+ or lane, len
+
+ movl DWORD_len, _lens(state , lane, 4)
+
+ # Load digest words from result_digest
+ vmovdqu _result_digest(job), %xmm0
+ vmovdqu _result_digest+1*16(job), %xmm1
+ vmovd %xmm0, _args_digest(state, lane, 4)
+ vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4)
+ vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4)
+ vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4)
+ vmovd %xmm1, _args_digest+4*32(state , lane, 4)
+
+ vpextrd $1, %xmm1, _args_digest+5*32(state , lane, 4)
+ vpextrd $2, %xmm1, _args_digest+6*32(state , lane, 4)
+ vpextrd $3, %xmm1, _args_digest+7*32(state , lane, 4)
+
+ mov _buffer(job), p
+ mov p, _args_data_ptr(state, lane, 8)
+
+ cmp $0xF, unused_lanes
+ jne return_null
+
+start_loop:
+ # Find min length
+ vmovdqa _lens(state), %xmm0
+ vmovdqa _lens+1*16(state), %xmm1
+
+ vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
+ vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
+ vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
+ vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
+ vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
+
+ vmovd %xmm2, DWORD_idx
+ mov idx, len2
+ and $0xF, idx
+ shr $4, len2
+ jz len_is_0
+
+ vpand clear_low_nibble(%rip), %xmm2, %xmm2
+ vpshufd $0, %xmm2, %xmm2
+
+ vpsubd %xmm2, %xmm0, %xmm0
+ vpsubd %xmm2, %xmm1, %xmm1
+
+ vmovdqa %xmm0, _lens + 0*16(state)
+ vmovdqa %xmm1, _lens + 1*16(state)
+
+ # "state" and "args" are the same address, arg1
+ # len is arg2
+ call sha256_x8_avx2
+
+ # state and idx are intact
+
+len_is_0:
+ # process completed job "idx"
+ imul $_LANE_DATA_size, idx, lane_data
+ lea _ldata(state, lane_data), lane_data
+
+ mov _job_in_lane(lane_data), job_rax
+ mov _unused_lanes(state), unused_lanes
+ movq $0, _job_in_lane(lane_data)
+ movl $STS_COMPLETED, _status(job_rax)
+ shl $4, unused_lanes
+ or idx, unused_lanes
+ mov unused_lanes, _unused_lanes(state)
+
+ movl $0xFFFFFFFF, _lens(state,idx,4)
+
+ vmovd _args_digest(state, idx, 4), %xmm0
+ vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
+ vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
+ vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
+ vmovd _args_digest+4*32(state, idx, 4), %xmm1
+
+ vpinsrd $1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1
+ vpinsrd $2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1
+ vpinsrd $3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1
+
+ vmovdqu %xmm0, _result_digest(job_rax)
+ vmovdqu %xmm1, _result_digest+1*16(job_rax)
+
+return:
+ pop %r12
+ pop %rbx
+ FRAME_END
+ ret
+
+return_null:
+ xor job_rax, job_rax
+ jmp return
+
+ENDPROC(sha256_mb_mgr_submit_avx2)
+
+.data
+
+.align 16
+clear_low_nibble:
+ .octa 0x000000000000000000000000FFFFFFF0
diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
new file mode 100644
index 000000000000..aa21aea4c722
--- /dev/null
+++ b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
@@ -0,0 +1,593 @@
+/*
+ * Multi-buffer SHA256 algorithm hash compute routine
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/linkage.h>
+#include "sha256_mb_mgr_datastruct.S"
+
+## code to compute oct SHA256 using SSE-256
+## outer calling routine takes care of save and restore of XMM registers
+## Logic designed/laid out by JDG
+
+## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; %ymm0-15
+## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15
+## Linux preserves: rdi rbp r8
+##
+## clobbers %ymm0-15
+
+arg1 = %rdi
+arg2 = %rsi
+reg3 = %rcx
+reg4 = %rdx
+
+# Common definitions
+STATE = arg1
+INP_SIZE = arg2
+
+IDX = %rax
+ROUND = %rbx
+TBL = reg3
+
+inp0 = %r9
+inp1 = %r10
+inp2 = %r11
+inp3 = %r12
+inp4 = %r13
+inp5 = %r14
+inp6 = %r15
+inp7 = reg4
+
+a = %ymm0
+b = %ymm1
+c = %ymm2
+d = %ymm3
+e = %ymm4
+f = %ymm5
+g = %ymm6
+h = %ymm7
+
+T1 = %ymm8
+
+a0 = %ymm12
+a1 = %ymm13
+a2 = %ymm14
+TMP = %ymm15
+TMP0 = %ymm6
+TMP1 = %ymm7
+
+TT0 = %ymm8
+TT1 = %ymm9
+TT2 = %ymm10
+TT3 = %ymm11
+TT4 = %ymm12
+TT5 = %ymm13
+TT6 = %ymm14
+TT7 = %ymm15
+
+# Define stack usage
+
+# Assume stack aligned to 32 bytes before call
+# Therefore FRAMESZ mod 32 must be 32-8 = 24
+
+#define FRAMESZ 0x388
+
+#define VMOVPS vmovups
+
+# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
+# "transpose" data in {r0...r7} using temps {t0...t1}
+# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
+# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
+# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
+# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
+# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
+# r4 = {e7 e6 e5 e4 e3 e2 e1 e0}
+# r5 = {f7 f6 f5 f4 f3 f2 f1 f0}
+# r6 = {g7 g6 g5 g4 g3 g2 g1 g0}
+# r7 = {h7 h6 h5 h4 h3 h2 h1 h0}
+#
+# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
+# r0 = {h0 g0 f0 e0 d0 c0 b0 a0}
+# r1 = {h1 g1 f1 e1 d1 c1 b1 a1}
+# r2 = {h2 g2 f2 e2 d2 c2 b2 a2}
+# r3 = {h3 g3 f3 e3 d3 c3 b3 a3}
+# r4 = {h4 g4 f4 e4 d4 c4 b4 a4}
+# r5 = {h5 g5 f5 e5 d5 c5 b5 a5}
+# r6 = {h6 g6 f6 e6 d6 c6 b6 a6}
+# r7 = {h7 g7 f7 e7 d7 c7 b7 a7}
+#
+
+.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
+ # process top half (r0..r3) {a...d}
+ vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
+ vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
+ vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
+ vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
+ vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1}
+ vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2}
+ vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3}
+ vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0}
+
+ # use r2 in place of t0
+ # process bottom half (r4..r7) {e...h}
+ vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0}
+ vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2}
+ vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0}
+ vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2}
+ vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1}
+ vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2}
+ vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3}
+ vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0}
+
+ vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6
+ vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2
+ vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5
+ vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1
+ vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7
+ vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3
+ vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4
+ vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0
+
+.endm
+
+.macro ROTATE_ARGS
+TMP_ = h
+h = g
+g = f
+f = e
+e = d
+d = c
+c = b
+b = a
+a = TMP_
+.endm
+
+.macro _PRORD reg imm tmp
+ vpslld $(32-\imm),\reg,\tmp
+ vpsrld $\imm,\reg, \reg
+ vpor \tmp,\reg, \reg
+.endm
+
+# PRORD_nd reg, imm, tmp, src
+.macro _PRORD_nd reg imm tmp src
+ vpslld $(32-\imm), \src, \tmp
+ vpsrld $\imm, \src, \reg
+ vpor \tmp, \reg, \reg
+.endm
+
+# PRORD dst/src, amt
+.macro PRORD reg imm
+ _PRORD \reg,\imm,TMP
+.endm
+
+# PRORD_nd dst, src, amt
+.macro PRORD_nd reg tmp imm
+ _PRORD_nd \reg, \imm, TMP, \tmp
+.endm
+
+# arguments passed implicitly in preprocessor symbols i, a...h
+.macro ROUND_00_15 _T1 i
+ PRORD_nd a0,e,5 # sig1: a0 = (e >> 5)
+
+ vpxor g, f, a2 # ch: a2 = f^g
+ vpand e,a2, a2 # ch: a2 = (f^g)&e
+ vpxor g, a2, a2 # a2 = ch
+
+ PRORD_nd a1,e,25 # sig1: a1 = (e >> 25)
+
+ vmovdqu \_T1,(SZ8*(\i & 0xf))(%rsp)
+ vpaddd (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K
+ vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5)
+ PRORD a0, 6 # sig1: a0 = (e >> 6) ^ (e >> 11)
+ vpaddd a2, h, h # h = h + ch
+ PRORD_nd a2,a,11 # sig0: a2 = (a >> 11)
+ vpaddd \_T1,h, h # h = h + ch + W + K
+ vpxor a1, a0, a0 # a0 = sigma1
+ PRORD_nd a1,a,22 # sig0: a1 = (a >> 22)
+ vpxor c, a, \_T1 # maj: T1 = a^c
+ add $SZ8, ROUND # ROUND++
+ vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b
+ vpaddd a0, h, h
+ vpaddd h, d, d
+ vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11)
+ PRORD a2,2 # sig0: a2 = (a >> 2) ^ (a >> 13)
+ vpxor a1, a2, a2 # a2 = sig0
+ vpand c, a, a1 # maj: a1 = a&c
+ vpor \_T1, a1, a1 # a1 = maj
+ vpaddd a1, h, h # h = h + ch + W + K + maj
+ vpaddd a2, h, h # h = h + ch + W + K + maj + sigma0
+ ROTATE_ARGS
+.endm
+
+# arguments passed implicitly in preprocessor symbols i, a...h
+.macro ROUND_16_XX _T1 i
+ vmovdqu (SZ8*((\i-15)&0xf))(%rsp), \_T1
+ vmovdqu (SZ8*((\i-2)&0xf))(%rsp), a1
+ vmovdqu \_T1, a0
+ PRORD \_T1,11
+ vmovdqu a1, a2
+ PRORD a1,2
+ vpxor a0, \_T1, \_T1
+ PRORD \_T1, 7
+ vpxor a2, a1, a1
+ PRORD a1, 17
+ vpsrld $3, a0, a0
+ vpxor a0, \_T1, \_T1
+ vpsrld $10, a2, a2
+ vpxor a2, a1, a1
+ vpaddd (SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1
+ vpaddd (SZ8*((\i-7)&0xf))(%rsp), a1, a1
+ vpaddd a1, \_T1, \_T1
+
+ ROUND_00_15 \_T1,\i
+.endm
+
+# SHA256_ARGS:
+# UINT128 digest[8]; // transposed digests
+# UINT8 *data_ptr[4];
+
+# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes);
+# arg 1 : STATE : pointer to array of pointers to input data
+# arg 2 : INP_SIZE : size of input in blocks
+ # general registers preserved in outer calling routine
+ # outer calling routine saves all the XMM registers
+ # save rsp, allocate 32-byte aligned for local variables
+ENTRY(sha256_x8_avx2)
+
+ # save callee-saved clobbered registers to comply with C function ABI
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ mov %rsp, IDX
+ sub $FRAMESZ, %rsp
+ and $~0x1F, %rsp
+ mov IDX, _rsp(%rsp)
+
+ # Load the pre-transposed incoming digest.
+ vmovdqu 0*SHA256_DIGEST_ROW_SIZE(STATE),a
+ vmovdqu 1*SHA256_DIGEST_ROW_SIZE(STATE),b
+ vmovdqu 2*SHA256_DIGEST_ROW_SIZE(STATE),c
+ vmovdqu 3*SHA256_DIGEST_ROW_SIZE(STATE),d
+ vmovdqu 4*SHA256_DIGEST_ROW_SIZE(STATE),e
+ vmovdqu 5*SHA256_DIGEST_ROW_SIZE(STATE),f
+ vmovdqu 6*SHA256_DIGEST_ROW_SIZE(STATE),g
+ vmovdqu 7*SHA256_DIGEST_ROW_SIZE(STATE),h
+
+ lea K256_8(%rip),TBL
+
+ # load the address of each of the 4 message lanes
+ # getting ready to transpose input onto stack
+ mov _args_data_ptr+0*PTR_SZ(STATE),inp0
+ mov _args_data_ptr+1*PTR_SZ(STATE),inp1
+ mov _args_data_ptr+2*PTR_SZ(STATE),inp2
+ mov _args_data_ptr+3*PTR_SZ(STATE),inp3
+ mov _args_data_ptr+4*PTR_SZ(STATE),inp4
+ mov _args_data_ptr+5*PTR_SZ(STATE),inp5
+ mov _args_data_ptr+6*PTR_SZ(STATE),inp6
+ mov _args_data_ptr+7*PTR_SZ(STATE),inp7
+
+ xor IDX, IDX
+lloop:
+ xor ROUND, ROUND
+
+ # save old digest
+ vmovdqu a, _digest(%rsp)
+ vmovdqu b, _digest+1*SZ8(%rsp)
+ vmovdqu c, _digest+2*SZ8(%rsp)
+ vmovdqu d, _digest+3*SZ8(%rsp)
+ vmovdqu e, _digest+4*SZ8(%rsp)
+ vmovdqu f, _digest+5*SZ8(%rsp)
+ vmovdqu g, _digest+6*SZ8(%rsp)
+ vmovdqu h, _digest+7*SZ8(%rsp)
+ i = 0
+.rep 2
+ VMOVPS i*32(inp0, IDX), TT0
+ VMOVPS i*32(inp1, IDX), TT1
+ VMOVPS i*32(inp2, IDX), TT2
+ VMOVPS i*32(inp3, IDX), TT3
+ VMOVPS i*32(inp4, IDX), TT4
+ VMOVPS i*32(inp5, IDX), TT5
+ VMOVPS i*32(inp6, IDX), TT6
+ VMOVPS i*32(inp7, IDX), TT7
+ vmovdqu g, _ytmp(%rsp)
+ vmovdqu h, _ytmp+1*SZ8(%rsp)
+ TRANSPOSE8 TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7, TMP0, TMP1
+ vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1
+ vmovdqu _ytmp(%rsp), g
+ vpshufb TMP1, TT0, TT0
+ vpshufb TMP1, TT1, TT1
+ vpshufb TMP1, TT2, TT2
+ vpshufb TMP1, TT3, TT3
+ vpshufb TMP1, TT4, TT4
+ vpshufb TMP1, TT5, TT5
+ vpshufb TMP1, TT6, TT6
+ vpshufb TMP1, TT7, TT7
+ vmovdqu _ytmp+1*SZ8(%rsp), h
+ vmovdqu TT4, _ytmp(%rsp)
+ vmovdqu TT5, _ytmp+1*SZ8(%rsp)
+ vmovdqu TT6, _ytmp+2*SZ8(%rsp)
+ vmovdqu TT7, _ytmp+3*SZ8(%rsp)
+ ROUND_00_15 TT0,(i*8+0)
+ vmovdqu _ytmp(%rsp), TT0
+ ROUND_00_15 TT1,(i*8+1)
+ vmovdqu _ytmp+1*SZ8(%rsp), TT1
+ ROUND_00_15 TT2,(i*8+2)
+ vmovdqu _ytmp+2*SZ8(%rsp), TT2
+ ROUND_00_15 TT3,(i*8+3)
+ vmovdqu _ytmp+3*SZ8(%rsp), TT3
+ ROUND_00_15 TT0,(i*8+4)
+ ROUND_00_15 TT1,(i*8+5)
+ ROUND_00_15 TT2,(i*8+6)
+ ROUND_00_15 TT3,(i*8+7)
+ i = (i+1)
+.endr
+ add $64, IDX
+ i = (i*8)
+
+ jmp Lrounds_16_xx
+.align 16
+Lrounds_16_xx:
+.rep 16
+ ROUND_16_XX T1, i
+ i = (i+1)
+.endr
+
+ cmp $ROUNDS,ROUND
+ jb Lrounds_16_xx
+
+ # add old digest
+ vpaddd _digest+0*SZ8(%rsp), a, a
+ vpaddd _digest+1*SZ8(%rsp), b, b
+ vpaddd _digest+2*SZ8(%rsp), c, c
+ vpaddd _digest+3*SZ8(%rsp), d, d
+ vpaddd _digest+4*SZ8(%rsp), e, e
+ vpaddd _digest+5*SZ8(%rsp), f, f
+ vpaddd _digest+6*SZ8(%rsp), g, g
+ vpaddd _digest+7*SZ8(%rsp), h, h
+
+ sub $1, INP_SIZE # unit is blocks
+ jne lloop
+
+ # write back to memory (state object) the transposed digest
+ vmovdqu a, 0*SHA256_DIGEST_ROW_SIZE(STATE)
+ vmovdqu b, 1*SHA256_DIGEST_ROW_SIZE(STATE)
+ vmovdqu c, 2*SHA256_DIGEST_ROW_SIZE(STATE)
+ vmovdqu d, 3*SHA256_DIGEST_ROW_SIZE(STATE)
+ vmovdqu e, 4*SHA256_DIGEST_ROW_SIZE(STATE)
+ vmovdqu f, 5*SHA256_DIGEST_ROW_SIZE(STATE)
+ vmovdqu g, 6*SHA256_DIGEST_ROW_SIZE(STATE)
+ vmovdqu h, 7*SHA256_DIGEST_ROW_SIZE(STATE)
+
+ # update input pointers
+ add IDX, inp0
+ mov inp0, _args_data_ptr+0*8(STATE)
+ add IDX, inp1
+ mov inp1, _args_data_ptr+1*8(STATE)
+ add IDX, inp2
+ mov inp2, _args_data_ptr+2*8(STATE)
+ add IDX, inp3
+ mov inp3, _args_data_ptr+3*8(STATE)
+ add IDX, inp4
+ mov inp4, _args_data_ptr+4*8(STATE)
+ add IDX, inp5
+ mov inp5, _args_data_ptr+5*8(STATE)
+ add IDX, inp6
+ mov inp6, _args_data_ptr+6*8(STATE)
+ add IDX, inp7
+ mov inp7, _args_data_ptr+7*8(STATE)
+
+ # Postamble
+ mov _rsp(%rsp), %rsp
+
+ # restore callee-saved clobbered registers
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+
+ ret
+ENDPROC(sha256_x8_avx2)
+.data
+.align 64
+K256_8:
+ .octa 0x428a2f98428a2f98428a2f98428a2f98
+ .octa 0x428a2f98428a2f98428a2f98428a2f98
+ .octa 0x71374491713744917137449171374491
+ .octa 0x71374491713744917137449171374491
+ .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
+ .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
+ .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
+ .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
+ .octa 0x3956c25b3956c25b3956c25b3956c25b
+ .octa 0x3956c25b3956c25b3956c25b3956c25b
+ .octa 0x59f111f159f111f159f111f159f111f1
+ .octa 0x59f111f159f111f159f111f159f111f1
+ .octa 0x923f82a4923f82a4923f82a4923f82a4
+ .octa 0x923f82a4923f82a4923f82a4923f82a4
+ .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
+ .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
+ .octa 0xd807aa98d807aa98d807aa98d807aa98
+ .octa 0xd807aa98d807aa98d807aa98d807aa98
+ .octa 0x12835b0112835b0112835b0112835b01
+ .octa 0x12835b0112835b0112835b0112835b01
+ .octa 0x243185be243185be243185be243185be
+ .octa 0x243185be243185be243185be243185be
+ .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3
+ .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3
+ .octa 0x72be5d7472be5d7472be5d7472be5d74
+ .octa 0x72be5d7472be5d7472be5d7472be5d74
+ .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe
+ .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe
+ .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7
+ .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7
+ .octa 0xc19bf174c19bf174c19bf174c19bf174
+ .octa 0xc19bf174c19bf174c19bf174c19bf174
+ .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1
+ .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1
+ .octa 0xefbe4786efbe4786efbe4786efbe4786
+ .octa 0xefbe4786efbe4786efbe4786efbe4786
+ .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6
+ .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6
+ .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc
+ .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc
+ .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f
+ .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f
+ .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa
+ .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa
+ .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
+ .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
+ .octa 0x76f988da76f988da76f988da76f988da
+ .octa 0x76f988da76f988da76f988da76f988da
+ .octa 0x983e5152983e5152983e5152983e5152
+ .octa 0x983e5152983e5152983e5152983e5152
+ .octa 0xa831c66da831c66da831c66da831c66d
+ .octa 0xa831c66da831c66da831c66da831c66d
+ .octa 0xb00327c8b00327c8b00327c8b00327c8
+ .octa 0xb00327c8b00327c8b00327c8b00327c8
+ .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7
+ .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7
+ .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
+ .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
+ .octa 0xd5a79147d5a79147d5a79147d5a79147
+ .octa 0xd5a79147d5a79147d5a79147d5a79147
+ .octa 0x06ca635106ca635106ca635106ca6351
+ .octa 0x06ca635106ca635106ca635106ca6351
+ .octa 0x14292967142929671429296714292967
+ .octa 0x14292967142929671429296714292967
+ .octa 0x27b70a8527b70a8527b70a8527b70a85
+ .octa 0x27b70a8527b70a8527b70a8527b70a85
+ .octa 0x2e1b21382e1b21382e1b21382e1b2138
+ .octa 0x2e1b21382e1b21382e1b21382e1b2138
+ .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
+ .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
+ .octa 0x53380d1353380d1353380d1353380d13
+ .octa 0x53380d1353380d1353380d1353380d13
+ .octa 0x650a7354650a7354650a7354650a7354
+ .octa 0x650a7354650a7354650a7354650a7354
+ .octa 0x766a0abb766a0abb766a0abb766a0abb
+ .octa 0x766a0abb766a0abb766a0abb766a0abb
+ .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e
+ .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e
+ .octa 0x92722c8592722c8592722c8592722c85
+ .octa 0x92722c8592722c8592722c8592722c85
+ .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
+ .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
+ .octa 0xa81a664ba81a664ba81a664ba81a664b
+ .octa 0xa81a664ba81a664ba81a664ba81a664b
+ .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70
+ .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70
+ .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3
+ .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3
+ .octa 0xd192e819d192e819d192e819d192e819
+ .octa 0xd192e819d192e819d192e819d192e819
+ .octa 0xd6990624d6990624d6990624d6990624
+ .octa 0xd6990624d6990624d6990624d6990624
+ .octa 0xf40e3585f40e3585f40e3585f40e3585
+ .octa 0xf40e3585f40e3585f40e3585f40e3585
+ .octa 0x106aa070106aa070106aa070106aa070
+ .octa 0x106aa070106aa070106aa070106aa070
+ .octa 0x19a4c11619a4c11619a4c11619a4c116
+ .octa 0x19a4c11619a4c11619a4c11619a4c116
+ .octa 0x1e376c081e376c081e376c081e376c08
+ .octa 0x1e376c081e376c081e376c081e376c08
+ .octa 0x2748774c2748774c2748774c2748774c
+ .octa 0x2748774c2748774c2748774c2748774c
+ .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5
+ .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5
+ .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3
+ .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3
+ .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
+ .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
+ .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
+ .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
+ .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3
+ .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3
+ .octa 0x748f82ee748f82ee748f82ee748f82ee
+ .octa 0x748f82ee748f82ee748f82ee748f82ee
+ .octa 0x78a5636f78a5636f78a5636f78a5636f
+ .octa 0x78a5636f78a5636f78a5636f78a5636f
+ .octa 0x84c8781484c8781484c8781484c87814
+ .octa 0x84c8781484c8781484c8781484c87814
+ .octa 0x8cc702088cc702088cc702088cc70208
+ .octa 0x8cc702088cc702088cc702088cc70208
+ .octa 0x90befffa90befffa90befffa90befffa
+ .octa 0x90befffa90befffa90befffa90befffa
+ .octa 0xa4506ceba4506ceba4506ceba4506ceb
+ .octa 0xa4506ceba4506ceba4506ceba4506ceb
+ .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
+ .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
+ .octa 0xc67178f2c67178f2c67178f2c67178f2
+ .octa 0xc67178f2c67178f2c67178f2c67178f2
+PSHUFFLE_BYTE_FLIP_MASK:
+.octa 0x0c0d0e0f08090a0b0405060700010203
+.octa 0x0c0d0e0f08090a0b0405060700010203
+
+.align 64
+.global K256
+K256:
+ .int 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+ .int 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+ .int 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+ .int 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+ .int 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+ .int 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+ .int 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+ .int 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+ .int 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+ .int 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+ .int 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+ .int 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+ .int 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+ .int 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+ .int 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ .int 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 3ae0f43ebd37..9e79baf03a4b 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -427,4 +427,14 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS_CRYPTO("sha256");
+MODULE_ALIAS_CRYPTO("sha256-ssse3");
+MODULE_ALIAS_CRYPTO("sha256-avx");
+MODULE_ALIAS_CRYPTO("sha256-avx2");
MODULE_ALIAS_CRYPTO("sha224");
+MODULE_ALIAS_CRYPTO("sha224-ssse3");
+MODULE_ALIAS_CRYPTO("sha224-avx");
+MODULE_ALIAS_CRYPTO("sha224-avx2");
+#ifdef CONFIG_AS_SHA256_NI
+MODULE_ALIAS_CRYPTO("sha256-ni");
+MODULE_ALIAS_CRYPTO("sha224-ni");
+#endif
diff --git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile
new file mode 100644
index 000000000000..0a57e2103980
--- /dev/null
+++ b/arch/x86/crypto/sha512-mb/Makefile
@@ -0,0 +1,11 @@
+#
+# Arch-specific CryptoAPI modules.
+#
+
+avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
+ $(comma)4)$(comma)%ymm2,yes,no)
+ifeq ($(avx2_supported),yes)
+ obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o
+ sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \
+ sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o
+endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
new file mode 100644
index 000000000000..f4cf5b78fd36
--- /dev/null
+++ b/arch/x86/crypto/sha512-mb/sha512_mb.c
@@ -0,0 +1,1046 @@
+/*
+ * Multi buffer SHA512 algorithm Glue Code
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sha.h>
+#include <crypto/mcryptd.h>
+#include <crypto/crypto_wq.h>
+#include <asm/byteorder.h>
+#include <linux/hardirq.h>
+#include <asm/fpu/api.h>
+#include "sha512_mb_ctx.h"
+
+#define FLUSH_INTERVAL 1000 /* in usec */
+
+static struct mcryptd_alg_state sha512_mb_alg_state;
+
+struct sha512_mb_ctx {
+ struct mcryptd_ahash *mcryptd_tfm;
+};
+
+static inline struct mcryptd_hash_request_ctx
+ *cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx)
+{
+ struct ahash_request *areq;
+
+ areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
+ return container_of(areq, struct mcryptd_hash_request_ctx, areq);
+}
+
+static inline struct ahash_request
+ *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
+{
+ return container_of((void *) ctx, struct ahash_request, __ctx);
+}
+
+static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
+ struct ahash_request *areq)
+{
+ rctx->flag = HASH_UPDATE;
+}
+
+static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state);
+static asmlinkage struct job_sha512* (*sha512_job_mgr_submit)
+ (struct sha512_mb_mgr *state,
+ struct job_sha512 *job);
+static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
+ (struct sha512_mb_mgr *state);
+static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
+ (struct sha512_mb_mgr *state);
+
+inline void sha512_init_digest(uint64_t *digest)
+{
+ static const uint64_t initial_digest[SHA512_DIGEST_LENGTH] = {
+ SHA512_H0, SHA512_H1, SHA512_H2,
+ SHA512_H3, SHA512_H4, SHA512_H5,
+ SHA512_H6, SHA512_H7 };
+ memcpy(digest, initial_digest, sizeof(initial_digest));
+}
+
+inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
+ uint32_t total_len)
+{
+ uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1);
+
+ memset(&padblock[i], 0, SHA512_BLOCK_SIZE);
+ padblock[i] = 0x80;
+
+ i += ((SHA512_BLOCK_SIZE - 1) &
+ (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1)))
+ + 1 + SHA512_PADLENGTHFIELD_SIZE;
+
+#if SHA512_PADLENGTHFIELD_SIZE == 16
+ *((uint64_t *) &padblock[i - 16]) = 0;
+#endif
+
+ *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
+
+ /* Number of extra blocks to hash */
+ return i >> SHA512_LOG2_BLOCK_SIZE;
+}
+
+static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit
+ (struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx)
+{
+ while (ctx) {
+ if (ctx->status & HASH_CTX_STS_COMPLETE) {
+ /* Clear PROCESSING bit */
+ ctx->status = HASH_CTX_STS_COMPLETE;
+ return ctx;
+ }
+
+ /*
+ * If the extra blocks are empty, begin hashing what remains
+ * in the user's buffer.
+ */
+ if (ctx->partial_block_buffer_length == 0 &&
+ ctx->incoming_buffer_length) {
+
+ const void *buffer = ctx->incoming_buffer;
+ uint32_t len = ctx->incoming_buffer_length;
+ uint32_t copy_len;
+
+ /*
+ * Only entire blocks can be hashed.
+ * Copy remainder to extra blocks buffer.
+ */
+ copy_len = len & (SHA512_BLOCK_SIZE-1);
+
+ if (copy_len) {
+ len -= copy_len;
+ memcpy(ctx->partial_block_buffer,
+ ((const char *) buffer + len),
+ copy_len);
+ ctx->partial_block_buffer_length = copy_len;
+ }
+
+ ctx->incoming_buffer_length = 0;
+
+ /* len should be a multiple of the block size now */
+ assert((len % SHA512_BLOCK_SIZE) == 0);
+
+ /* Set len to the number of blocks to be hashed */
+ len >>= SHA512_LOG2_BLOCK_SIZE;
+
+ if (len) {
+
+ ctx->job.buffer = (uint8_t *) buffer;
+ ctx->job.len = len;
+ ctx = (struct sha512_hash_ctx *)
+ sha512_job_mgr_submit(&mgr->mgr,
+ &ctx->job);
+ continue;
+ }
+ }
+
+ /*
+ * If the extra blocks are not empty, then we are
+ * either on the last block(s) or we need more
+ * user input before continuing.
+ */
+ if (ctx->status & HASH_CTX_STS_LAST) {
+
+ uint8_t *buf = ctx->partial_block_buffer;
+ uint32_t n_extra_blocks =
+ sha512_pad(buf, ctx->total_length);
+
+ ctx->status = (HASH_CTX_STS_PROCESSING |
+ HASH_CTX_STS_COMPLETE);
+ ctx->job.buffer = buf;
+ ctx->job.len = (uint32_t) n_extra_blocks;
+ ctx = (struct sha512_hash_ctx *)
+ sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
+ continue;
+ }
+
+ if (ctx)
+ ctx->status = HASH_CTX_STS_IDLE;
+ return ctx;
+ }
+
+ return NULL;
+}
+
+static struct sha512_hash_ctx
+ *sha512_ctx_mgr_get_comp_ctx(struct sha512_ctx_mgr *mgr)
+{
+ /*
+ * If get_comp_job returns NULL, there are no jobs complete.
+ * If get_comp_job returns a job, verify that it is safe to return to
+ * the user.
+ * If it is not ready, resubmit the job to finish processing.
+ * If sha512_ctx_mgr_resubmit returned a job, it is ready to be
+ * returned.
+ * Otherwise, all jobs currently being managed by the hash_ctx_mgr
+ * still need processing.
+ */
+ struct sha512_hash_ctx *ctx;
+
+ ctx = (struct sha512_hash_ctx *)
+ sha512_job_mgr_get_comp_job(&mgr->mgr);
+ return sha512_ctx_mgr_resubmit(mgr, ctx);
+}
+
+static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
+{
+ sha512_job_mgr_init(&mgr->mgr);
+}
+
+static struct sha512_hash_ctx
+ *sha512_ctx_mgr_submit(struct sha512_ctx_mgr *mgr,
+ struct sha512_hash_ctx *ctx,
+ const void *buffer,
+ uint32_t len,
+ int flags)
+{
+ if (flags & (~HASH_ENTIRE)) {
+ /*
+ * User should not pass anything other than FIRST, UPDATE, or
+ * LAST
+ */
+ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
+ return ctx;
+ }
+
+ if (ctx->status & HASH_CTX_STS_PROCESSING) {
+ /* Cannot submit to a currently processing job. */
+ ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
+ return ctx;
+ }
+
+ if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+ /* Cannot update a finished job. */
+ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
+ return ctx;
+ }
+
+
+ if (flags & HASH_FIRST) {
+ /* Init digest */
+ sha512_init_digest(ctx->job.result_digest);
+
+ /* Reset byte counter */
+ ctx->total_length = 0;
+
+ /* Clear extra blocks */
+ ctx->partial_block_buffer_length = 0;
+ }
+
+ /*
+ * If we made it here, there were no errors during this call to
+ * submit
+ */
+ ctx->error = HASH_CTX_ERROR_NONE;
+
+ /* Store buffer ptr info from user */
+ ctx->incoming_buffer = buffer;
+ ctx->incoming_buffer_length = len;
+
+ /*
+ * Store the user's request flags and mark this ctx as currently being
+ * processed.
+ */
+ ctx->status = (flags & HASH_LAST) ?
+ (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
+ HASH_CTX_STS_PROCESSING;
+
+ /* Advance byte counter */
+ ctx->total_length += len;
+
+ /*
+ * If there is anything currently buffered in the extra blocks,
+ * append to it until it contains a whole block.
+ * Or if the user's buffer contains less than a whole block,
+ * append as much as possible to the extra block.
+ */
+ if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) {
+ /* Compute how many bytes to copy from user buffer into extra
+ * block
+ */
+ uint32_t copy_len = SHA512_BLOCK_SIZE -
+ ctx->partial_block_buffer_length;
+ if (len < copy_len)
+ copy_len = len;
+
+ if (copy_len) {
+ /* Copy and update relevant pointers and counters */
+ memcpy
+ (&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
+ buffer, copy_len);
+
+ ctx->partial_block_buffer_length += copy_len;
+ ctx->incoming_buffer = (const void *)
+ ((const char *)buffer + copy_len);
+ ctx->incoming_buffer_length = len - copy_len;
+ }
+
+ /* The extra block should never contain more than 1 block
+ * here
+ */
+ assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE);
+
+ /* If the extra block buffer contains exactly 1 block, it can
+ * be hashed.
+ */
+ if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) {
+ ctx->partial_block_buffer_length = 0;
+
+ ctx->job.buffer = ctx->partial_block_buffer;
+ ctx->job.len = 1;
+ ctx = (struct sha512_hash_ctx *)
+ sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
+ }
+ }
+
+ return sha512_ctx_mgr_resubmit(mgr, ctx);
+}
+
+static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct sha512_ctx_mgr *mgr)
+{
+ struct sha512_hash_ctx *ctx;
+
+ while (1) {
+ ctx = (struct sha512_hash_ctx *)
+ sha512_job_mgr_flush(&mgr->mgr);
+
+ /* If flush returned 0, there are no more jobs in flight. */
+ if (!ctx)
+ return NULL;
+
+ /*
+ * If flush returned a job, resubmit the job to finish
+ * processing.
+ */
+ ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
+
+ /*
+ * If sha512_ctx_mgr_resubmit returned a job, it is ready to
+ * be returned. Otherwise, all jobs currently being managed by
+ * the sha512_ctx_mgr still need processing. Loop.
+ */
+ if (ctx)
+ return ctx;
+ }
+}
+
+static int sha512_mb_init(struct ahash_request *areq)
+{
+ struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
+
+ hash_ctx_init(sctx);
+ sctx->job.result_digest[0] = SHA512_H0;
+ sctx->job.result_digest[1] = SHA512_H1;
+ sctx->job.result_digest[2] = SHA512_H2;
+ sctx->job.result_digest[3] = SHA512_H3;
+ sctx->job.result_digest[4] = SHA512_H4;
+ sctx->job.result_digest[5] = SHA512_H5;
+ sctx->job.result_digest[6] = SHA512_H6;
+ sctx->job.result_digest[7] = SHA512_H7;
+ sctx->total_length = 0;
+ sctx->partial_block_buffer_length = 0;
+ sctx->status = HASH_CTX_STS_IDLE;
+
+ return 0;
+}
+
+static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
+{
+ int i;
+ struct sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
+ __be64 *dst = (__be64 *) rctx->out;
+
+ for (i = 0; i < 8; ++i)
+ dst[i] = cpu_to_be64(sctx->job.result_digest[i]);
+
+ return 0;
+}
+
+static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
+ struct mcryptd_alg_cstate *cstate, bool flush)
+{
+ int flag = HASH_UPDATE;
+ int nbytes, err = 0;
+ struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
+ struct sha512_hash_ctx *sha_ctx;
+
+ /* more work ? */
+ while (!(rctx->flag & HASH_DONE)) {
+ nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
+ if (nbytes < 0) {
+ err = nbytes;
+ goto out;
+ }
+ /* check if the walk is done */
+ if (crypto_ahash_walk_last(&rctx->walk)) {
+ rctx->flag |= HASH_DONE;
+ if (rctx->flag & HASH_FINAL)
+ flag |= HASH_LAST;
+
+ }
+ sha_ctx = (struct sha512_hash_ctx *)
+ ahash_request_ctx(&rctx->areq);
+ kernel_fpu_begin();
+ sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx,
+ rctx->walk.data, nbytes, flag);
+ if (!sha_ctx) {
+ if (flush)
+ sha_ctx = sha512_ctx_mgr_flush(cstate->mgr);
+ }
+ kernel_fpu_end();
+ if (sha_ctx)
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ else {
+ rctx = NULL;
+ goto out;
+ }
+ }
+
+ /* copy the results */
+ if (rctx->flag & HASH_FINAL)
+ sha512_mb_set_results(rctx);
+
+out:
+ *ret_rctx = rctx;
+ return err;
+}
+
+static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
+ struct mcryptd_alg_cstate *cstate,
+ int err)
+{
+ struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
+ struct sha512_hash_ctx *sha_ctx;
+ struct mcryptd_hash_request_ctx *req_ctx;
+ int ret;
+
+ /* remove from work list */
+ spin_lock(&cstate->work_lock);
+ list_del(&rctx->waiter);
+ spin_unlock(&cstate->work_lock);
+
+ if (irqs_disabled())
+ rctx->complete(&req->base, err);
+ else {
+ local_bh_disable();
+ rctx->complete(&req->base, err);
+ local_bh_enable();
+ }
+
+ /* check to see if there are other jobs that are done */
+ sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr);
+ while (sha_ctx) {
+ req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ ret = sha_finish_walk(&req_ctx, cstate, false);
+ if (req_ctx) {
+ spin_lock(&cstate->work_lock);
+ list_del(&req_ctx->waiter);
+ spin_unlock(&cstate->work_lock);
+
+ req = cast_mcryptd_ctx_to_req(req_ctx);
+ if (irqs_disabled())
+ rctx->complete(&req->base, ret);
+ else {
+ local_bh_disable();
+ rctx->complete(&req->base, ret);
+ local_bh_enable();
+ }
+ }
+ sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr);
+ }
+
+ return 0;
+}
+
+static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
+ struct mcryptd_alg_cstate *cstate)
+{
+ unsigned long next_flush;
+ unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
+
+ /* initialize tag */
+ rctx->tag.arrival = jiffies; /* tag the arrival time */
+ rctx->tag.seq_num = cstate->next_seq_num++;
+ next_flush = rctx->tag.arrival + delay;
+ rctx->tag.expire = next_flush;
+
+ spin_lock(&cstate->work_lock);
+ list_add_tail(&rctx->waiter, &cstate->work_list);
+ spin_unlock(&cstate->work_lock);
+
+ mcryptd_arm_flusher(cstate, delay);
+}
+
+static int sha512_mb_update(struct ahash_request *areq)
+{
+ struct mcryptd_hash_request_ctx *rctx =
+ container_of(areq, struct mcryptd_hash_request_ctx,
+ areq);
+ struct mcryptd_alg_cstate *cstate =
+ this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
+
+ struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
+ struct sha512_hash_ctx *sha_ctx;
+ int ret = 0, nbytes;
+
+
+ /* sanity check */
+ if (rctx->tag.cpu != smp_processor_id()) {
+ pr_err("mcryptd error: cpu clash\n");
+ goto done;
+ }
+
+ /* need to init context */
+ req_ctx_init(rctx, areq);
+
+ nbytes = crypto_ahash_walk_first(req, &rctx->walk);
+
+ if (nbytes < 0) {
+ ret = nbytes;
+ goto done;
+ }
+
+ if (crypto_ahash_walk_last(&rctx->walk))
+ rctx->flag |= HASH_DONE;
+
+ /* submit */
+ sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
+ sha512_mb_add_list(rctx, cstate);
+ kernel_fpu_begin();
+ sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
+ nbytes, HASH_UPDATE);
+ kernel_fpu_end();
+
+ /* check if anything is returned */
+ if (!sha_ctx)
+ return -EINPROGRESS;
+
+ if (sha_ctx->error) {
+ ret = sha_ctx->error;
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ goto done;
+ }
+
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ ret = sha_finish_walk(&rctx, cstate, false);
+
+ if (!rctx)
+ return -EINPROGRESS;
+done:
+ sha_complete_job(rctx, cstate, ret);
+ return ret;
+}
+
+static int sha512_mb_finup(struct ahash_request *areq)
+{
+ struct mcryptd_hash_request_ctx *rctx =
+ container_of(areq, struct mcryptd_hash_request_ctx,
+ areq);
+ struct mcryptd_alg_cstate *cstate =
+ this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
+
+ struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
+ struct sha512_hash_ctx *sha_ctx;
+ int ret = 0, flag = HASH_UPDATE, nbytes;
+
+ /* sanity check */
+ if (rctx->tag.cpu != smp_processor_id()) {
+ pr_err("mcryptd error: cpu clash\n");
+ goto done;
+ }
+
+ /* need to init context */
+ req_ctx_init(rctx, areq);
+
+ nbytes = crypto_ahash_walk_first(req, &rctx->walk);
+
+ if (nbytes < 0) {
+ ret = nbytes;
+ goto done;
+ }
+
+ if (crypto_ahash_walk_last(&rctx->walk)) {
+ rctx->flag |= HASH_DONE;
+ flag = HASH_LAST;
+ }
+
+ /* submit */
+ rctx->flag |= HASH_FINAL;
+ sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
+ sha512_mb_add_list(rctx, cstate);
+
+ kernel_fpu_begin();
+ sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
+ nbytes, flag);
+ kernel_fpu_end();
+
+ /* check if anything is returned */
+ if (!sha_ctx)
+ return -EINPROGRESS;
+
+ if (sha_ctx->error) {
+ ret = sha_ctx->error;
+ goto done;
+ }
+
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ ret = sha_finish_walk(&rctx, cstate, false);
+ if (!rctx)
+ return -EINPROGRESS;
+done:
+ sha_complete_job(rctx, cstate, ret);
+ return ret;
+}
+
+static int sha512_mb_final(struct ahash_request *areq)
+{
+ struct mcryptd_hash_request_ctx *rctx =
+ container_of(areq, struct mcryptd_hash_request_ctx,
+ areq);
+ struct mcryptd_alg_cstate *cstate =
+ this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
+
+ struct sha512_hash_ctx *sha_ctx;
+ int ret = 0;
+ u8 data;
+
+ /* sanity check */
+ if (rctx->tag.cpu != smp_processor_id()) {
+ pr_err("mcryptd error: cpu clash\n");
+ goto done;
+ }
+
+ /* need to init context */
+ req_ctx_init(rctx, areq);
+
+ rctx->flag |= HASH_DONE | HASH_FINAL;
+
+ sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
+ /* flag HASH_FINAL and 0 data size */
+ sha512_mb_add_list(rctx, cstate);
+ kernel_fpu_begin();
+ sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
+ HASH_LAST);
+ kernel_fpu_end();
+
+ /* check if anything is returned */
+ if (!sha_ctx)
+ return -EINPROGRESS;
+
+ if (sha_ctx->error) {
+ ret = sha_ctx->error;
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ goto done;
+ }
+
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ ret = sha_finish_walk(&rctx, cstate, false);
+ if (!rctx)
+ return -EINPROGRESS;
+done:
+ sha_complete_job(rctx, cstate, ret);
+ return ret;
+}
+
+static int sha512_mb_export(struct ahash_request *areq, void *out)
+{
+ struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
+
+ memcpy(out, sctx, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha512_mb_import(struct ahash_request *areq, const void *in)
+{
+ struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
+
+ memcpy(sctx, in, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm)
+{
+ struct mcryptd_ahash *mcryptd_tfm;
+ struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct mcryptd_hash_ctx *mctx;
+
+ mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb",
+ CRYPTO_ALG_INTERNAL,
+ CRYPTO_ALG_INTERNAL);
+ if (IS_ERR(mcryptd_tfm))
+ return PTR_ERR(mcryptd_tfm);
+ mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
+ mctx->alg_state = &sha512_mb_alg_state;
+ ctx->mcryptd_tfm = mcryptd_tfm;
+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+ sizeof(struct ahash_request) +
+ crypto_ahash_reqsize(&mcryptd_tfm->base));
+
+ return 0;
+}
+
+static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ mcryptd_free_ahash(ctx->mcryptd_tfm);
+}
+
+static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm)
+{
+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+ sizeof(struct ahash_request) +
+ sizeof(struct sha512_hash_ctx));
+
+ return 0;
+}
+
+static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ mcryptd_free_ahash(ctx->mcryptd_tfm);
+}
+
+static struct ahash_alg sha512_mb_areq_alg = {
+ .init = sha512_mb_init,
+ .update = sha512_mb_update,
+ .final = sha512_mb_final,
+ .finup = sha512_mb_finup,
+ .export = sha512_mb_export,
+ .import = sha512_mb_import,
+ .halg = {
+ .digestsize = SHA512_DIGEST_SIZE,
+ .statesize = sizeof(struct sha512_hash_ctx),
+ .base = {
+ .cra_name = "__sha512-mb",
+ .cra_driver_name = "__intel_sha512-mb",
+ .cra_priority = 100,
+ /*
+ * use ASYNC flag as some buffers in multi-buffer
+ * algo may not have completed before hashing thread
+ * sleep
+ */
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH |
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT
+ (sha512_mb_areq_alg.halg.base.cra_list),
+ .cra_init = sha512_mb_areq_init_tfm,
+ .cra_exit = sha512_mb_areq_exit_tfm,
+ .cra_ctxsize = sizeof(struct sha512_hash_ctx),
+ }
+ }
+};
+
+static int sha512_mb_async_init(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_init(mcryptd_req);
+}
+
+static int sha512_mb_async_update(struct ahash_request *req)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_update(mcryptd_req);
+}
+
+static int sha512_mb_async_finup(struct ahash_request *req)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_finup(mcryptd_req);
+}
+
+static int sha512_mb_async_final(struct ahash_request *req)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_final(mcryptd_req);
+}
+
+static int sha512_mb_async_digest(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_digest(mcryptd_req);
+}
+
+static int sha512_mb_async_export(struct ahash_request *req, void *out)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_export(mcryptd_req, out);
+}
+
+static int sha512_mb_async_import(struct ahash_request *req, const void *in)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+ struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
+ struct mcryptd_hash_request_ctx *rctx;
+ struct ahash_request *areq;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ rctx = ahash_request_ctx(mcryptd_req);
+
+ areq = &rctx->areq;
+
+ ahash_request_set_tfm(areq, child);
+ ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
+ rctx->complete, req);
+
+ return crypto_ahash_import(mcryptd_req, in);
+}
+
+static struct ahash_alg sha512_mb_async_alg = {
+ .init = sha512_mb_async_init,
+ .update = sha512_mb_async_update,
+ .final = sha512_mb_async_final,
+ .finup = sha512_mb_async_finup,
+ .digest = sha512_mb_async_digest,
+ .export = sha512_mb_async_export,
+ .import = sha512_mb_async_import,
+ .halg = {
+ .digestsize = SHA512_DIGEST_SIZE,
+ .statesize = sizeof(struct sha512_hash_ctx),
+ .base = {
+ .cra_name = "sha512",
+ .cra_driver_name = "sha512_mb",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH |
+ CRYPTO_ALG_ASYNC,
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_type = &crypto_ahash_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT
+ (sha512_mb_async_alg.halg.base.cra_list),
+ .cra_init = sha512_mb_async_init_tfm,
+ .cra_exit = sha512_mb_async_exit_tfm,
+ .cra_ctxsize = sizeof(struct sha512_mb_ctx),
+ .cra_alignmask = 0,
+ },
+ },
+};
+
+static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate)
+{
+ struct mcryptd_hash_request_ctx *rctx;
+ unsigned long cur_time;
+ unsigned long next_flush = 0;
+ struct sha512_hash_ctx *sha_ctx;
+
+
+ cur_time = jiffies;
+
+ while (!list_empty(&cstate->work_list)) {
+ rctx = list_entry(cstate->work_list.next,
+ struct mcryptd_hash_request_ctx, waiter);
+ if time_before(cur_time, rctx->tag.expire)
+ break;
+ kernel_fpu_begin();
+ sha_ctx = (struct sha512_hash_ctx *)
+ sha512_ctx_mgr_flush(cstate->mgr);
+ kernel_fpu_end();
+ if (!sha_ctx) {
+ pr_err("sha512_mb error: nothing got flushed for"
+ " non-empty list\n");
+ break;
+ }
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ sha_finish_walk(&rctx, cstate, true);
+ sha_complete_job(rctx, cstate, 0);
+ }
+
+ if (!list_empty(&cstate->work_list)) {
+ rctx = list_entry(cstate->work_list.next,
+ struct mcryptd_hash_request_ctx, waiter);
+ /* get the hash context and then flush time */
+ next_flush = rctx->tag.expire;
+ mcryptd_arm_flusher(cstate, get_delay(next_flush));
+ }
+ return next_flush;
+}
+
+static int __init sha512_mb_mod_init(void)
+{
+
+ int cpu;
+ int err;
+ struct mcryptd_alg_cstate *cpu_state;
+
+ /* check for dependent cpu features */
+ if (!boot_cpu_has(X86_FEATURE_AVX2) ||
+ !boot_cpu_has(X86_FEATURE_BMI2))
+ return -ENODEV;
+
+ /* initialize multibuffer structures */
+ sha512_mb_alg_state.alg_cstate =
+ alloc_percpu(struct mcryptd_alg_cstate);
+
+ sha512_job_mgr_init = sha512_mb_mgr_init_avx2;
+ sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2;
+ sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2;
+ sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2;
+
+ if (!sha512_mb_alg_state.alg_cstate)
+ return -ENOMEM;
+ for_each_possible_cpu(cpu) {
+ cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
+ cpu_state->next_flush = 0;
+ cpu_state->next_seq_num = 0;
+ cpu_state->flusher_engaged = false;
+ INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
+ cpu_state->cpu = cpu;
+ cpu_state->alg_state = &sha512_mb_alg_state;
+ cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr),
+ GFP_KERNEL);
+ if (!cpu_state->mgr)
+ goto err2;
+ sha512_ctx_mgr_init(cpu_state->mgr);
+ INIT_LIST_HEAD(&cpu_state->work_list);
+ spin_lock_init(&cpu_state->work_lock);
+ }
+ sha512_mb_alg_state.flusher = &sha512_mb_flusher;
+
+ err = crypto_register_ahash(&sha512_mb_areq_alg);
+ if (err)
+ goto err2;
+ err = crypto_register_ahash(&sha512_mb_async_alg);
+ if (err)
+ goto err1;
+
+
+ return 0;
+err1:
+ crypto_unregister_ahash(&sha512_mb_areq_alg);
+err2:
+ for_each_possible_cpu(cpu) {
+ cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
+ kfree(cpu_state->mgr);
+ }
+ free_percpu(sha512_mb_alg_state.alg_cstate);
+ return -ENODEV;
+}
+
+static void __exit sha512_mb_mod_fini(void)
+{
+ int cpu;
+ struct mcryptd_alg_cstate *cpu_state;
+
+ crypto_unregister_ahash(&sha512_mb_async_alg);
+ crypto_unregister_ahash(&sha512_mb_areq_alg);
+ for_each_possible_cpu(cpu) {
+ cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
+ kfree(cpu_state->mgr);
+ }
+ free_percpu(sha512_mb_alg_state.alg_cstate);
+}
+
+module_init(sha512_mb_mod_init);
+module_exit(sha512_mb_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated");
+
+MODULE_ALIAS("sha512");
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
new file mode 100644
index 000000000000..9d4b2c8208d5
--- /dev/null
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
@@ -0,0 +1,130 @@
+/*
+ * Header file for multi buffer SHA512 context
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SHA_MB_CTX_INTERNAL_H
+#define _SHA_MB_CTX_INTERNAL_H
+
+#include "sha512_mb_mgr.h"
+
+#define HASH_UPDATE 0x00
+#define HASH_FIRST 0x01
+#define HASH_LAST 0x02
+#define HASH_ENTIRE 0x03
+#define HASH_DONE 0x04
+#define HASH_FINAL 0x08
+
+#define HASH_CTX_STS_IDLE 0x00
+#define HASH_CTX_STS_PROCESSING 0x01
+#define HASH_CTX_STS_LAST 0x02
+#define HASH_CTX_STS_COMPLETE 0x04
+
+enum hash_ctx_error {
+ HASH_CTX_ERROR_NONE = 0,
+ HASH_CTX_ERROR_INVALID_FLAGS = -1,
+ HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
+ HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
+};
+
+#define hash_ctx_user_data(ctx) ((ctx)->user_data)
+#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
+#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
+#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
+#define hash_ctx_status(ctx) ((ctx)->status)
+#define hash_ctx_error(ctx) ((ctx)->error)
+#define hash_ctx_init(ctx) \
+ do { \
+ (ctx)->error = HASH_CTX_ERROR_NONE; \
+ (ctx)->status = HASH_CTX_STS_COMPLETE; \
+ } while (0)
+
+/* Hash Constants and Typedefs */
+#define SHA512_DIGEST_LENGTH 8
+#define SHA512_LOG2_BLOCK_SIZE 7
+
+#define SHA512_PADLENGTHFIELD_SIZE 16
+
+#ifdef SHA_MB_DEBUG
+#define assert(expr) \
+do { \
+ if (unlikely(!(expr))) { \
+ printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
+ #expr, __FILE__, __func__, __LINE__); \
+ } \
+} while (0)
+#else
+#define assert(expr) do {} while (0)
+#endif
+
+struct sha512_ctx_mgr {
+ struct sha512_mb_mgr mgr;
+};
+
+/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */
+
+struct sha512_hash_ctx {
+ /* Must be at struct offset 0 */
+ struct job_sha512 job;
+ /* status flag */
+ int status;
+ /* error flag */
+ int error;
+
+ uint32_t total_length;
+ const void *incoming_buffer;
+ uint32_t incoming_buffer_length;
+ uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2];
+ uint32_t partial_block_buffer_length;
+ void *user_data;
+};
+
+#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
new file mode 100644
index 000000000000..178f17eef382
--- /dev/null
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
@@ -0,0 +1,104 @@
+/*
+ * Header file for multi buffer SHA512 algorithm manager
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __SHA_MB_MGR_H
+#define __SHA_MB_MGR_H
+
+#include <linux/types.h>
+
+#define NUM_SHA512_DIGEST_WORDS 8
+
+enum job_sts {STS_UNKNOWN = 0,
+ STS_BEING_PROCESSED = 1,
+ STS_COMPLETED = 2,
+ STS_INTERNAL_ERROR = 3,
+ STS_ERROR = 4
+};
+
+struct job_sha512 {
+ u8 *buffer;
+ u64 len;
+ u64 result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32);
+ enum job_sts status;
+ void *user_data;
+};
+
+struct sha512_args_x4 {
+ uint64_t digest[8][4];
+ uint8_t *data_ptr[4];
+};
+
+struct sha512_lane_data {
+ struct job_sha512 *job_in_lane;
+};
+
+struct sha512_mb_mgr {
+ struct sha512_args_x4 args;
+
+ uint64_t lens[4];
+
+ /* each byte is index (0...7) of unused lanes */
+ uint64_t unused_lanes;
+ /* byte 4 is set to FF as a flag */
+ struct sha512_lane_data ldata[4];
+};
+
+#define SHA512_MB_MGR_NUM_LANES_AVX2 4
+
+void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state);
+struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state,
+ struct job_sha512 *job);
+struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state);
+struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state);
+
+#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
new file mode 100644
index 000000000000..cf2636d4c9ba
--- /dev/null
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
@@ -0,0 +1,281 @@
+/*
+ * Header file for multi buffer SHA256 algorithm data structure
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+# Macros for defining data structures
+
+# Usage example
+
+#START_FIELDS # JOB_AES
+### name size align
+#FIELD _plaintext, 8, 8 # pointer to plaintext
+#FIELD _ciphertext, 8, 8 # pointer to ciphertext
+#FIELD _IV, 16, 8 # IV
+#FIELD _keys, 8, 8 # pointer to keys
+#FIELD _len, 4, 4 # length in bytes
+#FIELD _status, 4, 4 # status enumeration
+#FIELD _user_data, 8, 8 # pointer to user data
+#UNION _union, size1, align1, \
+# size2, align2, \
+# size3, align3, \
+# ...
+#END_FIELDS
+#%assign _JOB_AES_size _FIELD_OFFSET
+#%assign _JOB_AES_align _STRUCT_ALIGN
+
+#########################################################################
+
+# Alternate "struc-like" syntax:
+# STRUCT job_aes2
+# RES_Q .plaintext, 1
+# RES_Q .ciphertext, 1
+# RES_DQ .IV, 1
+# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
+# RES_U .union, size1, align1, \
+# size2, align2, \
+# ...
+# ENDSTRUCT
+# # Following only needed if nesting
+# %assign job_aes2_size _FIELD_OFFSET
+# %assign job_aes2_align _STRUCT_ALIGN
+#
+# RES_* macros take a name, a count and an optional alignment.
+# The count in in terms of the base size of the macro, and the
+# default alignment is the base size.
+# The macros are:
+# Macro Base size
+# RES_B 1
+# RES_W 2
+# RES_D 4
+# RES_Q 8
+# RES_DQ 16
+# RES_Y 32
+# RES_Z 64
+#
+# RES_U defines a union. It's arguments are a name and two or more
+# pairs of "size, alignment"
+#
+# The two assigns are only needed if this structure is being nested
+# within another. Even if the assigns are not done, one can still use
+# STRUCT_NAME_size as the size of the structure.
+#
+# Note that for nesting, you still need to assign to STRUCT_NAME_size.
+#
+# The differences between this and using "struc" directly are that each
+# type is implicitly aligned to its natural length (although this can be
+# over-ridden with an explicit third parameter), and that the structure
+# is padded at the end to its overall alignment.
+#
+
+#########################################################################
+
+#ifndef _DATASTRUCT_ASM_
+#define _DATASTRUCT_ASM_
+
+#define PTR_SZ 8
+#define SHA512_DIGEST_WORD_SIZE 8
+#define SHA512_MB_MGR_NUM_LANES_AVX2 4
+#define NUM_SHA512_DIGEST_WORDS 8
+#define SZ4 4*SHA512_DIGEST_WORD_SIZE
+#define ROUNDS 80*SZ4
+#define SHA512_DIGEST_ROW_SIZE (SHA512_MB_MGR_NUM_LANES_AVX2 * 8)
+
+# START_FIELDS
+.macro START_FIELDS
+ _FIELD_OFFSET = 0
+ _STRUCT_ALIGN = 0
+.endm
+
+# FIELD name size align
+.macro FIELD name size align
+ _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
+ \name = _FIELD_OFFSET
+ _FIELD_OFFSET = _FIELD_OFFSET + (\size)
+.if (\align > _STRUCT_ALIGN)
+ _STRUCT_ALIGN = \align
+.endif
+.endm
+
+# END_FIELDS
+.macro END_FIELDS
+ _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
+.endm
+
+.macro STRUCT p1
+START_FIELDS
+.struc \p1
+.endm
+
+.macro ENDSTRUCT
+ tmp = _FIELD_OFFSET
+ END_FIELDS
+ tmp = (_FIELD_OFFSET - ##tmp)
+.if (tmp > 0)
+ .lcomm tmp
+.endm
+
+## RES_int name size align
+.macro RES_int p1 p2 p3
+ name = \p1
+ size = \p2
+ align = .\p3
+
+ _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
+.align align
+.lcomm name size
+ _FIELD_OFFSET = _FIELD_OFFSET + (size)
+.if (align > _STRUCT_ALIGN)
+ _STRUCT_ALIGN = align
+.endif
+.endm
+
+# macro RES_B name, size [, align]
+.macro RES_B _name, _size, _align=1
+RES_int _name _size _align
+.endm
+
+# macro RES_W name, size [, align]
+.macro RES_W _name, _size, _align=2
+RES_int _name 2*(_size) _align
+.endm
+
+# macro RES_D name, size [, align]
+.macro RES_D _name, _size, _align=4
+RES_int _name 4*(_size) _align
+.endm
+
+# macro RES_Q name, size [, align]
+.macro RES_Q _name, _size, _align=8
+RES_int _name 8*(_size) _align
+.endm
+
+# macro RES_DQ name, size [, align]
+.macro RES_DQ _name, _size, _align=16
+RES_int _name 16*(_size) _align
+.endm
+
+# macro RES_Y name, size [, align]
+.macro RES_Y _name, _size, _align=32
+RES_int _name 32*(_size) _align
+.endm
+
+# macro RES_Z name, size [, align]
+.macro RES_Z _name, _size, _align=64
+RES_int _name 64*(_size) _align
+.endm
+
+#endif
+
+###################################################################
+### Define SHA512 Out Of Order Data Structures
+###################################################################
+
+START_FIELDS # LANE_DATA
+### name size align
+FIELD _job_in_lane, 8, 8 # pointer to job object
+END_FIELDS
+
+ _LANE_DATA_size = _FIELD_OFFSET
+ _LANE_DATA_align = _STRUCT_ALIGN
+
+####################################################################
+
+START_FIELDS # SHA512_ARGS_X4
+### name size align
+FIELD _digest, 8*8*4, 4 # transposed digest
+FIELD _data_ptr, 8*4, 8 # array of pointers to data
+END_FIELDS
+
+ _SHA512_ARGS_X4_size = _FIELD_OFFSET
+ _SHA512_ARGS_X4_align = _STRUCT_ALIGN
+
+#####################################################################
+
+START_FIELDS # MB_MGR
+### name size align
+FIELD _args, _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align
+FIELD _lens, 8*4, 8
+FIELD _unused_lanes, 8, 8
+FIELD _ldata, _LANE_DATA_size*4, _LANE_DATA_align
+END_FIELDS
+
+ _MB_MGR_size = _FIELD_OFFSET
+ _MB_MGR_align = _STRUCT_ALIGN
+
+_args_digest = _args + _digest
+_args_data_ptr = _args + _data_ptr
+
+#######################################################################
+
+#######################################################################
+#### Define constants
+#######################################################################
+
+#define STS_UNKNOWN 0
+#define STS_BEING_PROCESSED 1
+#define STS_COMPLETED 2
+
+#######################################################################
+#### Define JOB_SHA512 structure
+#######################################################################
+
+START_FIELDS # JOB_SHA512
+### name size align
+FIELD _buffer, 8, 8 # pointer to buffer
+FIELD _len, 8, 8 # length in bytes
+FIELD _result_digest, 8*8, 32 # Digest (output)
+FIELD _status, 4, 4
+FIELD _user_data, 8, 8
+END_FIELDS
+
+ _JOB_SHA512_size = _FIELD_OFFSET
+ _JOB_SHA512_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
new file mode 100644
index 000000000000..3ddba19a0db6
--- /dev/null
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
@@ -0,0 +1,291 @@
+/*
+ * Flush routine for SHA512 multibuffer
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+#include "sha512_mb_mgr_datastruct.S"
+
+.extern sha512_x4_avx2
+
+# LINUX register definitions
+#define arg1 %rdi
+#define arg2 %rsi
+
+# idx needs to be other than arg1, arg2, rbx, r12
+#define idx %rdx
+
+# Common definitions
+#define state arg1
+#define job arg2
+#define len2 arg2
+
+#define unused_lanes %rbx
+#define lane_data %rbx
+#define tmp2 %rbx
+
+#define job_rax %rax
+#define tmp1 %rax
+#define size_offset %rax
+#define tmp %rax
+#define start_offset %rax
+
+#define tmp3 arg1
+
+#define extra_blocks arg2
+#define p arg2
+
+#define tmp4 %r8
+#define lens0 %r8
+
+#define lens1 %r9
+#define lens2 %r10
+#define lens3 %r11
+
+.macro LABEL prefix n
+\prefix\n\():
+.endm
+
+.macro JNE_SKIP i
+jne skip_\i
+.endm
+
+.altmacro
+.macro SET_OFFSET _offset
+offset = \_offset
+.endm
+.noaltmacro
+
+# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state)
+# arg 1 : rcx : state
+ENTRY(sha512_mb_mgr_flush_avx2)
+ FRAME_BEGIN
+ push %rbx
+
+ # If bit (32+3) is set, then all lanes are empty
+ mov _unused_lanes(state), unused_lanes
+ bt $32+7, unused_lanes
+ jc return_null
+
+ # find a lane with a non-null job
+ xor idx, idx
+ offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne one(%rip), idx
+ offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne two(%rip), idx
+ offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne three(%rip), idx
+
+ # copy idx to empty lanes
+copy_lane_data:
+ offset = (_args + _data_ptr)
+ mov offset(state,idx,8), tmp
+
+ I = 0
+.rep 4
+ offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+.altmacro
+ JNE_SKIP %I
+ offset = (_args + _data_ptr + 8*I)
+ mov tmp, offset(state)
+ offset = (_lens + 8*I +4)
+ movl $0xFFFFFFFF, offset(state)
+LABEL skip_ %I
+ I = (I+1)
+.noaltmacro
+.endr
+
+ # Find min length
+ mov _lens + 0*8(state),lens0
+ mov lens0,idx
+ mov _lens + 1*8(state),lens1
+ cmp idx,lens1
+ cmovb lens1,idx
+ mov _lens + 2*8(state),lens2
+ cmp idx,lens2
+ cmovb lens2,idx
+ mov _lens + 3*8(state),lens3
+ cmp idx,lens3
+ cmovb lens3,idx
+ mov idx,len2
+ and $0xF,idx
+ and $~0xFF,len2
+ jz len_is_0
+
+ sub len2, lens0
+ sub len2, lens1
+ sub len2, lens2
+ sub len2, lens3
+ shr $32,len2
+ mov lens0, _lens + 0*8(state)
+ mov lens1, _lens + 1*8(state)
+ mov lens2, _lens + 2*8(state)
+ mov lens3, _lens + 3*8(state)
+
+ # "state" and "args" are the same address, arg1
+ # len is arg2
+ call sha512_x4_avx2
+ # state and idx are intact
+
+len_is_0:
+ # process completed job "idx"
+ imul $_LANE_DATA_size, idx, lane_data
+ lea _ldata(state, lane_data), lane_data
+
+ mov _job_in_lane(lane_data), job_rax
+ movq $0, _job_in_lane(lane_data)
+ movl $STS_COMPLETED, _status(job_rax)
+ mov _unused_lanes(state), unused_lanes
+ shl $8, unused_lanes
+ or idx, unused_lanes
+ mov unused_lanes, _unused_lanes(state)
+
+ movl $0xFFFFFFFF, _lens+4(state, idx, 8)
+
+ vmovq _args_digest+0*32(state, idx, 8), %xmm0
+ vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
+ vmovq _args_digest+2*32(state, idx, 8), %xmm1
+ vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
+ vmovq _args_digest+4*32(state, idx, 8), %xmm2
+ vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
+ vmovq _args_digest+6*32(state, idx, 8), %xmm3
+ vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
+
+ vmovdqu %xmm0, _result_digest(job_rax)
+ vmovdqu %xmm1, _result_digest+1*16(job_rax)
+ vmovdqu %xmm2, _result_digest+2*16(job_rax)
+ vmovdqu %xmm3, _result_digest+3*16(job_rax)
+
+return:
+ pop %rbx
+ FRAME_END
+ ret
+
+return_null:
+ xor job_rax, job_rax
+ jmp return
+ENDPROC(sha512_mb_mgr_flush_avx2)
+.align 16
+
+ENTRY(sha512_mb_mgr_get_comp_job_avx2)
+ push %rbx
+
+ mov _unused_lanes(state), unused_lanes
+ bt $(32+7), unused_lanes
+ jc .return_null
+
+ # Find min length
+ mov _lens(state),lens0
+ mov lens0,idx
+ mov _lens+1*8(state),lens1
+ cmp idx,lens1
+ cmovb lens1,idx
+ mov _lens+2*8(state),lens2
+ cmp idx,lens2
+ cmovb lens2,idx
+ mov _lens+3*8(state),lens3
+ cmp idx,lens3
+ cmovb lens3,idx
+ test $~0xF,idx
+ jnz .return_null
+ and $0xF,idx
+
+ #process completed job "idx"
+ imul $_LANE_DATA_size, idx, lane_data
+ lea _ldata(state, lane_data), lane_data
+
+ mov _job_in_lane(lane_data), job_rax
+ movq $0, _job_in_lane(lane_data)
+ movl $STS_COMPLETED, _status(job_rax)
+ mov _unused_lanes(state), unused_lanes
+ shl $8, unused_lanes
+ or idx, unused_lanes
+ mov unused_lanes, _unused_lanes(state)
+
+ movl $0xFFFFFFFF, _lens+4(state, idx, 8)
+
+ vmovq _args_digest(state, idx, 8), %xmm0
+ vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
+ vmovq _args_digest+2*32(state, idx, 8), %xmm1
+ vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
+ vmovq _args_digest+4*32(state, idx, 8), %xmm2
+ vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
+ vmovq _args_digest+6*32(state, idx, 8), %xmm3
+ vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
+
+ vmovdqu %xmm0, _result_digest+0*16(job_rax)
+ vmovdqu %xmm1, _result_digest+1*16(job_rax)
+ vmovdqu %xmm2, _result_digest+2*16(job_rax)
+ vmovdqu %xmm3, _result_digest+3*16(job_rax)
+
+ pop %rbx
+
+ ret
+
+.return_null:
+ xor job_rax, job_rax
+ pop %rbx
+ ret
+ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
+.data
+
+.align 16
+one:
+.quad 1
+two:
+.quad 2
+three:
+.quad 3
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
new file mode 100644
index 000000000000..36870b26067a
--- /dev/null
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
@@ -0,0 +1,67 @@
+/*
+ * Initialization code for multi buffer SHA256 algorithm for AVX2
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "sha512_mb_mgr.h"
+
+void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
+{
+ unsigned int j;
+
+ state->lens[0] = 0;
+ state->lens[1] = 1;
+ state->lens[2] = 2;
+ state->lens[3] = 3;
+ state->unused_lanes = 0xFF03020100;
+ for (j = 0; j < 4; j++)
+ state->ldata[j].job_in_lane = NULL;
+}
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
new file mode 100644
index 000000000000..815f07bdd1f8
--- /dev/null
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
@@ -0,0 +1,222 @@
+/*
+ * Buffer submit code for multi buffer SHA512 algorithm
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+#include "sha512_mb_mgr_datastruct.S"
+
+.extern sha512_x4_avx2
+
+#define arg1 %rdi
+#define arg2 %rsi
+
+#define idx %rdx
+#define last_len %rdx
+
+#define size_offset %rcx
+#define tmp2 %rcx
+
+# Common definitions
+#define state arg1
+#define job arg2
+#define len2 arg2
+#define p2 arg2
+
+#define p %r11
+#define start_offset %r11
+
+#define unused_lanes %rbx
+
+#define job_rax %rax
+#define len %rax
+
+#define lane %r12
+#define tmp3 %r12
+#define lens3 %r12
+
+#define extra_blocks %r8
+#define lens0 %r8
+
+#define tmp %r9
+#define lens1 %r9
+
+#define lane_data %r10
+#define lens2 %r10
+
+#define DWORD_len %eax
+
+# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job)
+# arg 1 : rcx : state
+# arg 2 : rdx : job
+ENTRY(sha512_mb_mgr_submit_avx2)
+ FRAME_BEGIN
+ push %rbx
+ push %r12
+
+ mov _unused_lanes(state), unused_lanes
+ movzb %bl,lane
+ shr $8, unused_lanes
+ imul $_LANE_DATA_size, lane,lane_data
+ movl $STS_BEING_PROCESSED, _status(job)
+ lea _ldata(state, lane_data), lane_data
+ mov unused_lanes, _unused_lanes(state)
+ movl _len(job), DWORD_len
+
+ mov job, _job_in_lane(lane_data)
+ movl DWORD_len,_lens+4(state , lane, 8)
+
+ # Load digest words from result_digest
+ vmovdqu _result_digest+0*16(job), %xmm0
+ vmovdqu _result_digest+1*16(job), %xmm1
+ vmovdqu _result_digest+2*16(job), %xmm2
+ vmovdqu _result_digest+3*16(job), %xmm3
+
+ vmovq %xmm0, _args_digest(state, lane, 8)
+ vpextrq $1, %xmm0, _args_digest+1*32(state , lane, 8)
+ vmovq %xmm1, _args_digest+2*32(state , lane, 8)
+ vpextrq $1, %xmm1, _args_digest+3*32(state , lane, 8)
+ vmovq %xmm2, _args_digest+4*32(state , lane, 8)
+ vpextrq $1, %xmm2, _args_digest+5*32(state , lane, 8)
+ vmovq %xmm3, _args_digest+6*32(state , lane, 8)
+ vpextrq $1, %xmm3, _args_digest+7*32(state , lane, 8)
+
+ mov _buffer(job), p
+ mov p, _args_data_ptr(state, lane, 8)
+
+ cmp $0xFF, unused_lanes
+ jne return_null
+
+start_loop:
+
+ # Find min length
+ mov _lens+0*8(state),lens0
+ mov lens0,idx
+ mov _lens+1*8(state),lens1
+ cmp idx,lens1
+ cmovb lens1, idx
+ mov _lens+2*8(state),lens2
+ cmp idx,lens2
+ cmovb lens2,idx
+ mov _lens+3*8(state),lens3
+ cmp idx,lens3
+ cmovb lens3,idx
+ mov idx,len2
+ and $0xF,idx
+ and $~0xFF,len2
+ jz len_is_0
+
+ sub len2,lens0
+ sub len2,lens1
+ sub len2,lens2
+ sub len2,lens3
+ shr $32,len2
+ mov lens0, _lens + 0*8(state)
+ mov lens1, _lens + 1*8(state)
+ mov lens2, _lens + 2*8(state)
+ mov lens3, _lens + 3*8(state)
+
+ # "state" and "args" are the same address, arg1
+ # len is arg2
+ call sha512_x4_avx2
+ # state and idx are intact
+
+len_is_0:
+
+ # process completed job "idx"
+ imul $_LANE_DATA_size, idx, lane_data
+ lea _ldata(state, lane_data), lane_data
+
+ mov _job_in_lane(lane_data), job_rax
+ mov _unused_lanes(state), unused_lanes
+ movq $0, _job_in_lane(lane_data)
+ movl $STS_COMPLETED, _status(job_rax)
+ shl $8, unused_lanes
+ or idx, unused_lanes
+ mov unused_lanes, _unused_lanes(state)
+
+ movl $0xFFFFFFFF,_lens+4(state,idx,8)
+ vmovq _args_digest+0*32(state , idx, 8), %xmm0
+ vpinsrq $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0
+ vmovq _args_digest+2*32(state , idx, 8), %xmm1
+ vpinsrq $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1
+ vmovq _args_digest+4*32(state , idx, 8), %xmm2
+ vpinsrq $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2
+ vmovq _args_digest+6*32(state , idx, 8), %xmm3
+ vpinsrq $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3
+
+ vmovdqu %xmm0, _result_digest + 0*16(job_rax)
+ vmovdqu %xmm1, _result_digest + 1*16(job_rax)
+ vmovdqu %xmm2, _result_digest + 2*16(job_rax)
+ vmovdqu %xmm3, _result_digest + 3*16(job_rax)
+
+return:
+ pop %r12
+ pop %rbx
+ FRAME_END
+ ret
+
+return_null:
+ xor job_rax, job_rax
+ jmp return
+ENDPROC(sha512_mb_mgr_submit_avx2)
+.data
+
+.align 16
+H0: .int 0x6a09e667
+H1: .int 0xbb67ae85
+H2: .int 0x3c6ef372
+H3: .int 0xa54ff53a
+H4: .int 0x510e527f
+H5: .int 0x9b05688c
+H6: .int 0x1f83d9ab
+H7: .int 0x5be0cd19
diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
new file mode 100644
index 000000000000..31ab1eff6413
--- /dev/null
+++ b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
@@ -0,0 +1,529 @@
+/*
+ * Multi-buffer SHA512 algorithm hash compute routine
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Megha Dey <megha.dey@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+# code to compute quad SHA512 using AVX2
+# use YMMs to tackle the larger digest size
+# outer calling routine takes care of save and restore of XMM registers
+# Logic designed/laid out by JDG
+
+# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15
+# Stack must be aligned to 32 bytes before call
+# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12
+# Linux preserves: rcx rdx rdi rbp r13 r14 r15
+# clobbers ymm0-15
+
+#include <linux/linkage.h>
+#include "sha512_mb_mgr_datastruct.S"
+
+arg1 = %rdi
+arg2 = %rsi
+
+# Common definitions
+STATE = arg1
+INP_SIZE = arg2
+
+IDX = %rax
+ROUND = %rbx
+TBL = %r8
+
+inp0 = %r9
+inp1 = %r10
+inp2 = %r11
+inp3 = %r12
+
+a = %ymm0
+b = %ymm1
+c = %ymm2
+d = %ymm3
+e = %ymm4
+f = %ymm5
+g = %ymm6
+h = %ymm7
+
+a0 = %ymm8
+a1 = %ymm9
+a2 = %ymm10
+
+TT0 = %ymm14
+TT1 = %ymm13
+TT2 = %ymm12
+TT3 = %ymm11
+TT4 = %ymm10
+TT5 = %ymm9
+
+T1 = %ymm14
+TMP = %ymm15
+
+# Define stack usage
+STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24
+
+#define VMOVPD vmovupd
+_digest = SZ4*16
+
+# transpose r0, r1, r2, r3, t0, t1
+# "transpose" data in {r0..r3} using temps {t0..t3}
+# Input looks like: {r0 r1 r2 r3}
+# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
+# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
+# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
+# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
+#
+# output looks like: {t0 r1 r0 r3}
+# t0 = {d1 d0 c1 c0 b1 b0 a1 a0}
+# r1 = {d3 d2 c3 c2 b3 b2 a3 a2}
+# r0 = {d5 d4 c5 c4 b5 b4 a5 a4}
+# r3 = {d7 d6 c7 c6 b7 b6 a7 a6}
+
+.macro TRANSPOSE r0 r1 r2 r3 t0 t1
+ vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
+ vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
+ vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
+ vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
+
+ vperm2f128 $0x20, \r2, \r0, \r1 # h6...a6
+ vperm2f128 $0x31, \r2, \r0, \r3 # h2...a2
+ vperm2f128 $0x31, \t1, \t0, \r0 # h5...a5
+ vperm2f128 $0x20, \t1, \t0, \t0 # h1...a1
+.endm
+
+.macro ROTATE_ARGS
+TMP_ = h
+h = g
+g = f
+f = e
+e = d
+d = c
+c = b
+b = a
+a = TMP_
+.endm
+
+# PRORQ reg, imm, tmp
+# packed-rotate-right-double
+# does a rotate by doing two shifts and an or
+.macro _PRORQ reg imm tmp
+ vpsllq $(64-\imm),\reg,\tmp
+ vpsrlq $\imm,\reg, \reg
+ vpor \tmp,\reg, \reg
+.endm
+
+# non-destructive
+# PRORQ_nd reg, imm, tmp, src
+.macro _PRORQ_nd reg imm tmp src
+ vpsllq $(64-\imm), \src, \tmp
+ vpsrlq $\imm, \src, \reg
+ vpor \tmp, \reg, \reg
+.endm
+
+# PRORQ dst/src, amt
+.macro PRORQ reg imm
+ _PRORQ \reg, \imm, TMP
+.endm
+
+# PRORQ_nd dst, src, amt
+.macro PRORQ_nd reg tmp imm
+ _PRORQ_nd \reg, \imm, TMP, \tmp
+.endm
+
+#; arguments passed implicitly in preprocessor symbols i, a...h
+.macro ROUND_00_15 _T1 i
+ PRORQ_nd a0, e, (18-14) # sig1: a0 = (e >> 4)
+
+ vpxor g, f, a2 # ch: a2 = f^g
+ vpand e,a2, a2 # ch: a2 = (f^g)&e
+ vpxor g, a2, a2 # a2 = ch
+
+ PRORQ_nd a1,e,41 # sig1: a1 = (e >> 25)
+
+ offset = SZ4*(\i & 0xf)
+ vmovdqu \_T1,offset(%rsp)
+ vpaddq (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K
+ vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5)
+ PRORQ a0, 14 # sig1: a0 = (e >> 6) ^ (e >> 11)
+ vpaddq a2, h, h # h = h + ch
+ PRORQ_nd a2,a,6 # sig0: a2 = (a >> 11)
+ vpaddq \_T1,h, h # h = h + ch + W + K
+ vpxor a1, a0, a0 # a0 = sigma1
+ vmovdqu a,\_T1
+ PRORQ_nd a1,a,39 # sig0: a1 = (a >> 22)
+ vpxor c, \_T1, \_T1 # maj: T1 = a^c
+ add $SZ4, ROUND # ROUND++
+ vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b
+ vpaddq a0, h, h
+ vpaddq h, d, d
+ vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11)
+ PRORQ a2,28 # sig0: a2 = (a >> 2) ^ (a >> 13)
+ vpxor a1, a2, a2 # a2 = sig0
+ vpand c, a, a1 # maj: a1 = a&c
+ vpor \_T1, a1, a1 # a1 = maj
+ vpaddq a1, h, h # h = h + ch + W + K + maj
+ vpaddq a2, h, h # h = h + ch + W + K + maj + sigma0
+ ROTATE_ARGS
+.endm
+
+
+#; arguments passed implicitly in preprocessor symbols i, a...h
+.macro ROUND_16_XX _T1 i
+ vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1
+ vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1
+ vmovdqu \_T1, a0
+ PRORQ \_T1,7
+ vmovdqu a1, a2
+ PRORQ a1,42
+ vpxor a0, \_T1, \_T1
+ PRORQ \_T1, 1
+ vpxor a2, a1, a1
+ PRORQ a1, 19
+ vpsrlq $7, a0, a0
+ vpxor a0, \_T1, \_T1
+ vpsrlq $6, a2, a2
+ vpxor a2, a1, a1
+ vpaddq SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1
+ vpaddq SZ4*((\i-7)&0xf)(%rsp), a1, a1
+ vpaddq a1, \_T1, \_T1
+
+ ROUND_00_15 \_T1,\i
+.endm
+
+
+# void sha512_x4_avx2(void *STATE, const int INP_SIZE)
+# arg 1 : STATE : pointer to input data
+# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1)
+ENTRY(sha512_x4_avx2)
+ # general registers preserved in outer calling routine
+ # outer calling routine saves all the XMM registers
+ # save callee-saved clobbered registers to comply with C function ABI
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ sub $STACK_SPACE1, %rsp
+
+ # Load the pre-transposed incoming digest.
+ vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a
+ vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b
+ vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c
+ vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d
+ vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e
+ vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f
+ vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g
+ vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h
+
+ lea K512_4(%rip),TBL
+
+ # load the address of each of the 4 message lanes
+ # getting ready to transpose input onto stack
+ mov _data_ptr+0*PTR_SZ(STATE),inp0
+ mov _data_ptr+1*PTR_SZ(STATE),inp1
+ mov _data_ptr+2*PTR_SZ(STATE),inp2
+ mov _data_ptr+3*PTR_SZ(STATE),inp3
+
+ xor IDX, IDX
+lloop:
+ xor ROUND, ROUND
+
+ # save old digest
+ vmovdqu a, _digest(%rsp)
+ vmovdqu b, _digest+1*SZ4(%rsp)
+ vmovdqu c, _digest+2*SZ4(%rsp)
+ vmovdqu d, _digest+3*SZ4(%rsp)
+ vmovdqu e, _digest+4*SZ4(%rsp)
+ vmovdqu f, _digest+5*SZ4(%rsp)
+ vmovdqu g, _digest+6*SZ4(%rsp)
+ vmovdqu h, _digest+7*SZ4(%rsp)
+ i = 0
+.rep 4
+ vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP
+ VMOVPD i*32(inp0, IDX), TT2
+ VMOVPD i*32(inp1, IDX), TT1
+ VMOVPD i*32(inp2, IDX), TT4
+ VMOVPD i*32(inp3, IDX), TT3
+ TRANSPOSE TT2, TT1, TT4, TT3, TT0, TT5
+ vpshufb TMP, TT0, TT0
+ vpshufb TMP, TT1, TT1
+ vpshufb TMP, TT2, TT2
+ vpshufb TMP, TT3, TT3
+ ROUND_00_15 TT0,(i*4+0)
+ ROUND_00_15 TT1,(i*4+1)
+ ROUND_00_15 TT2,(i*4+2)
+ ROUND_00_15 TT3,(i*4+3)
+ i = (i+1)
+.endr
+ add $128, IDX
+
+ i = (i*4)
+
+ jmp Lrounds_16_xx
+.align 16
+Lrounds_16_xx:
+.rep 16
+ ROUND_16_XX T1, i
+ i = (i+1)
+.endr
+ cmp $0xa00,ROUND
+ jb Lrounds_16_xx
+
+ # add old digest
+ vpaddq _digest(%rsp), a, a
+ vpaddq _digest+1*SZ4(%rsp), b, b
+ vpaddq _digest+2*SZ4(%rsp), c, c
+ vpaddq _digest+3*SZ4(%rsp), d, d
+ vpaddq _digest+4*SZ4(%rsp), e, e
+ vpaddq _digest+5*SZ4(%rsp), f, f
+ vpaddq _digest+6*SZ4(%rsp), g, g
+ vpaddq _digest+7*SZ4(%rsp), h, h
+
+ sub $1, INP_SIZE # unit is blocks
+ jne lloop
+
+ # write back to memory (state object) the transposed digest
+ vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE)
+ vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE)
+ vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE)
+ vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE)
+ vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE)
+ vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE)
+ vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE)
+ vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE)
+
+ # update input data pointers
+ add IDX, inp0
+ mov inp0, _data_ptr+0*PTR_SZ(STATE)
+ add IDX, inp1
+ mov inp1, _data_ptr+1*PTR_SZ(STATE)
+ add IDX, inp2
+ mov inp2, _data_ptr+2*PTR_SZ(STATE)
+ add IDX, inp3
+ mov inp3, _data_ptr+3*PTR_SZ(STATE)
+
+ #;;;;;;;;;;;;;;;
+ #; Postamble
+ add $STACK_SPACE1, %rsp
+ # restore callee-saved clobbered registers
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+
+ # outer calling routine restores XMM and other GP registers
+ ret
+ENDPROC(sha512_x4_avx2)
+
+.data
+.align 64
+K512_4:
+ .octa 0x428a2f98d728ae22428a2f98d728ae22,\
+ 0x428a2f98d728ae22428a2f98d728ae22
+ .octa 0x7137449123ef65cd7137449123ef65cd,\
+ 0x7137449123ef65cd7137449123ef65cd
+ .octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\
+ 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f
+ .octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\
+ 0xe9b5dba58189dbbce9b5dba58189dbbc
+ .octa 0x3956c25bf348b5383956c25bf348b538,\
+ 0x3956c25bf348b5383956c25bf348b538
+ .octa 0x59f111f1b605d01959f111f1b605d019,\
+ 0x59f111f1b605d01959f111f1b605d019
+ .octa 0x923f82a4af194f9b923f82a4af194f9b,\
+ 0x923f82a4af194f9b923f82a4af194f9b
+ .octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\
+ 0xab1c5ed5da6d8118ab1c5ed5da6d8118
+ .octa 0xd807aa98a3030242d807aa98a3030242,\
+ 0xd807aa98a3030242d807aa98a3030242
+ .octa 0x12835b0145706fbe12835b0145706fbe,\
+ 0x12835b0145706fbe12835b0145706fbe
+ .octa 0x243185be4ee4b28c243185be4ee4b28c,\
+ 0x243185be4ee4b28c243185be4ee4b28c
+ .octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\
+ 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2
+ .octa 0x72be5d74f27b896f72be5d74f27b896f,\
+ 0x72be5d74f27b896f72be5d74f27b896f
+ .octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\
+ 0x80deb1fe3b1696b180deb1fe3b1696b1
+ .octa 0x9bdc06a725c712359bdc06a725c71235,\
+ 0x9bdc06a725c712359bdc06a725c71235
+ .octa 0xc19bf174cf692694c19bf174cf692694,\
+ 0xc19bf174cf692694c19bf174cf692694
+ .octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\
+ 0xe49b69c19ef14ad2e49b69c19ef14ad2
+ .octa 0xefbe4786384f25e3efbe4786384f25e3,\
+ 0xefbe4786384f25e3efbe4786384f25e3
+ .octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\
+ 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5
+ .octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\
+ 0x240ca1cc77ac9c65240ca1cc77ac9c65
+ .octa 0x2de92c6f592b02752de92c6f592b0275,\
+ 0x2de92c6f592b02752de92c6f592b0275
+ .octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\
+ 0x4a7484aa6ea6e4834a7484aa6ea6e483
+ .octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\
+ 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4
+ .octa 0x76f988da831153b576f988da831153b5,\
+ 0x76f988da831153b576f988da831153b5
+ .octa 0x983e5152ee66dfab983e5152ee66dfab,\
+ 0x983e5152ee66dfab983e5152ee66dfab
+ .octa 0xa831c66d2db43210a831c66d2db43210,\
+ 0xa831c66d2db43210a831c66d2db43210
+ .octa 0xb00327c898fb213fb00327c898fb213f,\
+ 0xb00327c898fb213fb00327c898fb213f
+ .octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\
+ 0xbf597fc7beef0ee4bf597fc7beef0ee4
+ .octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\
+ 0xc6e00bf33da88fc2c6e00bf33da88fc2
+ .octa 0xd5a79147930aa725d5a79147930aa725,\
+ 0xd5a79147930aa725d5a79147930aa725
+ .octa 0x06ca6351e003826f06ca6351e003826f,\
+ 0x06ca6351e003826f06ca6351e003826f
+ .octa 0x142929670a0e6e70142929670a0e6e70,\
+ 0x142929670a0e6e70142929670a0e6e70
+ .octa 0x27b70a8546d22ffc27b70a8546d22ffc,\
+ 0x27b70a8546d22ffc27b70a8546d22ffc
+ .octa 0x2e1b21385c26c9262e1b21385c26c926,\
+ 0x2e1b21385c26c9262e1b21385c26c926
+ .octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\
+ 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed
+ .octa 0x53380d139d95b3df53380d139d95b3df,\
+ 0x53380d139d95b3df53380d139d95b3df
+ .octa 0x650a73548baf63de650a73548baf63de,\
+ 0x650a73548baf63de650a73548baf63de
+ .octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\
+ 0x766a0abb3c77b2a8766a0abb3c77b2a8
+ .octa 0x81c2c92e47edaee681c2c92e47edaee6,\
+ 0x81c2c92e47edaee681c2c92e47edaee6
+ .octa 0x92722c851482353b92722c851482353b,\
+ 0x92722c851482353b92722c851482353b
+ .octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\
+ 0xa2bfe8a14cf10364a2bfe8a14cf10364
+ .octa 0xa81a664bbc423001a81a664bbc423001,\
+ 0xa81a664bbc423001a81a664bbc423001
+ .octa 0xc24b8b70d0f89791c24b8b70d0f89791,\
+ 0xc24b8b70d0f89791c24b8b70d0f89791
+ .octa 0xc76c51a30654be30c76c51a30654be30,\
+ 0xc76c51a30654be30c76c51a30654be30
+ .octa 0xd192e819d6ef5218d192e819d6ef5218,\
+ 0xd192e819d6ef5218d192e819d6ef5218
+ .octa 0xd69906245565a910d69906245565a910,\
+ 0xd69906245565a910d69906245565a910
+ .octa 0xf40e35855771202af40e35855771202a,\
+ 0xf40e35855771202af40e35855771202a
+ .octa 0x106aa07032bbd1b8106aa07032bbd1b8,\
+ 0x106aa07032bbd1b8106aa07032bbd1b8
+ .octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\
+ 0x19a4c116b8d2d0c819a4c116b8d2d0c8
+ .octa 0x1e376c085141ab531e376c085141ab53,\
+ 0x1e376c085141ab531e376c085141ab53
+ .octa 0x2748774cdf8eeb992748774cdf8eeb99,\
+ 0x2748774cdf8eeb992748774cdf8eeb99
+ .octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\
+ 0x34b0bcb5e19b48a834b0bcb5e19b48a8
+ .octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\
+ 0x391c0cb3c5c95a63391c0cb3c5c95a63
+ .octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\
+ 0x4ed8aa4ae3418acb4ed8aa4ae3418acb
+ .octa 0x5b9cca4f7763e3735b9cca4f7763e373,\
+ 0x5b9cca4f7763e3735b9cca4f7763e373
+ .octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\
+ 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3
+ .octa 0x748f82ee5defb2fc748f82ee5defb2fc,\
+ 0x748f82ee5defb2fc748f82ee5defb2fc
+ .octa 0x78a5636f43172f6078a5636f43172f60,\
+ 0x78a5636f43172f6078a5636f43172f60
+ .octa 0x84c87814a1f0ab7284c87814a1f0ab72,\
+ 0x84c87814a1f0ab7284c87814a1f0ab72
+ .octa 0x8cc702081a6439ec8cc702081a6439ec,\
+ 0x8cc702081a6439ec8cc702081a6439ec
+ .octa 0x90befffa23631e2890befffa23631e28,\
+ 0x90befffa23631e2890befffa23631e28
+ .octa 0xa4506cebde82bde9a4506cebde82bde9,\
+ 0xa4506cebde82bde9a4506cebde82bde9
+ .octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\
+ 0xbef9a3f7b2c67915bef9a3f7b2c67915
+ .octa 0xc67178f2e372532bc67178f2e372532b,\
+ 0xc67178f2e372532bc67178f2e372532b
+ .octa 0xca273eceea26619cca273eceea26619c,\
+ 0xca273eceea26619cca273eceea26619c
+ .octa 0xd186b8c721c0c207d186b8c721c0c207,\
+ 0xd186b8c721c0c207d186b8c721c0c207
+ .octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\
+ 0xeada7dd6cde0eb1eeada7dd6cde0eb1e
+ .octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\
+ 0xf57d4f7fee6ed178f57d4f7fee6ed178
+ .octa 0x06f067aa72176fba06f067aa72176fba,\
+ 0x06f067aa72176fba06f067aa72176fba
+ .octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\
+ 0x0a637dc5a2c898a60a637dc5a2c898a6
+ .octa 0x113f9804bef90dae113f9804bef90dae,\
+ 0x113f9804bef90dae113f9804bef90dae
+ .octa 0x1b710b35131c471b1b710b35131c471b,\
+ 0x1b710b35131c471b1b710b35131c471b
+ .octa 0x28db77f523047d8428db77f523047d84,\
+ 0x28db77f523047d8428db77f523047d84
+ .octa 0x32caab7b40c7249332caab7b40c72493,\
+ 0x32caab7b40c7249332caab7b40c72493
+ .octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\
+ 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc
+ .octa 0x431d67c49c100d4c431d67c49c100d4c,\
+ 0x431d67c49c100d4c431d67c49c100d4c
+ .octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\
+ 0x4cc5d4becb3e42b64cc5d4becb3e42b6
+ .octa 0x597f299cfc657e2a597f299cfc657e2a,\
+ 0x597f299cfc657e2a597f299cfc657e2a
+ .octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\
+ 0x5fcb6fab3ad6faec5fcb6fab3ad6faec
+ .octa 0x6c44198c4a4758176c44198c4a475817,\
+ 0x6c44198c4a4758176c44198c4a475817
+
+PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
+ .octa 0x18191a1b1c1d1e1f1011121314151617
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 0b17c83d027d..2b0e2a6825f3 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -346,4 +346,10 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS_CRYPTO("sha512");
+MODULE_ALIAS_CRYPTO("sha512-ssse3");
+MODULE_ALIAS_CRYPTO("sha512-avx");
+MODULE_ALIAS_CRYPTO("sha512-avx2");
MODULE_ALIAS_CRYPTO("sha384");
+MODULE_ALIAS_CRYPTO("sha384-ssse3");
+MODULE_ALIAS_CRYPTO("sha384-avx");
+MODULE_ALIAS_CRYPTO("sha384-avx2");
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 9e1e27d31c6d..a1e71d431fed 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -64,22 +64,16 @@ static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
}
/*
- * We can return 0 to resume the syscall or anything else to go to phase
- * 2. If we resume the syscall, we need to put something appropriate in
- * regs->orig_ax.
- *
- * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
- * are fully functional.
- *
- * For phase 2's benefit, our return value is:
- * 0: resume the syscall
- * 1: go to phase 2; no seccomp phase 2 needed
- * anything else: go to phase 2; pass return value to seccomp
+ * Returns the syscall nr to run (which should match regs->orig_ax) or -1
+ * to skip the syscall.
*/
-unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
+static long syscall_trace_enter(struct pt_regs *regs)
{
+ u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+
struct thread_info *ti = pt_regs_to_thread_info(regs);
unsigned long ret = 0;
+ bool emulated = false;
u32 work;
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
@@ -87,11 +81,19 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
+ if (unlikely(work & _TIF_SYSCALL_EMU))
+ emulated = true;
+
+ if ((emulated || (work & _TIF_SYSCALL_TRACE)) &&
+ tracehook_report_syscall_entry(regs))
+ return -1L;
+
+ if (emulated)
+ return -1L;
+
#ifdef CONFIG_SECCOMP
/*
- * Do seccomp first -- it should minimize exposure of other
- * code, and keeping seccomp fast is probably more valuable
- * than the rest of this.
+ * Do seccomp after ptrace, to catch any tracer changes.
*/
if (work & _TIF_SECCOMP) {
struct seccomp_data sd;
@@ -118,69 +120,12 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
sd.args[5] = regs->bp;
}
- BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
- BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);
-
- ret = seccomp_phase1(&sd);
- if (ret == SECCOMP_PHASE1_SKIP) {
- regs->orig_ax = -1;
- ret = 0;
- } else if (ret != SECCOMP_PHASE1_OK) {
- return ret; /* Go directly to phase 2 */
- }
-
- work &= ~_TIF_SECCOMP;
- }
-#endif
-
- /* Do our best to finish without phase 2. */
- if (work == 0)
- return ret; /* seccomp and/or nohz only (ret == 0 here) */
-
-#ifdef CONFIG_AUDITSYSCALL
- if (work == _TIF_SYSCALL_AUDIT) {
- /*
- * If there is no more work to be done except auditing,
- * then audit in phase 1. Phase 2 always audits, so, if
- * we audit here, then we can't go on to phase 2.
- */
- do_audit_syscall_entry(regs, arch);
- return 0;
+ ret = __secure_computing(&sd);
+ if (ret == -1)
+ return ret;
}
#endif
- return 1; /* Something is enabled that we can't handle in phase 1 */
-}
-
-/* Returns the syscall nr to run (which should match regs->orig_ax). */
-long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
- unsigned long phase1_result)
-{
- struct thread_info *ti = pt_regs_to_thread_info(regs);
- long ret = 0;
- u32 work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
-
- if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
- BUG_ON(regs != task_pt_regs(current));
-
-#ifdef CONFIG_SECCOMP
- /*
- * Call seccomp_phase2 before running the other hooks so that
- * they can see any changes made by a seccomp tracer.
- */
- if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
- /* seccomp failures shouldn't expose any additional code. */
- return -1;
- }
-#endif
-
- if (unlikely(work & _TIF_SYSCALL_EMU))
- ret = -1L;
-
- if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
- tracehook_report_syscall_entry(regs))
- ret = -1L;
-
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->orig_ax);
@@ -189,17 +134,6 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
return ret ?: regs->orig_ax;
}
-long syscall_trace_enter(struct pt_regs *regs)
-{
- u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
- unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
-
- if (phase1_result == 0)
- return regs->orig_ax;
- else
- return syscall_trace_enter_phase2(regs, arch, phase1_result);
-}
-
#define EXIT_TO_USERMODE_LOOP_FLAGS \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 3329844e3c43..f840766659a8 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -331,15 +331,9 @@ static void vgetcpu_cpu_init(void *arg)
write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}
-static int
-vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
+static int vgetcpu_online(unsigned int cpu)
{
- long cpu = (long)arg;
-
- if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
- smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
-
- return NOTIFY_DONE;
+ return smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
}
static int __init init_vdso(void)
@@ -350,15 +344,9 @@ static int __init init_vdso(void)
init_vdso_image(&vdso_image_x32);
#endif
- cpu_notifier_register_begin();
-
- on_each_cpu(vgetcpu_cpu_init, NULL, 1);
/* notifier priority > KVM */
- __hotcpu_notifier(vgetcpu_cpu_notifier, 30);
-
- cpu_notifier_register_done();
-
- return 0;
+ return cpuhp_setup_state(CPUHP_AP_X86_VDSO_VMA_ONLINE,
+ "AP_X86_VDSO_VMA_ONLINE", vgetcpu_online, NULL);
}
subsys_initcall(init_vdso);
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 75fc719b7f31..636c4b341f36 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -207,7 +207,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
*/
regs->orig_ax = syscall_nr;
regs->ax = -ENOSYS;
- tmp = secure_computing();
+ tmp = secure_computing(NULL);
if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
warn_bad_vsyscall(KERN_DEBUG, regs,
"seccomp tried to change syscall nr or ip");
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index bd3e8421b57c..e07a22bb9308 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -370,13 +370,13 @@ static int amd_pmu_cpu_prepare(int cpu)
WARN_ON_ONCE(cpuc->amd_nb);
if (!x86_pmu.amd_nb_constraints)
- return NOTIFY_OK;
+ return 0;
cpuc->amd_nb = amd_alloc_nb(cpu);
if (!cpuc->amd_nb)
- return NOTIFY_BAD;
+ return -ENOMEM;
- return NOTIFY_OK;
+ return 0;
}
static void amd_pmu_cpu_starting(int cpu)
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index feb90f6730e8..155ea5324ae0 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -655,8 +655,12 @@ fail:
}
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
- raw.size = sizeof(u32) + ibs_data.size;
- raw.data = ibs_data.data;
+ raw = (struct perf_raw_record){
+ .frag = {
+ .size = sizeof(u32) + ibs_data.size,
+ .data = ibs_data.data,
+ },
+ };
data.raw = &raw;
}
@@ -721,13 +725,10 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
return ret;
}
-static __init int perf_event_ibs_init(void)
+static __init void perf_event_ibs_init(void)
{
struct attribute **attr = ibs_op_format_attrs;
- if (!ibs_caps)
- return -ENODEV; /* ibs not supported by the cpu */
-
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
if (ibs_caps & IBS_CAPS_OPCNT) {
@@ -738,13 +739,11 @@ static __init int perf_event_ibs_init(void)
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
-
- return 0;
}
#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
-static __init int perf_event_ibs_init(void) { return 0; }
+static __init void perf_event_ibs_init(void) { }
#endif
@@ -921,7 +920,7 @@ static inline int get_ibs_lvt_offset(void)
return val & IBSCTL_LVT_OFFSET_MASK;
}
-static void setup_APIC_ibs(void *dummy)
+static void setup_APIC_ibs(void)
{
int offset;
@@ -936,7 +935,7 @@ failed:
smp_processor_id());
}
-static void clear_APIC_ibs(void *dummy)
+static void clear_APIC_ibs(void)
{
int offset;
@@ -945,18 +944,24 @@ static void clear_APIC_ibs(void *dummy)
setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}
+static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
+{
+ setup_APIC_ibs();
+ return 0;
+}
+
#ifdef CONFIG_PM
static int perf_ibs_suspend(void)
{
- clear_APIC_ibs(NULL);
+ clear_APIC_ibs();
return 0;
}
static void perf_ibs_resume(void)
{
ibs_eilvt_setup();
- setup_APIC_ibs(NULL);
+ setup_APIC_ibs();
}
static struct syscore_ops perf_ibs_syscore_ops = {
@@ -975,27 +980,15 @@ static inline void perf_ibs_pm_init(void) { }
#endif
-static int
-perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu)
{
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_STARTING:
- setup_APIC_ibs(NULL);
- break;
- case CPU_DYING:
- clear_APIC_ibs(NULL);
- break;
- default:
- break;
- }
-
- return NOTIFY_OK;
+ clear_APIC_ibs();
+ return 0;
}
static __init int amd_ibs_init(void)
{
u32 caps;
- int ret = -EINVAL;
caps = __get_ibs_caps();
if (!caps)
@@ -1004,22 +997,25 @@ static __init int amd_ibs_init(void)
ibs_eilvt_setup();
if (!ibs_eilvt_valid())
- goto out;
+ return -EINVAL;
perf_ibs_pm_init();
- cpu_notifier_register_begin();
+
ibs_caps = caps;
/* make ibs_caps visible to other cpus: */
smp_mb();
- smp_call_function(setup_APIC_ibs, NULL, 1);
- __perf_cpu_notifier(perf_ibs_cpu_notifier);
- cpu_notifier_register_done();
+ /*
+ * x86_pmu_amd_ibs_starting_cpu will be called from core on
+ * all online cpus.
+ */
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
+ "AP_PERF_X86_AMD_IBS_STARTING",
+ x86_pmu_amd_ibs_starting_cpu,
+ x86_pmu_amd_ibs_dying_cpu);
- ret = perf_event_ibs_init();
-out:
- if (ret)
- pr_err("Failed to setup IBS, %d\n", ret);
- return ret;
+ perf_event_ibs_init();
+
+ return 0;
}
/* Since we need the pci subsystem to init ibs we can't do this earlier: */
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
index 55a3529dbf12..9842270ed2f2 100644
--- a/arch/x86/events/amd/power.c
+++ b/arch/x86/events/amd/power.c
@@ -228,12 +228,12 @@ static struct pmu pmu_class = {
.read = pmu_event_read,
};
-static void power_cpu_exit(int cpu)
+static int power_cpu_exit(unsigned int cpu)
{
int target;
if (!cpumask_test_and_clear_cpu(cpu, &cpu_mask))
- return;
+ return 0;
/*
* Find a new CPU on the same compute unit, if was set in cpumask
@@ -245,9 +245,10 @@ static void power_cpu_exit(int cpu)
cpumask_set_cpu(target, &cpu_mask);
perf_pmu_migrate_context(&pmu_class, cpu, target);
}
+ return 0;
}
-static void power_cpu_init(int cpu)
+static int power_cpu_init(unsigned int cpu)
{
int target;
@@ -255,7 +256,7 @@ static void power_cpu_init(int cpu)
* 1) If any CPU is set at cpu_mask in the same compute unit, do
* nothing.
* 2) If no CPU is set at cpu_mask in the same compute unit,
- * set current STARTING CPU.
+ * set current ONLINE CPU.
*
* Note: if there is a CPU aside of the new one already in the
* sibling mask, then it is also in cpu_mask.
@@ -263,33 +264,9 @@ static void power_cpu_init(int cpu)
target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
if (target >= nr_cpumask_bits)
cpumask_set_cpu(cpu, &cpu_mask);
+ return 0;
}
-static int
-power_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
-{
- unsigned int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DOWN_FAILED:
- case CPU_STARTING:
- power_cpu_init(cpu);
- break;
- case CPU_DOWN_PREPARE:
- power_cpu_exit(cpu);
- break;
- default:
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block power_cpu_notifier_nb = {
- .notifier_call = power_cpu_notifier,
- .priority = CPU_PRI_PERF,
-};
-
static const struct x86_cpu_id cpu_match[] = {
{ .vendor = X86_VENDOR_AMD, .family = 0x15 },
{},
@@ -297,7 +274,7 @@ static const struct x86_cpu_id cpu_match[] = {
static int __init amd_power_pmu_init(void)
{
- int cpu, target, ret;
+ int ret;
if (!x86_match_cpu(cpu_match))
return 0;
@@ -312,38 +289,25 @@ static int __init amd_power_pmu_init(void)
return -ENODEV;
}
- cpu_notifier_register_begin();
- /* Choose one online core of each compute unit. */
- for_each_online_cpu(cpu) {
- target = cpumask_first(topology_sibling_cpumask(cpu));
- if (!cpumask_test_cpu(target, &cpu_mask))
- cpumask_set_cpu(target, &cpu_mask);
- }
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
+ "AP_PERF_X86_AMD_POWER_ONLINE",
+ power_cpu_init, power_cpu_exit);
ret = perf_pmu_register(&pmu_class, "power", -1);
if (WARN_ON(ret)) {
pr_warn("AMD Power PMU registration failed\n");
- goto out;
+ return ret;
}
- __register_cpu_notifier(&power_cpu_notifier_nb);
-
pr_info("AMD Power PMU detected\n");
-
-out:
- cpu_notifier_register_done();
-
return ret;
}
module_init(amd_power_pmu_init);
static void __exit amd_power_pmu_exit(void)
{
- cpu_notifier_register_begin();
- __unregister_cpu_notifier(&power_cpu_notifier_nb);
- cpu_notifier_register_done();
-
+ cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE);
perf_pmu_unregister(&pmu_class);
}
module_exit(amd_power_pmu_exit);
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 98ac57381bf9..e6131d4454e6 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -358,7 +358,7 @@ amd_uncore_find_online_sibling(struct amd_uncore *this,
return this;
}
-static void amd_uncore_cpu_starting(unsigned int cpu)
+static int amd_uncore_cpu_starting(unsigned int cpu)
{
unsigned int eax, ebx, ecx, edx;
struct amd_uncore *uncore;
@@ -384,6 +384,8 @@ static void amd_uncore_cpu_starting(unsigned int cpu)
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
*per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
}
+
+ return 0;
}
static void uncore_online(unsigned int cpu,
@@ -398,13 +400,15 @@ static void uncore_online(unsigned int cpu,
cpumask_set_cpu(cpu, uncore->active_mask);
}
-static void amd_uncore_cpu_online(unsigned int cpu)
+static int amd_uncore_cpu_online(unsigned int cpu)
{
if (amd_uncore_nb)
uncore_online(cpu, amd_uncore_nb);
if (amd_uncore_l2)
uncore_online(cpu, amd_uncore_l2);
+
+ return 0;
}
static void uncore_down_prepare(unsigned int cpu,
@@ -433,13 +437,15 @@ static void uncore_down_prepare(unsigned int cpu,
}
}
-static void amd_uncore_cpu_down_prepare(unsigned int cpu)
+static int amd_uncore_cpu_down_prepare(unsigned int cpu)
{
if (amd_uncore_nb)
uncore_down_prepare(cpu, amd_uncore_nb);
if (amd_uncore_l2)
uncore_down_prepare(cpu, amd_uncore_l2);
+
+ return 0;
}
static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
@@ -454,74 +460,19 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
*per_cpu_ptr(uncores, cpu) = NULL;
}
-static void amd_uncore_cpu_dead(unsigned int cpu)
+static int amd_uncore_cpu_dead(unsigned int cpu)
{
if (amd_uncore_nb)
uncore_dead(cpu, amd_uncore_nb);
if (amd_uncore_l2)
uncore_dead(cpu, amd_uncore_l2);
-}
-
-static int
-amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
- void *hcpu)
-{
- unsigned int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- if (amd_uncore_cpu_up_prepare(cpu))
- return notifier_from_errno(-ENOMEM);
- break;
-
- case CPU_STARTING:
- amd_uncore_cpu_starting(cpu);
- break;
-
- case CPU_ONLINE:
- amd_uncore_cpu_online(cpu);
- break;
-
- case CPU_DOWN_PREPARE:
- amd_uncore_cpu_down_prepare(cpu);
- break;
-
- case CPU_UP_CANCELED:
- case CPU_DEAD:
- amd_uncore_cpu_dead(cpu);
- break;
-
- default:
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block amd_uncore_cpu_notifier_block = {
- .notifier_call = amd_uncore_cpu_notifier,
- .priority = CPU_PRI_PERF + 1,
-};
-
-static void __init init_cpu_already_online(void *dummy)
-{
- unsigned int cpu = smp_processor_id();
-
- amd_uncore_cpu_starting(cpu);
- amd_uncore_cpu_online(cpu);
-}
-static void cleanup_cpu_online(void *dummy)
-{
- unsigned int cpu = smp_processor_id();
-
- amd_uncore_cpu_dead(cpu);
+ return 0;
}
static int __init amd_uncore_init(void)
{
- unsigned int cpu, cpu2;
int ret = -ENODEV;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
@@ -558,38 +509,29 @@ static int __init amd_uncore_init(void)
ret = 0;
}
- if (ret)
- goto fail_nodev;
-
- cpu_notifier_register_begin();
-
- /* init cpus already online before registering for hotplug notifier */
- for_each_online_cpu(cpu) {
- ret = amd_uncore_cpu_up_prepare(cpu);
- if (ret)
- goto fail_online;
- smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
- }
-
- __register_cpu_notifier(&amd_uncore_cpu_notifier_block);
- cpu_notifier_register_done();
-
+ /*
+ * Install callbacks. Core will call them for each online cpu.
+ */
+ if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
+ "PERF_X86_AMD_UNCORE_PREP",
+ amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead))
+ goto fail_l2;
+
+ if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
+ "AP_PERF_X86_AMD_UNCORE_STARTING",
+ amd_uncore_cpu_starting, NULL))
+ goto fail_prep;
+ if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
+ "AP_PERF_X86_AMD_UNCORE_ONLINE",
+ amd_uncore_cpu_online,
+ amd_uncore_cpu_down_prepare))
+ goto fail_start;
return 0;
-
-fail_online:
- for_each_online_cpu(cpu2) {
- if (cpu2 == cpu)
- break;
- smp_call_function_single(cpu, cleanup_cpu_online, NULL, 1);
- }
- cpu_notifier_register_done();
-
- /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
- amd_uncore_nb = amd_uncore_l2 = NULL;
-
- if (boot_cpu_has(X86_FEATURE_PERFCTR_L2))
- perf_pmu_unregister(&amd_l2_pmu);
+fail_start:
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
+fail_prep:
+ cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
fail_l2:
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
perf_pmu_unregister(&amd_nb_pmu);
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index dfebbde2a4cc..c17f0de5fd39 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1477,49 +1477,49 @@ NOKPROBE_SYMBOL(perf_event_nmi_handler);
struct event_constraint emptyconstraint;
struct event_constraint unconstrained;
-static int
-x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+static int x86_pmu_prepare_cpu(unsigned int cpu)
{
- unsigned int cpu = (long)hcpu;
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- int i, ret = NOTIFY_OK;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
- cpuc->kfree_on_online[i] = NULL;
- if (x86_pmu.cpu_prepare)
- ret = x86_pmu.cpu_prepare(cpu);
- break;
-
- case CPU_STARTING:
- if (x86_pmu.cpu_starting)
- x86_pmu.cpu_starting(cpu);
- break;
+ int i;
- case CPU_ONLINE:
- for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
- kfree(cpuc->kfree_on_online[i]);
- cpuc->kfree_on_online[i] = NULL;
- }
- break;
+ for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
+ cpuc->kfree_on_online[i] = NULL;
+ if (x86_pmu.cpu_prepare)
+ return x86_pmu.cpu_prepare(cpu);
+ return 0;
+}
- case CPU_DYING:
- if (x86_pmu.cpu_dying)
- x86_pmu.cpu_dying(cpu);
- break;
+static int x86_pmu_dead_cpu(unsigned int cpu)
+{
+ if (x86_pmu.cpu_dead)
+ x86_pmu.cpu_dead(cpu);
+ return 0;
+}
- case CPU_UP_CANCELED:
- case CPU_DEAD:
- if (x86_pmu.cpu_dead)
- x86_pmu.cpu_dead(cpu);
- break;
+static int x86_pmu_online_cpu(unsigned int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ int i;
- default:
- break;
+ for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
+ kfree(cpuc->kfree_on_online[i]);
+ cpuc->kfree_on_online[i] = NULL;
}
+ return 0;
+}
- return ret;
+static int x86_pmu_starting_cpu(unsigned int cpu)
+{
+ if (x86_pmu.cpu_starting)
+ x86_pmu.cpu_starting(cpu);
+ return 0;
+}
+
+static int x86_pmu_dying_cpu(unsigned int cpu)
+{
+ if (x86_pmu.cpu_dying)
+ x86_pmu.cpu_dying(cpu);
+ return 0;
}
static void __init pmu_check_apic(void)
@@ -1787,10 +1787,39 @@ static int __init init_hw_perf_events(void)
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
- perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
- perf_cpu_notifier(x86_pmu_notifier);
+ /*
+ * Install callbacks. Core will call them for each online
+ * cpu.
+ */
+ err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "PERF_X86_PREPARE",
+ x86_pmu_prepare_cpu, x86_pmu_dead_cpu);
+ if (err)
+ return err;
+
+ err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING,
+ "AP_PERF_X86_STARTING", x86_pmu_starting_cpu,
+ x86_pmu_dying_cpu);
+ if (err)
+ goto out;
+
+ err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "AP_PERF_X86_ONLINE",
+ x86_pmu_online_cpu, NULL);
+ if (err)
+ goto out1;
+
+ err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+ if (err)
+ goto out2;
return 0;
+
+out2:
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_ONLINE);
+out1:
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING);
+out:
+ cpuhp_remove_state(CPUHP_PERF_X86_PREPARE);
+ return err;
}
early_initcall(init_hw_perf_events);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 0974ba11e954..2cbde2f449aa 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3109,7 +3109,7 @@ static int intel_pmu_cpu_prepare(int cpu)
cpuc->excl_thread_id = 0;
}
- return NOTIFY_OK;
+ return 0;
err_constraint_list:
kfree(cpuc->constraint_list);
@@ -3120,7 +3120,7 @@ err_shared_regs:
cpuc->shared_regs = NULL;
err:
- return NOTIFY_BAD;
+ return -ENOMEM;
}
static void intel_pmu_cpu_starting(int cpu)
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 7b5fd811ef45..783c49ddef29 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -1577,7 +1577,7 @@ static inline void cqm_pick_event_reader(int cpu)
cpumask_set_cpu(cpu, &cqm_cpumask);
}
-static void intel_cqm_cpu_starting(unsigned int cpu)
+static int intel_cqm_cpu_starting(unsigned int cpu)
{
struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -1588,39 +1588,26 @@ static void intel_cqm_cpu_starting(unsigned int cpu)
WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
+
+ cqm_pick_event_reader(cpu);
+ return 0;
}
-static void intel_cqm_cpu_exit(unsigned int cpu)
+static int intel_cqm_cpu_exit(unsigned int cpu)
{
int target;
/* Is @cpu the current cqm reader for this package ? */
if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
- return;
+ return 0;
/* Find another online reader in this package */
target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
if (target < nr_cpu_ids)
cpumask_set_cpu(target, &cqm_cpumask);
-}
-
-static int intel_cqm_cpu_notifier(struct notifier_block *nb,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DOWN_PREPARE:
- intel_cqm_cpu_exit(cpu);
- break;
- case CPU_STARTING:
- intel_cqm_cpu_starting(cpu);
- cqm_pick_event_reader(cpu);
- break;
- }
- return NOTIFY_OK;
+ return 0;
}
static const struct x86_cpu_id intel_cqm_match[] = {
@@ -1682,7 +1669,7 @@ out:
static int __init intel_cqm_init(void)
{
char *str = NULL, scale[20];
- int i, cpu, ret;
+ int cpu, ret;
if (x86_match_cpu(intel_cqm_match))
cqm_enabled = true;
@@ -1705,8 +1692,7 @@ static int __init intel_cqm_init(void)
*
* Also, check that the scales match on all cpus.
*/
- cpu_notifier_register_begin();
-
+ get_online_cpus();
for_each_online_cpu(cpu) {
struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -1743,11 +1729,6 @@ static int __init intel_cqm_init(void)
if (ret)
goto out;
- for_each_online_cpu(i) {
- intel_cqm_cpu_starting(i);
- cqm_pick_event_reader(i);
- }
-
if (mbm_enabled)
ret = intel_mbm_init();
if (ret && !cqm_enabled)
@@ -1772,12 +1753,18 @@ static int __init intel_cqm_init(void)
pr_info("Intel MBM enabled\n");
/*
- * Register the hot cpu notifier once we are sure cqm
+ * Setup the hot cpu notifier once we are sure cqm
* is enabled to avoid notifier leak.
*/
- __perf_cpu_notifier(intel_cqm_cpu_notifier);
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_STARTING,
+ "AP_PERF_X86_CQM_STARTING",
+ intel_cqm_cpu_starting, NULL);
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_ONLINE, "AP_PERF_X86_CQM_ONLINE",
+ NULL, intel_cqm_cpu_exit);
+
out:
- cpu_notifier_register_done();
+ put_online_cpus();
+
if (ret) {
kfree(str);
cqm_cleanup();
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 4c7638b91fa5..3ca87b5a8677 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -366,7 +366,7 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode)
* Check if exiting cpu is the designated reader. If so migrate the
* events when there is a valid target available
*/
-static void cstate_cpu_exit(int cpu)
+static int cstate_cpu_exit(unsigned int cpu)
{
unsigned int target;
@@ -391,9 +391,10 @@ static void cstate_cpu_exit(int cpu)
perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
}
}
+ return 0;
}
-static void cstate_cpu_init(int cpu)
+static int cstate_cpu_init(unsigned int cpu)
{
unsigned int target;
@@ -415,31 +416,10 @@ static void cstate_cpu_init(int cpu)
topology_core_cpumask(cpu));
if (has_cstate_pkg && target >= nr_cpu_ids)
cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
-}
-static int cstate_cpu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_STARTING:
- cstate_cpu_init(cpu);
- break;
- case CPU_DOWN_PREPARE:
- cstate_cpu_exit(cpu);
- break;
- default:
- break;
- }
- return NOTIFY_OK;
+ return 0;
}
-static struct notifier_block cstate_cpu_nb = {
- .notifier_call = cstate_cpu_notifier,
- .priority = CPU_PRI_PERF + 1,
-};
-
static struct pmu cstate_core_pmu = {
.attr_groups = core_attr_groups,
.name = "cstate_core",
@@ -600,18 +580,20 @@ static inline void cstate_cleanup(void)
static int __init cstate_init(void)
{
- int cpu, err;
+ int err;
- cpu_notifier_register_begin();
- for_each_online_cpu(cpu)
- cstate_cpu_init(cpu);
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING,
+ "AP_PERF_X86_CSTATE_STARTING", cstate_cpu_init,
+ NULL);
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE,
+ "AP_PERF_X86_CSTATE_ONLINE", NULL, cstate_cpu_exit);
if (has_cstate_core) {
err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
if (err) {
has_cstate_core = false;
pr_info("Failed to register cstate core pmu\n");
- goto out;
+ return err;
}
}
@@ -621,12 +603,10 @@ static int __init cstate_init(void)
has_cstate_pkg = false;
pr_info("Failed to register cstate pkg pmu\n");
cstate_cleanup();
- goto out;
+ return err;
}
}
- __register_cpu_notifier(&cstate_cpu_nb);
-out:
- cpu_notifier_register_done();
+
return err;
}
@@ -652,9 +632,8 @@ module_init(cstate_pmu_init);
static void __exit cstate_pmu_exit(void)
{
- cpu_notifier_register_begin();
- __unregister_cpu_notifier(&cstate_cpu_nb);
+ cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
+ cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);
cstate_cleanup();
- cpu_notifier_register_done();
}
module_exit(cstate_pmu_exit);
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index d0c58b35155f..28865938aadf 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -556,14 +556,14 @@ const struct attribute_group *rapl_attr_groups[] = {
NULL,
};
-static void rapl_cpu_exit(int cpu)
+static int rapl_cpu_offline(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
int target;
/* Check if exiting cpu is used for collecting rapl events */
if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
- return;
+ return 0;
pmu->cpu = -1;
/* Find a new cpu to collect rapl events */
@@ -575,9 +575,10 @@ static void rapl_cpu_exit(int cpu)
pmu->cpu = target;
perf_pmu_migrate_context(pmu->pmu, cpu, target);
}
+ return 0;
}
-static void rapl_cpu_init(int cpu)
+static int rapl_cpu_online(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
int target;
@@ -588,13 +589,14 @@ static void rapl_cpu_init(int cpu)
*/
target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
if (target < nr_cpu_ids)
- return;
+ return 0;
cpumask_set_cpu(cpu, &rapl_cpu_mask);
pmu->cpu = cpu;
+ return 0;
}
-static int rapl_cpu_prepare(int cpu)
+static int rapl_cpu_prepare(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
@@ -615,33 +617,6 @@ static int rapl_cpu_prepare(int cpu)
return 0;
}
-static int rapl_cpu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- rapl_cpu_prepare(cpu);
- break;
-
- case CPU_DOWN_FAILED:
- case CPU_ONLINE:
- rapl_cpu_init(cpu);
- break;
-
- case CPU_DOWN_PREPARE:
- rapl_cpu_exit(cpu);
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block rapl_cpu_nb = {
- .notifier_call = rapl_cpu_notifier,
- .priority = CPU_PRI_PERF + 1,
-};
-
static int rapl_check_hw_unit(bool apply_quirk)
{
u64 msr_rapl_power_unit_bits;
@@ -692,24 +667,6 @@ static void __init rapl_advertise(void)
}
}
-static int __init rapl_prepare_cpus(void)
-{
- unsigned int cpu, pkg;
- int ret;
-
- for_each_online_cpu(cpu) {
- pkg = topology_logical_package_id(cpu);
- if (rapl_pmus->pmus[pkg])
- continue;
-
- ret = rapl_cpu_prepare(cpu);
- if (ret)
- return ret;
- rapl_cpu_init(cpu);
- }
- return 0;
-}
-
static void cleanup_rapl_pmus(void)
{
int i;
@@ -837,35 +794,44 @@ static int __init rapl_pmu_init(void)
if (ret)
return ret;
- cpu_notifier_register_begin();
+ /*
+ * Install callbacks. Core will call them for each online cpu.
+ */
- ret = rapl_prepare_cpus();
+ ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "PERF_X86_RAPL_PREP",
+ rapl_cpu_prepare, NULL);
if (ret)
goto out;
+ ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
+ "AP_PERF_X86_RAPL_ONLINE",
+ rapl_cpu_online, rapl_cpu_offline);
+ if (ret)
+ goto out1;
+
ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
if (ret)
- goto out;
+ goto out2;
- __register_cpu_notifier(&rapl_cpu_nb);
- cpu_notifier_register_done();
rapl_advertise();
return 0;
+out2:
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+out1:
+ cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP);
out:
pr_warn("Initialization failed (%d), disabled\n", ret);
cleanup_rapl_pmus();
- cpu_notifier_register_done();
return ret;
}
module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void)
{
- cpu_notifier_register_begin();
- __unregister_cpu_notifier(&rapl_cpu_nb);
+ cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+ cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP);
perf_pmu_unregister(&rapl_pmus->pmu);
cleanup_rapl_pmus();
- cpu_notifier_register_done();
}
module_exit(intel_rapl_exit);
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 59b4974c697f..3f3d0d67749b 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1052,7 +1052,7 @@ static void uncore_pci_exit(void)
}
}
-static void uncore_cpu_dying(int cpu)
+static int uncore_cpu_dying(unsigned int cpu)
{
struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
@@ -1069,16 +1069,19 @@ static void uncore_cpu_dying(int cpu)
uncore_box_exit(box);
}
}
+ return 0;
}
-static void uncore_cpu_starting(int cpu, bool init)
+static int first_init;
+
+static int uncore_cpu_starting(unsigned int cpu)
{
struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
int i, pkg, ncpus = 1;
- if (init) {
+ if (first_init) {
/*
* On init we get the number of online cpus in the package
* and set refcount for all of them.
@@ -1099,9 +1102,11 @@ static void uncore_cpu_starting(int cpu, bool init)
uncore_box_init(box);
}
}
+
+ return 0;
}
-static int uncore_cpu_prepare(int cpu)
+static int uncore_cpu_prepare(unsigned int cpu)
{
struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
@@ -1164,13 +1169,13 @@ static void uncore_change_context(struct intel_uncore_type **uncores,
uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
}
-static void uncore_event_exit_cpu(int cpu)
+static int uncore_event_cpu_offline(unsigned int cpu)
{
int target;
/* Check if exiting cpu is used for collecting uncore events */
if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
- return;
+ return 0;
/* Find a new cpu to collect uncore events */
target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
@@ -1183,9 +1188,10 @@ static void uncore_event_exit_cpu(int cpu)
uncore_change_context(uncore_msr_uncores, cpu, target);
uncore_change_context(uncore_pci_uncores, cpu, target);
+ return 0;
}
-static void uncore_event_init_cpu(int cpu)
+static int uncore_event_cpu_online(unsigned int cpu)
{
int target;
@@ -1195,50 +1201,15 @@ static void uncore_event_init_cpu(int cpu)
*/
target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
if (target < nr_cpu_ids)
- return;
+ return 0;
cpumask_set_cpu(cpu, &uncore_cpu_mask);
uncore_change_context(uncore_msr_uncores, -1, cpu);
uncore_change_context(uncore_pci_uncores, -1, cpu);
+ return 0;
}
-static int uncore_cpu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- return notifier_from_errno(uncore_cpu_prepare(cpu));
-
- case CPU_STARTING:
- uncore_cpu_starting(cpu, false);
- case CPU_DOWN_FAILED:
- uncore_event_init_cpu(cpu);
- break;
-
- case CPU_UP_CANCELED:
- case CPU_DYING:
- uncore_cpu_dying(cpu);
- break;
-
- case CPU_DOWN_PREPARE:
- uncore_event_exit_cpu(cpu);
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block uncore_cpu_nb = {
- .notifier_call = uncore_cpu_notifier,
- /*
- * to migrate uncore events, our notifier should be executed
- * before perf core's notifier.
- */
- .priority = CPU_PRI_PERF + 1,
-};
-
static int __init type_pmu_register(struct intel_uncore_type *type)
{
int i, ret;
@@ -1282,41 +1253,6 @@ err:
return ret;
}
-static void __init uncore_cpu_setup(void *dummy)
-{
- uncore_cpu_starting(smp_processor_id(), true);
-}
-
-/* Lazy to avoid allocation of a few bytes for the normal case */
-static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
-
-static int __init uncore_cpumask_init(bool msr)
-{
- unsigned int cpu;
-
- for_each_online_cpu(cpu) {
- unsigned int pkg = topology_logical_package_id(cpu);
- int ret;
-
- if (test_and_set_bit(pkg, packages))
- continue;
- /*
- * The first online cpu of each package allocates and takes
- * the refcounts for all other online cpus in that package.
- * If msrs are not enabled no allocation is required.
- */
- if (msr) {
- ret = uncore_cpu_prepare(cpu);
- if (ret)
- return ret;
- }
- uncore_event_init_cpu(cpu);
- smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
- }
- __register_cpu_notifier(&uncore_cpu_nb);
- return 0;
-}
-
#define X86_UNCORE_MODEL_MATCH(model, init) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
@@ -1440,11 +1376,33 @@ static int __init intel_uncore_init(void)
if (cret && pret)
return -ENODEV;
- cpu_notifier_register_begin();
- ret = uncore_cpumask_init(!cret);
- if (ret)
- goto err;
- cpu_notifier_register_done();
+ /*
+ * Install callbacks. Core will call them for each online cpu.
+ *
+ * The first online cpu of each package allocates and takes
+ * the refcounts for all other online cpus in that package.
+ * If msrs are not enabled no allocation is required and
+ * uncore_cpu_prepare() is not called for each online cpu.
+ */
+ if (!cret) {
+ ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP,
+ "PERF_X86_UNCORE_PREP",
+ uncore_cpu_prepare, NULL);
+ if (ret)
+ goto err;
+ } else {
+ cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP,
+ "PERF_X86_UNCORE_PREP",
+ uncore_cpu_prepare, NULL);
+ }
+ first_init = 1;
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING,
+ "AP_PERF_X86_UNCORE_STARTING",
+ uncore_cpu_starting, uncore_cpu_dying);
+ first_init = 0;
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
+ "AP_PERF_X86_UNCORE_ONLINE",
+ uncore_event_cpu_online, uncore_event_cpu_offline);
return 0;
err:
@@ -1452,17 +1410,16 @@ err:
on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
uncore_types_exit(uncore_msr_uncores);
uncore_pci_exit();
- cpu_notifier_register_done();
return ret;
}
module_init(intel_uncore_init);
static void __exit intel_uncore_exit(void)
{
- cpu_notifier_register_begin();
- __unregister_cpu_notifier(&uncore_cpu_nb);
+ cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
+ cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING);
+ cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP);
uncore_types_exit(uncore_msr_uncores);
uncore_pci_exit();
- cpu_notifier_register_done();
}
module_exit(intel_uncore_exit);
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 94c18ebfd68c..5391b0ae7cc3 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -145,7 +145,6 @@ static inline void disable_acpi(void) { }
#define ARCH_HAS_POWER_INIT 1
#ifdef CONFIG_ACPI_NUMA
-extern int acpi_numa;
extern int x86_acpi_numa_init(void);
#endif /* CONFIG_ACPI_NUMA */
@@ -170,4 +169,6 @@ static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
}
#endif
+#define ACPI_TABLE_UPGRADE_MAX_PHYS (max_low_pfn_mapped << PAGE_SHIFT)
+
#endif /* _ASM_X86_ACPI_H */
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 59d34c521d96..9b7fa6313f1a 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -16,6 +16,7 @@ extern void prefill_possible_map(void);
static inline void prefill_possible_map(void) {}
#define cpu_physical_id(cpu) boot_cpu_physical_apicid
+#define cpu_acpi_id(cpu) 0
#define safe_smp_processor_id() 0
#endif /* CONFIG_SMP */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c64b1e9c5d1a..d683993248c8 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -225,7 +225,6 @@
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 5a73a9c62c39..56f4c6676b29 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -64,8 +64,6 @@
#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7
-#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
-#define MSR_IVT_TURBO_RATIO_LIMIT 0x000001ae
#define MSR_TURBO_RATIO_LIMIT 0x000001ad
#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
#define MSR_TURBO_RATIO_LIMIT2 0x000001af
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 574c23cf761a..b6d425999f99 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -81,7 +81,11 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
struct page *page;
- page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
+ gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
+
+ if (mm == &init_mm)
+ gfp &= ~__GFP_ACCOUNT;
+ page = alloc_pages(gfp, 0);
if (!page)
return NULL;
if (!pgtable_pmd_page_ctor(page)) {
@@ -125,7 +129,11 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return (pud_t *)get_zeroed_page(GFP_KERNEL);
+ gfp_t gfp = GFP_KERNEL_ACCOUNT;
+
+ if (mm == &init_mm)
+ gfp &= ~__GFP_ACCOUNT;
+ return (pud_t *)get_zeroed_page(gfp);
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index fbc5e92e1ecc..643eba42d620 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -26,13 +26,11 @@
* @n: length of the copy in bytes
*
* Copy data to persistent memory media via non-temporal stores so that
- * a subsequent arch_wmb_pmem() can flush cpu and memory controller
- * write buffers to guarantee durability.
+ * a subsequent pmem driver flush operation will drain posted write queues.
*/
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
- size_t n)
+static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
{
- int unwritten;
+ int rem;
/*
* We are copying between two kernel buffers, if
@@ -40,59 +38,36 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
* fault) we would have already reported a general protection fault
* before the WARN+BUG.
*/
- unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
- (void __user *) src, n);
- if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
- __func__, dst, src, unwritten))
+ rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n);
+ if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n",
+ __func__, dst, src, rem))
BUG();
}
-static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
- size_t n)
+static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
{
if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
- return memcpy_mcsafe(dst, (void __force *) src, n);
- memcpy(dst, (void __force *) src, n);
+ return memcpy_mcsafe(dst, src, n);
+ memcpy(dst, src, n);
return 0;
}
/**
- * arch_wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of arch_memcpy_to_pmem() operations this drains data
- * from cpu write buffers and any platform (memory controller) buffers
- * to ensure that written data is durable on persistent memory media.
- */
-static inline void arch_wmb_pmem(void)
-{
- /*
- * wmb() to 'sfence' all previous writes such that they are
- * architecturally visible to 'pcommit'. Note, that we've
- * already arranged for pmem writes to avoid the cache via
- * arch_memcpy_to_pmem().
- */
- wmb();
- pcommit_sfence();
-}
-
-/**
* arch_wb_cache_pmem - write back a cache range with CLWB
* @vaddr: virtual start address
* @size: number of bytes to write back
*
* Write back a cache range using the CLWB (cache line write back)
- * instruction. This function requires explicit ordering with an
- * arch_wmb_pmem() call.
+ * instruction.
*/
-static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void arch_wb_cache_pmem(void *addr, size_t size)
{
u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
unsigned long clflush_mask = x86_clflush_size - 1;
- void *vaddr = (void __force *)addr;
- void *vend = vaddr + size;
+ void *vend = addr + size;
void *p;
- for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
+ for (p = (void *)((unsigned long)addr & ~clflush_mask);
p < vend; p += x86_clflush_size)
clwb(p);
}
@@ -113,16 +88,14 @@ static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
* @i: iterator with source data
*
* Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- * This function requires explicit ordering with an arch_wmb_pmem() call.
*/
-static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
struct iov_iter *i)
{
- void *vaddr = (void __force *)addr;
size_t len;
/* TODO: skip the write-back by always using non-temporal stores */
- len = copy_from_iter_nocache(vaddr, bytes, i);
+ len = copy_from_iter_nocache(addr, bytes, i);
if (__iter_needs_pmem_wb(i))
arch_wb_cache_pmem(addr, bytes);
@@ -136,28 +109,16 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
* @size: number of bytes to zero
*
* Write zeros into the memory range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with an arch_wmb_pmem() call.
*/
-static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+static inline void arch_clear_pmem(void *addr, size_t size)
{
- void *vaddr = (void __force *)addr;
-
- memset(vaddr, 0, size);
+ memset(addr, 0, size);
arch_wb_cache_pmem(addr, size);
}
-static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
+static inline void arch_invalidate_pmem(void *addr, size_t size)
{
- clflush_cache_range((void __force *) addr, size);
-}
-
-static inline bool __arch_has_wmb_pmem(void)
-{
- /*
- * We require that wmb() be an 'sfence', that is only guaranteed on
- * 64-bit builds
- */
- return static_cpu_has(X86_FEATURE_PCOMMIT);
+ clflush_cache_range(addr, size);
}
#endif /* CONFIG_ARCH_HAS_PMEM_API */
#endif /* __ASM_X86_PMEM_H__ */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 6271281f947d..2b5d686ea9f3 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -83,12 +83,6 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
int error_code, int si_code);
-extern unsigned long syscall_trace_enter_phase1(struct pt_regs *, u32 arch);
-extern long syscall_trace_enter_phase2(struct pt_regs *, u32 arch,
- unsigned long phase1_result);
-
-extern long syscall_trace_enter(struct pt_regs *);
-
static inline unsigned long regs_return_value(struct pt_regs *regs)
{
return regs->ax;
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 0576b6157f3a..ebd0c164cd4e 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -33,6 +33,7 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu)
}
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
+DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid);
@@ -135,6 +136,7 @@ int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_disable(void);
int common_cpu_die(unsigned int cpu);
void native_cpu_die(unsigned int cpu);
+void hlt_play_dead(void);
void native_play_dead(void);
void play_dead_common(void);
void wbinvd_on_cpu(int cpu);
@@ -147,6 +149,7 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
void smp_store_boot_cpu_info(void);
void smp_store_cpu_info(int id);
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
+#define cpu_acpi_id(cpu) per_cpu(x86_cpu_to_acpiid, cpu)
#else /* !CONFIG_SMP */
#define wbinvd_on_cpu(cpu) wbinvd()
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index d96d04377765..587d7914ea4b 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -253,52 +253,6 @@ static inline void clwb(volatile void *__p)
: [pax] "a" (p));
}
-/**
- * pcommit_sfence() - persistent commit and fence
- *
- * The PCOMMIT instruction ensures that data that has been flushed from the
- * processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to
- * memory and is durable on the DIMM. The primary use case for this is
- * persistent memory.
- *
- * This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT
- * with appropriate fencing.
- *
- * Example:
- * void flush_and_commit_buffer(void *vaddr, unsigned int size)
- * {
- * unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
- * void *vend = vaddr + size;
- * void *p;
- *
- * for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
- * p < vend; p += boot_cpu_data.x86_clflush_size)
- * clwb(p);
- *
- * // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes
- * // MFENCE via mb() also works
- * wmb();
- *
- * // PCOMMIT and the required SFENCE for ordering
- * pcommit_sfence();
- * }
- *
- * After this function completes the data pointed to by 'vaddr' has been
- * accepted to memory and will be durable if the 'vaddr' points to persistent
- * memory.
- *
- * PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify
- * things we include both the PCOMMIT and the required SFENCE in the
- * alternatives generated by pcommit_sfence().
- */
-static inline void pcommit_sfence(void)
-{
- alternative(ASM_NOP7,
- ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
- "sfence",
- X86_FEATURE_PCOMMIT);
-}
-
#define nop() asm volatile ("nop")
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 14c63c7e8337..a002b07a7099 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -72,7 +72,6 @@
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_XSAVES 0x00100000
-#define SECONDARY_EXEC_PCOMMIT 0x00200000
#define SECONDARY_EXEC_TSC_SCALING 0x02000000
#define PIN_BASED_EXT_INTR_MASK 0x00000001
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
index 0d809e9fc975..3bdd10d71223 100644
--- a/arch/x86/include/asm/xen/cpuid.h
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -76,15 +76,18 @@
/*
* Leaf 5 (0x40000x04)
* HVM-specific features
+ * EAX: Features
+ * EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag)
*/
-/* EAX Features */
/* Virtualized APIC registers */
#define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0)
/* Virtualized x2APIC accesses */
#define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1)
/* Memory mapped from other domains has valid IOMMU entries */
#define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2)
+/* vcpu id is present in EBX */
+#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3)
#define XEN_CPUID_MAX_NUM_LEAVES 4
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 5b15d94a33f8..37fee272618f 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -78,7 +78,6 @@
#define EXIT_REASON_PML_FULL 62
#define EXIT_REASON_XSAVES 63
#define EXIT_REASON_XRSTORS 64
-#define EXIT_REASON_PCOMMIT 65
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -127,8 +126,7 @@
{ EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }, \
{ EXIT_REASON_XSAVES, "XSAVES" }, \
- { EXIT_REASON_XRSTORS, "XRSTORS" }, \
- { EXIT_REASON_PCOMMIT, "PCOMMIT" }
+ { EXIT_REASON_XRSTORS, "XRSTORS" }
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 9414f84584e4..6738e5c82cca 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -161,13 +161,15 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
/**
* acpi_register_lapic - register a local apic and generates a logic cpu number
* @id: local apic id to register
+ * @acpiid: ACPI id to register
* @enabled: this cpu is enabled or not
*
* Returns the logic cpu number which maps to the local apic
*/
-static int acpi_register_lapic(int id, u8 enabled)
+static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
{
unsigned int ver = 0;
+ int cpu;
if (id >= MAX_LOCAL_APIC) {
printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
@@ -182,7 +184,11 @@ static int acpi_register_lapic(int id, u8 enabled)
if (boot_cpu_physical_apicid != -1U)
ver = apic_version[boot_cpu_physical_apicid];
- return generic_processor_info(id, ver);
+ cpu = generic_processor_info(id, ver);
+ if (cpu >= 0)
+ early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
+
+ return cpu;
}
static int __init
@@ -212,7 +218,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
if (!apic->apic_id_valid(apic_id) && enabled)
printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
else
- acpi_register_lapic(apic_id, enabled);
+ acpi_register_lapic(apic_id, processor->uid, enabled);
#else
printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
#endif
@@ -240,6 +246,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
* when we use CPU hotplug.
*/
acpi_register_lapic(processor->id, /* APIC ID */
+ processor->processor_id, /* ACPI ID */
processor->lapic_flags & ACPI_MADT_ENABLED);
return 0;
@@ -258,6 +265,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end)
acpi_table_print_madt_entry(header);
acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */
+ processor->processor_id, /* ACPI ID */
processor->lapic_flags & ACPI_MADT_ENABLED);
return 0;
@@ -714,7 +722,7 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
{
int cpu;
- cpu = acpi_register_lapic(physid, ACPI_MADT_ENABLED);
+ cpu = acpi_register_lapic(physid, U32_MAX, ACPI_MADT_ENABLED);
if (cpu < 0) {
pr_info(PREFIX "Unable to map lapic to logical cpu number\n");
return cpu;
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index cefacbad1531..456316f6c868 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -215,26 +215,18 @@ void apbt_setup_secondary_clock(void)
* cpu timers during the offline process due to the ordering of notification.
* the extra interrupt is harmless.
*/
-static int apbt_cpuhp_notify(struct notifier_block *n,
- unsigned long action, void *hcpu)
+static int apbt_cpu_dead(unsigned int cpu)
{
- unsigned long cpu = (unsigned long)hcpu;
struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu);
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DEAD:
- dw_apb_clockevent_pause(adev->timer);
- if (system_state == SYSTEM_RUNNING) {
- pr_debug("skipping APBT CPU %lu offline\n", cpu);
- } else {
- pr_debug("APBT clockevent for cpu %lu offline\n", cpu);
- dw_apb_clockevent_stop(adev->timer);
- }
- break;
- default:
- pr_debug("APBT notified %lu, no action\n", action);
+ dw_apb_clockevent_pause(adev->timer);
+ if (system_state == SYSTEM_RUNNING) {
+ pr_debug("skipping APBT CPU %u offline\n", cpu);
+ } else {
+ pr_debug("APBT clockevent for cpu %u offline\n", cpu);
+ dw_apb_clockevent_stop(adev->timer);
}
- return NOTIFY_OK;
+ return 0;
}
static __init int apbt_late_init(void)
@@ -242,9 +234,8 @@ static __init int apbt_late_init(void)
if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT ||
!apb_timer_block_enabled)
return 0;
- /* This notifier should be called after workqueue is ready */
- hotcpu_notifier(apbt_cpuhp_notify, -20);
- return 0;
+ return cpuhp_setup_state(CPUHP_X86_APB_DEAD, "X86_APB_DEAD", NULL,
+ apbt_cpu_dead);
}
fs_initcall(apbt_late_init);
#else
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f943d2f453a4..ac8d8ad8b009 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -92,8 +92,10 @@ static int apic_extnmi = APIC_EXTNMI_BSP;
*/
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 24170d0809ba..6368fa69d2af 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -152,68 +152,48 @@ static void init_x2apic_ldr(void)
}
}
- /*
- * At CPU state changes, update the x2apic cluster sibling info.
- */
-static int
-update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
+/*
+ * At CPU state changes, update the x2apic cluster sibling info.
+ */
+int x2apic_prepare_cpu(unsigned int cpu)
{
- unsigned int this_cpu = (unsigned long)hcpu;
- unsigned int cpu;
- int err = 0;
-
- switch (action) {
- case CPU_UP_PREPARE:
- if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu),
- GFP_KERNEL)) {
- err = -ENOMEM;
- } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu),
- GFP_KERNEL)) {
- free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
- err = -ENOMEM;
- }
- break;
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- for_each_online_cpu(cpu) {
- if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
- continue;
- cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
- cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
- }
- free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
- free_cpumask_var(per_cpu(ipi_mask, this_cpu));
- break;
+ if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
+ return -ENOMEM;
+
+ if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) {
+ free_cpumask_var(per_cpu(cpus_in_cluster, cpu));
+ return -ENOMEM;
}
- return notifier_from_errno(err);
+ return 0;
}
-static struct notifier_block x2apic_cpu_notifier = {
- .notifier_call = update_clusterinfo,
-};
-
-static int x2apic_init_cpu_notifier(void)
+int x2apic_dead_cpu(unsigned int this_cpu)
{
- int cpu = smp_processor_id();
-
- zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL);
+ int cpu;
- BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu));
-
- cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
- register_hotcpu_notifier(&x2apic_cpu_notifier);
- return 1;
+ for_each_online_cpu(cpu) {
+ if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
+ continue;
+ cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
+ cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
+ }
+ free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
+ free_cpumask_var(per_cpu(ipi_mask, this_cpu));
+ return 0;
}
static int x2apic_cluster_probe(void)
{
- if (x2apic_mode)
- return x2apic_init_cpu_notifier();
- else
+ int cpu = smp_processor_id();
+
+ if (!x2apic_mode)
return 0;
+
+ cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
+ cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
+ x2apic_prepare_cpu, x2apic_dead_cpu);
+ return 1;
}
static const struct cpumask *x2apic_cluster_target_cpus(void)
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index f112af7aa62e..3d747070fe67 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -710,31 +710,29 @@ static void hpet_work(struct work_struct *w)
complete(&hpet_work->complete);
}
-static int hpet_cpuhp_notify(struct notifier_block *n,
- unsigned long action, void *hcpu)
+static int hpet_cpuhp_online(unsigned int cpu)
{
- unsigned long cpu = (unsigned long)hcpu;
struct hpet_work_struct work;
+
+ INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
+ init_completion(&work.complete);
+ /* FIXME: add schedule_work_on() */
+ schedule_delayed_work_on(cpu, &work.work, 0);
+ wait_for_completion(&work.complete);
+ destroy_delayed_work_on_stack(&work.work);
+ return 0;
+}
+
+static int hpet_cpuhp_dead(unsigned int cpu)
+{
struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
- init_completion(&work.complete);
- /* FIXME: add schedule_work_on() */
- schedule_delayed_work_on(cpu, &work.work, 0);
- wait_for_completion(&work.complete);
- destroy_delayed_work_on_stack(&work.work);
- break;
- case CPU_DEAD:
- if (hdev) {
- free_irq(hdev->irq, hdev);
- hdev->flags &= ~HPET_DEV_USED;
- per_cpu(cpu_hpet_dev, cpu) = NULL;
- }
- break;
- }
- return NOTIFY_OK;
+ if (!hdev)
+ return 0;
+ free_irq(hdev->irq, hdev);
+ hdev->flags &= ~HPET_DEV_USED;
+ per_cpu(cpu_hpet_dev, cpu) = NULL;
+ return 0;
}
#else
@@ -750,11 +748,8 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd)
}
#endif
-static int hpet_cpuhp_notify(struct notifier_block *n,
- unsigned long action, void *hcpu)
-{
- return NOTIFY_OK;
-}
+#define hpet_cpuhp_online NULL
+#define hpet_cpuhp_dead NULL
#endif
@@ -931,7 +926,7 @@ out_nohpet:
*/
static __init int hpet_late_init(void)
{
- int cpu;
+ int ret;
if (boot_hpet_disable)
return -ENODEV;
@@ -961,16 +956,20 @@ static __init int hpet_late_init(void)
if (boot_cpu_has(X86_FEATURE_ARAT))
return 0;
- cpu_notifier_register_begin();
- for_each_online_cpu(cpu) {
- hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
- }
-
/* This notifier should be called after workqueue is ready */
- __hotcpu_notifier(hpet_cpuhp_notify, -20);
- cpu_notifier_register_done();
-
+ ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "AP_X86_HPET_ONLINE",
+ hpet_cpuhp_online, NULL);
+ if (ret)
+ return ret;
+ ret = cpuhp_setup_state(CPUHP_X86_HPET_DEAD, "X86_HPET_DEAD", NULL,
+ hpet_cpuhp_dead);
+ if (ret)
+ goto err_cpuhp;
return 0;
+
+err_cpuhp:
+ cpuhp_remove_state(CPUHP_AP_X86_HPET_ONLINE);
+ return ret;
}
fs_initcall(hpet_late_init);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a2616584b6e9..6cb2b02fcc87 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -400,10 +400,6 @@ static void __init reserve_initrd(void)
memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
}
-static void __init early_initrd_acpi_init(void)
-{
- early_acpi_table_init((void *)initrd_start, initrd_end - initrd_start);
-}
#else
static void __init early_reserve_initrd(void)
{
@@ -411,9 +407,6 @@ static void __init early_reserve_initrd(void)
static void __init reserve_initrd(void)
{
}
-static void __init early_initrd_acpi_init(void)
-{
-}
#endif /* CONFIG_BLK_DEV_INITRD */
static void __init parse_setup_data(void)
@@ -1149,7 +1142,7 @@ void __init setup_arch(char **cmdline_p)
reserve_initrd();
- early_initrd_acpi_init();
+ acpi_table_upgrade();
vsmp_init();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index e4fcb87ba7a6..7a40e068302d 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -236,6 +236,8 @@ void __init setup_per_cpu_areas(void)
early_per_cpu_map(x86_cpu_to_apicid, cpu);
per_cpu(x86_bios_cpu_apicid, cpu) =
early_per_cpu_map(x86_bios_cpu_apicid, cpu);
+ per_cpu(x86_cpu_to_acpiid, cpu) =
+ early_per_cpu_map(x86_cpu_to_acpiid, cpu);
#endif
#ifdef CONFIG_X86_32
per_cpu(x86_cpu_to_logical_apicid, cpu) =
@@ -271,6 +273,7 @@ void __init setup_per_cpu_areas(void)
#ifdef CONFIG_X86_LOCAL_APIC
early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
+ early_per_cpu_ptr(x86_cpu_to_acpiid) = NULL;
#endif
#ifdef CONFIG_X86_32
early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index d0a51939c150..c93609c97406 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1644,7 +1644,7 @@ static inline void mwait_play_dead(void)
}
}
-static inline void hlt_play_dead(void)
+void hlt_play_dead(void)
{
if (__this_cpu_read(cpu_info.x86) >= 4)
wbinvd();
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 9b0185fbe3eb..654f6c66fe45 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -323,25 +323,16 @@ static int tboot_wait_for_aps(int num_aps)
return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps);
}
-static int tboot_cpu_callback(struct notifier_block *nfb, unsigned long action,
- void *hcpu)
+static int tboot_dying_cpu(unsigned int cpu)
{
- switch (action) {
- case CPU_DYING:
- atomic_inc(&ap_wfs_count);
- if (num_online_cpus() == 1)
- if (tboot_wait_for_aps(atomic_read(&ap_wfs_count)))
- return NOTIFY_BAD;
- break;
+ atomic_inc(&ap_wfs_count);
+ if (num_online_cpus() == 1) {
+ if (tboot_wait_for_aps(atomic_read(&ap_wfs_count)))
+ return -EBUSY;
}
- return NOTIFY_OK;
+ return 0;
}
-static struct notifier_block tboot_cpu_notifier =
-{
- .notifier_call = tboot_cpu_callback,
-};
-
#ifdef CONFIG_DEBUG_FS
#define TBOOT_LOG_UUID { 0x26, 0x25, 0x19, 0xc0, 0x30, 0x6b, 0xb4, 0x4d, \
@@ -417,8 +408,8 @@ static __init int tboot_late_init(void)
tboot_create_trampoline();
atomic_set(&ap_wfs_count, 0);
- register_hotcpu_notifier(&tboot_cpu_notifier);
-
+ cpuhp_setup_state(CPUHP_AP_X86_TBOOT_DYING, "AP_X86_TBOOT_DYING", NULL,
+ tboot_dying_cpu);
#ifdef CONFIG_DEBUG_FS
debugfs_create_file("tboot_log", S_IRUSR,
arch_debugfs_dir, NULL, &tboot_log_fops);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7597b42a8a88..643565364497 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -366,7 +366,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
- F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
+ F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB);
/* cpuid 0xD.1.eax */
const u32 kvm_cpuid_D_1_eax_x86_features =
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index e17a74b1d852..35058c2c0eea 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -144,14 +144,6 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_RTM));
}
-static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 7, 0);
- return best && (best->ebx & bit(X86_FEATURE_PCOMMIT));
-}
-
static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index c146f3c262c3..0149ac59c273 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -539,6 +539,7 @@ static void mtrr_lookup_var_start(struct mtrr_iter *iter)
iter->fixed = false;
iter->start_max = iter->start;
+ iter->range = NULL;
iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node);
__mtrr_lookup_var_next(iter);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 64a79f271276..df07a0a4611f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2707,8 +2707,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_WBINVD_EXITING |
- SECONDARY_EXEC_XSAVES |
- SECONDARY_EXEC_PCOMMIT;
+ SECONDARY_EXEC_XSAVES;
if (enable_ept) {
/* nested EPT: emulate EPT also to L1 */
@@ -3270,7 +3269,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_SHADOW_VMCS |
SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_ENABLE_PML |
- SECONDARY_EXEC_PCOMMIT |
SECONDARY_EXEC_TSC_SCALING;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
@@ -4858,9 +4856,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
if (!enable_pml)
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
- /* Currently, we allow L1 guest to directly run pcommit instruction. */
- exec_control &= ~SECONDARY_EXEC_PCOMMIT;
-
return exec_control;
}
@@ -4904,9 +4899,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
- if (cpu_has_secondary_exec_ctrls())
+ if (cpu_has_secondary_exec_ctrls()) {
vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
vmx_secondary_exec_control(vmx));
+ }
if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
vmcs_write64(EOI_EXIT_BITMAP0, 0);
@@ -4979,6 +4975,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
if (vmx_xsaves_supported())
vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
+ if (enable_pml) {
+ ASSERT(vmx->pml_pg);
+ vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+ vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+ }
+
return 0;
}
@@ -7558,13 +7560,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
return 1;
}
-static int handle_pcommit(struct kvm_vcpu *vcpu)
-{
- /* we never catch pcommit instruct for L1 guest. */
- WARN_ON(1);
- return 1;
-}
-
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -7615,7 +7610,6 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_XSAVES] = handle_xsaves,
[EXIT_REASON_XRSTORS] = handle_xrstors,
[EXIT_REASON_PML_FULL] = handle_pml_full,
- [EXIT_REASON_PCOMMIT] = handle_pcommit,
};
static const int kvm_vmx_max_exit_handlers =
@@ -7924,8 +7918,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
* the XSS exit bitmap in vmcs12.
*/
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
- case EXIT_REASON_PCOMMIT:
- return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT);
default:
return true;
}
@@ -7937,22 +7929,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
}
-static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
-{
- struct page *pml_pg;
-
- pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!pml_pg)
- return -ENOMEM;
-
- vmx->pml_pg = pml_pg;
-
- vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
- vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
-
- return 0;
-}
-
static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
{
if (vmx->pml_pg) {
@@ -8224,6 +8200,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
exit_reason != EXIT_REASON_EPT_VIOLATION &&
+ exit_reason != EXIT_REASON_PML_FULL &&
exit_reason != EXIT_REASON_TASK_SWITCH)) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
@@ -8854,6 +8831,22 @@ static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
put_cpu();
}
+/*
+ * Ensure that the current vmcs of the logical processor is the
+ * vmcs01 of the vcpu before calling free_nested().
+ */
+static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int r;
+
+ r = vcpu_load(vcpu);
+ BUG_ON(r);
+ vmx_load_vmcs01(vcpu);
+ free_nested(vmx);
+ vcpu_put(vcpu);
+}
+
static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -8862,8 +8855,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
vmx_destroy_pml_buffer(vmx);
free_vpid(vmx->vpid);
leave_guest_mode(vcpu);
- vmx_load_vmcs01(vcpu);
- free_nested(vmx);
+ vmx_free_vcpu_nested(vcpu);
free_loaded_vmcs(vmx->loaded_vmcs);
kfree(vmx->guest_msrs);
kvm_vcpu_uninit(vcpu);
@@ -8885,14 +8877,26 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (err)
goto free_vcpu;
+ err = -ENOMEM;
+
+ /*
+ * If PML is turned on, failure on enabling PML just results in failure
+ * of creating the vcpu, therefore we can simplify PML logic (by
+ * avoiding dealing with cases, such as enabling PML partially on vcpus
+ * for the guest, etc.
+ */
+ if (enable_pml) {
+ vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!vmx->pml_pg)
+ goto uninit_vcpu;
+ }
+
vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
> PAGE_SIZE);
- err = -ENOMEM;
- if (!vmx->guest_msrs) {
- goto uninit_vcpu;
- }
+ if (!vmx->guest_msrs)
+ goto free_pml;
vmx->loaded_vmcs = &vmx->vmcs01;
vmx->loaded_vmcs->vmcs = alloc_vmcs();
@@ -8936,18 +8940,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->nested.current_vmptr = -1ull;
vmx->nested.current_vmcs12 = NULL;
- /*
- * If PML is turned on, failure on enabling PML just results in failure
- * of creating the vcpu, therefore we can simplify PML logic (by
- * avoiding dealing with cases, such as enabling PML partially on vcpus
- * for the guest, etc.
- */
- if (enable_pml) {
- err = vmx_create_pml_buffer(vmx);
- if (err)
- goto free_vmcs;
- }
-
return &vmx->vcpu;
free_vmcs:
@@ -8955,6 +8947,8 @@ free_vmcs:
free_loaded_vmcs(vmx->loaded_vmcs);
free_msrs:
kfree(vmx->guest_msrs);
+free_pml:
+ vmx_destroy_pml_buffer(vmx);
uninit_vcpu:
kvm_vcpu_uninit(&vmx->vcpu);
free_vcpu:
@@ -9086,15 +9080,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
if (cpu_has_secondary_exec_ctrls())
vmcs_set_secondary_exec_control(secondary_exec_ctl);
-
- if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
- if (guest_cpuid_has_pcommit(vcpu))
- vmx->nested.nested_vmx_secondary_ctls_high |=
- SECONDARY_EXEC_PCOMMIT;
- else
- vmx->nested.nested_vmx_secondary_ctls_high &=
- ~SECONDARY_EXEC_PCOMMIT;
- }
}
static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -9707,8 +9692,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
- SECONDARY_EXEC_APIC_REGISTER_VIRT |
- SECONDARY_EXEC_PCOMMIT);
+ SECONDARY_EXEC_APIC_REGISTER_VIRT);
if (nested_cpu_has(vmcs12,
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
exec_control |= vmcs12->secondary_vm_exec_control;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b2766723c951..45608a7da9b3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5552,9 +5552,10 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
}
EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
-static void tsc_bad(void *info)
+static int kvmclock_cpu_down_prep(unsigned int cpu)
{
__this_cpu_write(cpu_tsc_khz, 0);
+ return 0;
}
static void tsc_khz_changed(void *data)
@@ -5659,35 +5660,18 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = {
.notifier_call = kvmclock_cpufreq_notifier
};
-static int kvmclock_cpu_notifier(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int kvmclock_cpu_online(unsigned int cpu)
{
- unsigned int cpu = (unsigned long)hcpu;
-
- switch (action) {
- case CPU_ONLINE:
- case CPU_DOWN_FAILED:
- smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
- break;
- case CPU_DOWN_PREPARE:
- smp_call_function_single(cpu, tsc_bad, NULL, 1);
- break;
- }
- return NOTIFY_OK;
+ tsc_khz_changed(NULL);
+ return 0;
}
-static struct notifier_block kvmclock_cpu_notifier_block = {
- .notifier_call = kvmclock_cpu_notifier,
- .priority = -INT_MAX
-};
-
static void kvm_timer_init(void)
{
int cpu;
max_tsc_khz = tsc_khz;
- cpu_notifier_register_begin();
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
#ifdef CONFIG_CPU_FREQ
struct cpufreq_policy policy;
@@ -5702,12 +5686,9 @@ static void kvm_timer_init(void)
CPUFREQ_TRANSITION_NOTIFIER);
}
pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
- for_each_online_cpu(cpu)
- smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
-
- __register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
- cpu_notifier_register_done();
+ cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "AP_X86_KVM_CLK_ONLINE",
+ kvmclock_cpu_online, kvmclock_cpu_down_prep);
}
static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
@@ -5896,7 +5877,7 @@ void kvm_arch_exit(void)
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
- unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+ cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
#ifdef CONFIG_X86_64
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
#endif
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index ec378cd7b71e..767be7c76034 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -1012,7 +1012,7 @@ GrpTable: Grp15
4: XSAVE
5: XRSTOR | lfence (11B)
6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
EndTable
GrpTable: Grp16
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d22161ab941d..dc8023060456 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1353,7 +1353,7 @@ good_area:
* the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if
* we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
*/
- fault = handle_mm_fault(mm, vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags);
major |= fault & VM_FAULT_MAJOR;
/*
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 9c086c57105c..968ac028c34e 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -1,4 +1,5 @@
/* Common code for 32 and 64-bit NUMA */
+#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
@@ -15,7 +16,6 @@
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/dma.h>
-#include <asm/acpi.h>
#include <asm/amd_nb.h>
#include "numa_internal.h"
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index aa0ff4b02a96..3feec5af4e67 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -6,7 +6,7 @@
#include <asm/fixmap.h>
#include <asm/mtrr.h>
-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO
+#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | __GFP_ZERO)
#ifdef CONFIG_HIGHPTE
#define PGALLOC_USER_GFP __GFP_HIGHMEM
@@ -18,7 +18,7 @@ gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- return (pte_t *)__get_free_page(PGALLOC_GFP);
+ return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
}
pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
@@ -207,9 +207,13 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
{
int i;
bool failed = false;
+ gfp_t gfp = PGALLOC_GFP;
+
+ if (mm == &init_mm)
+ gfp &= ~__GFP_ACCOUNT;
for(i = 0; i < PREALLOCATED_PMDS; i++) {
- pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP);
+ pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
if (!pmd)
failed = true;
if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index b5f821881465..b1ecff460a46 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -15,8 +15,6 @@
#include <linux/bitmap.h>
#include <linux/module.h>
#include <linux/topology.h>
-#include <linux/bootmem.h>
-#include <linux/memblock.h>
#include <linux/mm.h>
#include <asm/proto.h>
#include <asm/numa.h>
@@ -24,51 +22,6 @@
#include <asm/apic.h>
#include <asm/uv/uv.h>
-int acpi_numa __initdata;
-
-static __init int setup_node(int pxm)
-{
- return acpi_map_pxm_to_node(pxm);
-}
-
-static __init void bad_srat(void)
-{
- printk(KERN_ERR "SRAT: SRAT not used.\n");
- acpi_numa = -1;
-}
-
-static __init inline int srat_disabled(void)
-{
- return acpi_numa < 0;
-}
-
-/*
- * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for
- * I/O localities since SRAT does not list them. I/O localities are
- * not supported at this point.
- */
-void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
-{
- int i, j;
-
- for (i = 0; i < slit->locality_count; i++) {
- const int from_node = pxm_to_node(i);
-
- if (from_node == NUMA_NO_NODE)
- continue;
-
- for (j = 0; j < slit->locality_count; j++) {
- const int to_node = pxm_to_node(j);
-
- if (to_node == NUMA_NO_NODE)
- continue;
-
- numa_set_distance(from_node, to_node,
- slit->entry[slit->locality_count * i + j]);
- }
- }
-}
-
/* Callback for Proximity Domain -> x2APIC mapping */
void __init
acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
@@ -91,7 +44,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
pxm, apic_id);
return;
}
- node = setup_node(pxm);
+ node = acpi_map_pxm_to_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
bad_srat();
@@ -104,7 +57,6 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
}
set_apicid_to_node(apic_id, node);
node_set(node, numa_nodes_parsed);
- acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
pxm, apic_id, node);
}
@@ -127,7 +79,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
pxm = pa->proximity_domain_lo;
if (acpi_srat_revision >= 2)
pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
- node = setup_node(pxm);
+ node = acpi_map_pxm_to_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
bad_srat();
@@ -146,74 +98,10 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
set_apicid_to_node(apic_id, node);
node_set(node, numa_nodes_parsed);
- acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
pxm, apic_id, node);
}
-#ifdef CONFIG_MEMORY_HOTPLUG
-static inline int save_add_info(void) {return 1;}
-#else
-static inline int save_add_info(void) {return 0;}
-#endif
-
-/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
-int __init
-acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
-{
- u64 start, end;
- u32 hotpluggable;
- int node, pxm;
-
- if (srat_disabled())
- goto out_err;
- if (ma->header.length != sizeof(struct acpi_srat_mem_affinity))
- goto out_err_bad_srat;
- if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
- goto out_err;
- hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
- if (hotpluggable && !save_add_info())
- goto out_err;
-
- start = ma->base_address;
- end = start + ma->length;
- pxm = ma->proximity_domain;
- if (acpi_srat_revision <= 1)
- pxm &= 0xff;
-
- node = setup_node(pxm);
- if (node < 0) {
- printk(KERN_ERR "SRAT: Too many proximity domains.\n");
- goto out_err_bad_srat;
- }
-
- if (numa_add_memblk(node, start, end) < 0)
- goto out_err_bad_srat;
-
- node_set(node, numa_nodes_parsed);
-
- pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
- node, pxm,
- (unsigned long long) start, (unsigned long long) end - 1,
- hotpluggable ? " hotplug" : "",
- ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");
-
- /* Mark hotplug range in memblock. */
- if (hotpluggable && memblock_mark_hotplug(start, ma->length))
- pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
- (unsigned long long)start, (unsigned long long)end - 1);
-
- max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
-
- return 0;
-out_err_bad_srat:
- bad_srat();
-out_err:
- return -1;
-}
-
-void __init acpi_numa_arch_fixup(void) {}
-
int __init x86_acpi_numa_init(void)
{
int ret;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index d5f64996394a..b12c26e2e309 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -12,6 +12,7 @@
#include <linux/export.h>
#include <linux/smp.h>
#include <linux/perf_event.h>
+#include <linux/tboot.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
@@ -266,6 +267,35 @@ void notrace restore_processor_state(void)
EXPORT_SYMBOL(restore_processor_state);
#endif
+#if defined(CONFIG_HIBERNATION) && defined(CONFIG_HOTPLUG_CPU)
+static void resume_play_dead(void)
+{
+ play_dead_common();
+ tboot_shutdown(TB_SHUTDOWN_WFS);
+ hlt_play_dead();
+}
+
+int hibernate_resume_nonboot_cpu_disable(void)
+{
+ void (*play_dead)(void) = smp_ops.play_dead;
+ int ret;
+
+ /*
+ * Ensure that MONITOR/MWAIT will not be used in the "play dead" loop
+ * during hibernate image restoration, because it is likely that the
+ * monitored address will be actually written to at that time and then
+ * the "dead" CPU will attempt to execute instructions again, but the
+ * address in its instruction pointer may not be possible to resolve
+ * any more at that point (the page tables used by it previously may
+ * have been overwritten by hibernate image data).
+ */
+ smp_ops.play_dead = resume_play_dead;
+ ret = disable_nonboot_cpus();
+ smp_ops.play_dead = play_dead;
+ return ret;
+}
+#endif
+
/*
* When bsp_check() is called in hibernate and suspend, cpu hotplug
* is disabled already. So it's unnessary to handle race condition between
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 3177c2bc26f6..8eee0e9c93f0 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -24,7 +24,6 @@
#include <asm/frame.h>
ENTRY(swsusp_arch_suspend)
- FRAME_BEGIN
movq $saved_context, %rax
movq %rsp, pt_regs_sp(%rax)
movq %rbp, pt_regs_bp(%rax)
@@ -48,6 +47,7 @@ ENTRY(swsusp_arch_suspend)
movq %cr3, %rax
movq %rax, restore_cr3(%rip)
+ FRAME_BEGIN
call swsusp_save
FRAME_END
ret
@@ -104,7 +104,6 @@ ENTRY(core_restore_code)
/* code below belongs to the image kernel */
.align PAGE_SIZE
ENTRY(restore_registers)
- FRAME_BEGIN
/* go back to the original page tables */
movq %r9, %cr3
@@ -145,6 +144,5 @@ ENTRY(restore_registers)
/* tell the hibernation core that we've just restored the memory */
movq %rax, in_suspend(%rip)
- FRAME_END
ret
ENDPROC(restore_registers)
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index be14cc3e48d5..3be012115853 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -20,10 +20,121 @@
#include <linux/init.h>
#include <linux/string.h>
+#include <xen/xen.h>
#include <xen/xen-ops.h>
+#include <xen/interface/platform.h>
#include <asm/page.h>
#include <asm/setup.h>
+#include <asm/xen/hypercall.h>
+
+static efi_char16_t vendor[100] __initdata;
+
+static efi_system_table_t efi_systab_xen __initdata = {
+ .hdr = {
+ .signature = EFI_SYSTEM_TABLE_SIGNATURE,
+ .revision = 0, /* Initialized later. */
+ .headersize = 0, /* Ignored by Linux Kernel. */
+ .crc32 = 0, /* Ignored by Linux Kernel. */
+ .reserved = 0
+ },
+ .fw_vendor = EFI_INVALID_TABLE_ADDR, /* Initialized later. */
+ .fw_revision = 0, /* Initialized later. */
+ .con_in_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
+ .con_in = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
+ .con_out_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
+ .con_out = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
+ .stderr_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
+ .stderr = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */
+ .runtime = (efi_runtime_services_t *)EFI_INVALID_TABLE_ADDR,
+ /* Not used under Xen. */
+ .boottime = (efi_boot_services_t *)EFI_INVALID_TABLE_ADDR,
+ /* Not used under Xen. */
+ .nr_tables = 0, /* Initialized later. */
+ .tables = EFI_INVALID_TABLE_ADDR /* Initialized later. */
+};
+
+static const struct efi efi_xen __initconst = {
+ .systab = NULL, /* Initialized later. */
+ .runtime_version = 0, /* Initialized later. */
+ .mps = EFI_INVALID_TABLE_ADDR,
+ .acpi = EFI_INVALID_TABLE_ADDR,
+ .acpi20 = EFI_INVALID_TABLE_ADDR,
+ .smbios = EFI_INVALID_TABLE_ADDR,
+ .smbios3 = EFI_INVALID_TABLE_ADDR,
+ .sal_systab = EFI_INVALID_TABLE_ADDR,
+ .boot_info = EFI_INVALID_TABLE_ADDR,
+ .hcdp = EFI_INVALID_TABLE_ADDR,
+ .uga = EFI_INVALID_TABLE_ADDR,
+ .uv_systab = EFI_INVALID_TABLE_ADDR,
+ .fw_vendor = EFI_INVALID_TABLE_ADDR,
+ .runtime = EFI_INVALID_TABLE_ADDR,
+ .config_table = EFI_INVALID_TABLE_ADDR,
+ .get_time = xen_efi_get_time,
+ .set_time = xen_efi_set_time,
+ .get_wakeup_time = xen_efi_get_wakeup_time,
+ .set_wakeup_time = xen_efi_set_wakeup_time,
+ .get_variable = xen_efi_get_variable,
+ .get_next_variable = xen_efi_get_next_variable,
+ .set_variable = xen_efi_set_variable,
+ .query_variable_info = xen_efi_query_variable_info,
+ .update_capsule = xen_efi_update_capsule,
+ .query_capsule_caps = xen_efi_query_capsule_caps,
+ .get_next_high_mono_count = xen_efi_get_next_high_mono_count,
+ .reset_system = NULL, /* Functionality provided by Xen. */
+ .set_virtual_address_map = NULL, /* Not used under Xen. */
+ .flags = 0 /* Initialized later. */
+};
+
+static efi_system_table_t __init *xen_efi_probe(void)
+{
+ struct xen_platform_op op = {
+ .cmd = XENPF_firmware_info,
+ .u.firmware_info = {
+ .type = XEN_FW_EFI_INFO,
+ .index = XEN_FW_EFI_CONFIG_TABLE
+ }
+ };
+ union xenpf_efi_info *info = &op.u.firmware_info.u.efi_info;
+
+ if (!xen_initial_domain() || HYPERVISOR_platform_op(&op) < 0)
+ return NULL;
+
+ /* Here we know that Xen runs on EFI platform. */
+
+ efi = efi_xen;
+
+ efi_systab_xen.tables = info->cfg.addr;
+ efi_systab_xen.nr_tables = info->cfg.nent;
+
+ op.cmd = XENPF_firmware_info;
+ op.u.firmware_info.type = XEN_FW_EFI_INFO;
+ op.u.firmware_info.index = XEN_FW_EFI_VENDOR;
+ info->vendor.bufsz = sizeof(vendor);
+ set_xen_guest_handle(info->vendor.name, vendor);
+
+ if (HYPERVISOR_platform_op(&op) == 0) {
+ efi_systab_xen.fw_vendor = __pa_symbol(vendor);
+ efi_systab_xen.fw_revision = info->vendor.revision;
+ } else
+ efi_systab_xen.fw_vendor = __pa_symbol(L"UNKNOWN");
+
+ op.cmd = XENPF_firmware_info;
+ op.u.firmware_info.type = XEN_FW_EFI_INFO;
+ op.u.firmware_info.index = XEN_FW_EFI_VERSION;
+
+ if (HYPERVISOR_platform_op(&op) == 0)
+ efi_systab_xen.hdr.revision = info->version;
+
+ op.cmd = XENPF_firmware_info;
+ op.u.firmware_info.type = XEN_FW_EFI_INFO;
+ op.u.firmware_info.index = XEN_FW_EFI_RT_VERSION;
+
+ if (HYPERVISOR_platform_op(&op) == 0)
+ efi.runtime_version = info->version;
+
+ return &efi_systab_xen;
+}
void __init xen_efi_init(void)
{
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0f87db2cc6a8..69b4b6d29738 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -59,6 +59,7 @@
#include <asm/xen/pci.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
+#include <asm/xen/cpuid.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/proto.h>
@@ -118,6 +119,10 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
*/
DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
+/* Linux <-> Xen vCPU id mapping */
+DEFINE_PER_CPU(int, xen_vcpu_id) = -1;
+EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
+
enum xen_domain_type xen_domain_type = XEN_NATIVE;
EXPORT_SYMBOL_GPL(xen_domain_type);
@@ -179,7 +184,7 @@ static void clamp_max_cpus(void)
#endif
}
-static void xen_vcpu_setup(int cpu)
+void xen_vcpu_setup(int cpu)
{
struct vcpu_register_vcpu_info info;
int err;
@@ -202,8 +207,9 @@ static void xen_vcpu_setup(int cpu)
if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
return;
}
- if (cpu < MAX_VIRT_CPUS)
- per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
+ per_cpu(xen_vcpu, cpu) =
+ &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
if (!have_vcpu_info_placement) {
if (cpu >= MAX_VIRT_CPUS)
@@ -223,7 +229,8 @@ static void xen_vcpu_setup(int cpu)
hypervisor has no unregister variant and this hypercall does not
allow to over-write info.mfn and info.offset.
*/
- err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
+ err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
+ &info);
if (err) {
printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
@@ -247,10 +254,11 @@ void xen_vcpu_restore(void)
for_each_possible_cpu(cpu) {
bool other_cpu = (cpu != smp_processor_id());
- bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
+ bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu),
+ NULL);
if (other_cpu && is_up &&
- HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
+ HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL))
BUG();
xen_setup_runstate_info(cpu);
@@ -259,7 +267,7 @@ void xen_vcpu_restore(void)
xen_vcpu_setup(cpu);
if (other_cpu && is_up &&
- HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
+ HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
BUG();
}
}
@@ -588,7 +596,7 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
{
unsigned long va = dtr->address;
unsigned int size = dtr->size + 1;
- unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+ unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
unsigned long frames[pages];
int f;
@@ -637,7 +645,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
{
unsigned long va = dtr->address;
unsigned int size = dtr->size + 1;
- unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+ unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
unsigned long frames[pages];
int f;
@@ -1135,8 +1143,11 @@ void xen_setup_vcpu_info_placement(void)
{
int cpu;
- for_each_possible_cpu(cpu)
+ for_each_possible_cpu(cpu) {
+ /* Set up direct vCPU id mapping for PV guests. */
+ per_cpu(xen_vcpu_id, cpu) = cpu;
xen_vcpu_setup(cpu);
+ }
/* xen_vcpu_setup managed to place the vcpu_info within the
* percpu area for all cpus, so make use of it. Note that for
@@ -1727,6 +1738,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
#endif
xen_raw_console_write("about to get started...\n");
+ /* Let's presume PV guests always boot on vCPU with id 0. */
+ per_cpu(xen_vcpu_id, 0) = 0;
+
xen_setup_runstate_info(0);
xen_efi_init();
@@ -1768,9 +1782,10 @@ void __ref xen_hvm_init_shared_info(void)
* in that case multiple vcpus might be online. */
for_each_online_cpu(cpu) {
/* Leave it to be NULL. */
- if (cpu >= MAX_VIRT_CPUS)
+ if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS)
continue;
- per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ per_cpu(xen_vcpu, cpu) =
+ &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
}
}
@@ -1795,6 +1810,12 @@ static void __init init_hvm_pv_info(void)
xen_setup_features();
+ cpuid(base + 4, &eax, &ebx, &ecx, &edx);
+ if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
+ this_cpu_write(xen_vcpu_id, ebx);
+ else
+ this_cpu_write(xen_vcpu_id, smp_processor_id());
+
pv_info.name = "Xen HVM";
xen_domain_type = XEN_HVM_DOMAIN;
@@ -1806,6 +1827,10 @@ static int xen_hvm_cpu_notify(struct notifier_block *self, unsigned long action,
int cpu = (long)hcpu;
switch (action) {
case CPU_UP_PREPARE:
+ if (cpu_acpi_id(cpu) != U32_MAX)
+ per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
+ else
+ per_cpu(xen_vcpu_id, cpu) = cpu;
xen_vcpu_setup(cpu);
if (xen_have_vector_callback) {
if (xen_feature(XENFEAT_hvm_safe_pvclock))
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index e079500b17f3..de4144c24f1c 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -111,63 +111,18 @@ int arch_gnttab_init(unsigned long nr_shared)
}
#ifdef CONFIG_XEN_PVH
-#include <xen/balloon.h>
#include <xen/events.h>
-#include <linux/slab.h>
-static int __init xlated_setup_gnttab_pages(void)
-{
- struct page **pages;
- xen_pfn_t *pfns;
- void *vaddr;
- int rc;
- unsigned int i;
- unsigned long nr_grant_frames = gnttab_max_grant_frames();
-
- BUG_ON(nr_grant_frames == 0);
- pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
-
- pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL);
- if (!pfns) {
- kfree(pages);
- return -ENOMEM;
- }
- rc = alloc_xenballooned_pages(nr_grant_frames, pages);
- if (rc) {
- pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__,
- nr_grant_frames, rc);
- kfree(pages);
- kfree(pfns);
- return rc;
- }
- for (i = 0; i < nr_grant_frames; i++)
- pfns[i] = page_to_pfn(pages[i]);
-
- vaddr = vmap(pages, nr_grant_frames, 0, PAGE_KERNEL);
- if (!vaddr) {
- pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__,
- nr_grant_frames, rc);
- free_xenballooned_pages(nr_grant_frames, pages);
- kfree(pages);
- kfree(pfns);
- return -ENOMEM;
- }
- kfree(pages);
-
- xen_auto_xlat_grant_frames.pfn = pfns;
- xen_auto_xlat_grant_frames.count = nr_grant_frames;
- xen_auto_xlat_grant_frames.vaddr = vaddr;
-
- return 0;
-}
-
+#include <xen/xen-ops.h>
static int __init xen_pvh_gnttab_setup(void)
{
if (!xen_pvh_domain())
return -ENODEV;
- return xlated_setup_gnttab_pages();
+ xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames();
+
+ return xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn,
+ &xen_auto_xlat_grant_frames.vaddr,
+ xen_auto_xlat_grant_frames.count);
}
/* Call it _before_ __gnttab_init as we need to initialize the
* xen_auto_xlat_grant_frames first. */
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index a1207cb6472a..33e92955e09d 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -109,7 +109,8 @@ static void xen_safe_halt(void)
static void xen_halt(void)
{
if (irqs_disabled())
- HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
+ HYPERVISOR_vcpu_op(VCPUOP_down,
+ xen_vcpu_nr(smp_processor_id()), NULL);
else
xen_safe_halt();
}
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 9466354d3e49..32bdc2c90297 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -547,7 +547,7 @@ void xen_pmu_init(int cpu)
return;
fail:
- pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n",
+ pr_info_once("Could not initialize VPMU for cpu %d, error %d\n",
cpu, err);
free_pages((unsigned long)xenpmu_data, 0);
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 719cf291dcdf..0b4d04c8ab4d 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -322,6 +322,13 @@ static void __init xen_smp_prepare_boot_cpu(void)
xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
}
+
+ /*
+ * Setup vcpu_info for boot CPU.
+ */
+ if (xen_hvm_domain())
+ xen_vcpu_setup(0);
+
/*
* The alternative logic (which patches the unlock/lock) runs before
* the smp bootup up code is activated. Hence we need to set this up
@@ -454,7 +461,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
#endif
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
- if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
+ if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
BUG();
kfree(ctxt);
@@ -492,7 +499,7 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
if (rc)
return rc;
- rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
+ rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL);
BUG_ON(rc);
while (cpu_report_state(cpu) != CPU_ONLINE)
@@ -520,7 +527,8 @@ static int xen_cpu_disable(void)
static void xen_cpu_die(unsigned int cpu)
{
- while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
+ while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up,
+ xen_vcpu_nr(cpu), NULL)) {
__set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(HZ/10);
}
@@ -536,7 +544,7 @@ static void xen_cpu_die(unsigned int cpu)
static void xen_play_dead(void) /* used only with HOTPLUG_CPU */
{
play_dead_common();
- HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
+ HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL);
cpu_bringup();
/*
* commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down)
@@ -576,7 +584,7 @@ static void stop_self(void *v)
set_cpu_online(cpu, false);
- HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
+ HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL);
BUG();
}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 6deba5bc7e34..67356d29d74d 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -11,8 +11,6 @@
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>
-#include <linux/kernel_stat.h>
-#include <linux/math64.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/pvclock_gtod.h>
@@ -31,44 +29,6 @@
/* Xen may fire a timer up to this many ns early */
#define TIMER_SLOP 100000
-#define NS_PER_TICK (1000000000LL / HZ)
-
-/* snapshots of runstate info */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
-
-/* unused ns of stolen time */
-static DEFINE_PER_CPU(u64, xen_residual_stolen);
-
-static void do_stolen_accounting(void)
-{
- struct vcpu_runstate_info state;
- struct vcpu_runstate_info *snap;
- s64 runnable, offline, stolen;
- cputime_t ticks;
-
- xen_get_runstate_snapshot(&state);
-
- WARN_ON(state.state != RUNSTATE_running);
-
- snap = this_cpu_ptr(&xen_runstate_snapshot);
-
- /* work out how much time the VCPU has not been runn*ing* */
- runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
- offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
-
- *snap = state;
-
- /* Add the appropriate number of ticks of stolen time,
- including any left-overs from last time. */
- stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);
-
- if (stolen < 0)
- stolen = 0;
-
- ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
- __this_cpu_write(xen_residual_stolen, stolen);
- account_steal_ticks(ticks);
-}
/* Get the TSC speed from Xen */
static unsigned long xen_tsc_khz(void)
@@ -263,8 +223,10 @@ static int xen_vcpuop_shutdown(struct clock_event_device *evt)
{
int cpu = smp_processor_id();
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
- HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
+ if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu),
+ NULL) ||
+ HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
+ NULL))
BUG();
return 0;
@@ -274,7 +236,8 @@ static int xen_vcpuop_set_oneshot(struct clock_event_device *evt)
{
int cpu = smp_processor_id();
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
+ if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
+ NULL))
BUG();
return 0;
@@ -293,7 +256,8 @@ static int xen_vcpuop_set_next_event(unsigned long delta,
/* Get an event anyway, even if the timeout is already expired */
single.flags = 0;
- ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);
+ ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu),
+ &single);
BUG_ON(ret != 0);
return ret;
@@ -335,8 +299,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
ret = IRQ_HANDLED;
}
- do_stolen_accounting();
-
return ret;
}
@@ -394,13 +356,15 @@ void xen_timer_resume(void)
return;
for_each_online_cpu(cpu) {
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
+ if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
+ xen_vcpu_nr(cpu), NULL))
BUG();
}
}
static const struct pv_time_ops xen_time_ops __initconst = {
.sched_clock = xen_clocksource_read,
+ .steal_clock = xen_steal_clock,
};
static void __init xen_time_init(void)
@@ -414,7 +378,8 @@ static void __init xen_time_init(void)
clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);
- if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
+ if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
+ NULL) == 0) {
/* Successfully turned off 100Hz tick, so we have the
vcpuop-based timer interface */
printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
@@ -431,6 +396,8 @@ static void __init xen_time_init(void)
xen_setup_timer(cpu);
xen_setup_cpu_clockevents();
+ xen_time_setup_guest();
+
if (xen_initial_domain())
pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 4140b070f2e9..3cbce3b085e7 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -76,6 +76,7 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
bool xen_vcpu_stolen(int vcpu);
+void xen_vcpu_setup(int cpu);
void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
OpenPOWER on IntegriCloud