summaryrefslogtreecommitdiffstats
path: root/drivers/hv
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/hv')
-rw-r--r--drivers/hv/Kconfig3
-rw-r--r--drivers/hv/Makefile1
-rw-r--r--drivers/hv/channel.c2
-rw-r--r--drivers/hv/channel_mgmt.c182
-rw-r--r--drivers/hv/connection.c95
-rw-r--r--drivers/hv/hv.c70
-rw-r--r--drivers/hv/hv_balloon.c268
-rw-r--r--drivers/hv/hv_debugfs.c178
-rw-r--r--drivers/hv/hv_fcopy.c57
-rw-r--r--drivers/hv/hv_kvp.c46
-rw-r--r--drivers/hv/hv_snapshot.c58
-rw-r--r--drivers/hv/hv_util.c169
-rw-r--r--drivers/hv/hyperv_vmbus.h77
-rw-r--r--drivers/hv/ring_buffer.c2
-rw-r--r--drivers/hv/vmbus_drv.c308
15 files changed, 1259 insertions, 257 deletions
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 9a59957922d4..79e5356a737a 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -14,9 +14,6 @@ config HYPERV
config HYPERV_TIMER
def_bool HYPERV
-config HYPERV_TSCPAGE
- def_bool HYPERV && X86_64
-
config HYPERV_UTILS
tristate "Microsoft Hyper-V Utilities driver"
depends on HYPERV && CONNECTOR && NLS
diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index a1eec7177c2d..94daf8240c95 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -9,4 +9,5 @@ CFLAGS_hv_balloon.o = -I$(src)
hv_vmbus-y := vmbus_drv.o \
hv.o connection.o channel.o \
channel_mgmt.o ring_buffer.o hv_trace.o
+hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o
hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_fcopy.o hv_utils_transport.o
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 5f9505a087f6..23f358cb7f49 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -26,7 +26,7 @@
static unsigned long virt_to_hvpfn(void *addr)
{
- unsigned long paddr;
+ phys_addr_t paddr;
if (is_vmalloc_addr(addr))
paddr = page_to_phys(vmalloc_to_page(addr)) +
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index addcef50df7a..0370364169c4 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -407,7 +407,15 @@ void hv_process_channel_removal(struct vmbus_channel *channel)
cpumask_clear_cpu(channel->target_cpu,
&primary_channel->alloced_cpus_in_node);
- vmbus_release_relid(channel->offermsg.child_relid);
+ /*
+ * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
+ * the relid is invalidated; after hibernation, when the user-space app
+ * destroys the channel, the relid is INVALID_RELID, and in this case
+ * it's unnecessary and unsafe to release the old relid, since the same
+ * relid can refer to a completely different channel now.
+ */
+ if (channel->offermsg.child_relid != INVALID_RELID)
+ vmbus_release_relid(channel->offermsg.child_relid);
free_channel(channel);
}
@@ -545,6 +553,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
mutex_lock(&vmbus_connection.channel_mutex);
+ /* Remember the channels that should be cleaned up upon suspend. */
+ if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
+ atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
+
/*
* Now that we have acquired the channel_mutex,
* we can release the potentially racing rescind thread.
@@ -847,6 +859,67 @@ void vmbus_initiate_unload(bool crash)
vmbus_wait_for_unload();
}
+static void check_ready_for_resume_event(void)
+{
+ /*
+ * If all the old primary channels have been fixed up, then it's safe
+ * to resume.
+ */
+ if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
+ complete(&vmbus_connection.ready_for_resume_event);
+}
+
+static void vmbus_setup_channel_state(struct vmbus_channel *channel,
+ struct vmbus_channel_offer_channel *offer)
+{
+ /*
+ * Setup state for signalling the host.
+ */
+ channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
+
+ if (vmbus_proto_version != VERSION_WS2008) {
+ channel->is_dedicated_interrupt =
+ (offer->is_dedicated_interrupt != 0);
+ channel->sig_event = offer->connection_id;
+ }
+
+ memcpy(&channel->offermsg, offer,
+ sizeof(struct vmbus_channel_offer_channel));
+ channel->monitor_grp = (u8)offer->monitorid / 32;
+ channel->monitor_bit = (u8)offer->monitorid % 32;
+}
+
+/*
+ * find_primary_channel_by_offer - Get the channel object given the new offer.
+ * This is only used in the resume path of hibernation.
+ */
+static struct vmbus_channel *
+find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
+{
+ struct vmbus_channel *channel = NULL, *iter;
+ const guid_t *inst1, *inst2;
+
+ /* Ignore sub-channel offers. */
+ if (offer->offer.sub_channel_index != 0)
+ return NULL;
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
+ inst1 = &iter->offermsg.offer.if_instance;
+ inst2 = &offer->offer.if_instance;
+
+ if (guid_equal(inst1, inst2)) {
+ channel = iter;
+ break;
+ }
+ }
+
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ return channel;
+}
+
/*
* vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
*
@@ -854,12 +927,58 @@ void vmbus_initiate_unload(bool crash)
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
struct vmbus_channel_offer_channel *offer;
- struct vmbus_channel *newchannel;
+ struct vmbus_channel *oldchannel, *newchannel;
+ size_t offer_sz;
offer = (struct vmbus_channel_offer_channel *)hdr;
trace_vmbus_onoffer(offer);
+ oldchannel = find_primary_channel_by_offer(offer);
+
+ if (oldchannel != NULL) {
+ atomic_dec(&vmbus_connection.offer_in_progress);
+
+ /*
+ * We're resuming from hibernation: all the sub-channel and
+ * hv_sock channels we had before the hibernation should have
+ * been cleaned up, and now we must be seeing a re-offered
+ * primary channel that we had before the hibernation.
+ */
+
+ WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
+ /* Fix up the relid. */
+ oldchannel->offermsg.child_relid = offer->child_relid;
+
+ offer_sz = sizeof(*offer);
+ if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) {
+ check_ready_for_resume_event();
+ return;
+ }
+
+ /*
+ * This is not an error, since the host can also change the
+ * other field(s) of the offer, e.g. on WS RS5 (Build 17763),
+ * the offer->connection_id of the Mellanox VF vmbus device
+ * can change when the host reoffers the device upon resume.
+ */
+ pr_debug("vmbus offer changed: relid=%d\n",
+ offer->child_relid);
+
+ print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
+ 16, 4, &oldchannel->offermsg, offer_sz,
+ false);
+ print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
+ 16, 4, offer, offer_sz, false);
+
+ /* Fix up the old channel. */
+ vmbus_setup_channel_state(oldchannel, offer);
+
+ check_ready_for_resume_event();
+
+ return;
+ }
+
/* Allocate the channel object and save this offer. */
newchannel = alloc_channel();
if (!newchannel) {
@@ -869,25 +988,21 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
return;
}
- /*
- * Setup state for signalling the host.
- */
- newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
-
- if (vmbus_proto_version != VERSION_WS2008) {
- newchannel->is_dedicated_interrupt =
- (offer->is_dedicated_interrupt != 0);
- newchannel->sig_event = offer->connection_id;
- }
-
- memcpy(&newchannel->offermsg, offer,
- sizeof(struct vmbus_channel_offer_channel));
- newchannel->monitor_grp = (u8)offer->monitorid / 32;
- newchannel->monitor_bit = (u8)offer->monitorid % 32;
+ vmbus_setup_channel_state(newchannel, offer);
vmbus_process_offer(newchannel);
}
+static void check_ready_for_suspend_event(void)
+{
+ /*
+ * If all the sub-channels or hv_sock channels have been cleaned up,
+ * then it's safe to suspend.
+ */
+ if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
+ complete(&vmbus_connection.ready_for_suspend_event);
+}
+
/*
* vmbus_onoffer_rescind - Rescind offer handler.
*
@@ -898,6 +1013,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
struct vmbus_channel_rescind_offer *rescind;
struct vmbus_channel *channel;
struct device *dev;
+ bool clean_up_chan_for_suspend;
rescind = (struct vmbus_channel_rescind_offer *)hdr;
@@ -937,6 +1053,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
return;
}
+ clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
+ is_sub_channel(channel);
/*
* Before setting channel->rescind in vmbus_rescind_cleanup(), we
* should make sure the channel callback is not running any more.
@@ -962,6 +1080,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
if (channel->device_obj) {
if (channel->chn_rescind_callback) {
channel->chn_rescind_callback(channel);
+
+ if (clean_up_chan_for_suspend)
+ check_ready_for_suspend_event();
+
return;
}
/*
@@ -994,6 +1116,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
}
mutex_unlock(&vmbus_connection.channel_mutex);
}
+
+ /* The "channel" may have been freed. Do not access it any longer. */
+
+ if (clean_up_chan_for_suspend)
+ check_ready_for_suspend_event();
}
void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
@@ -1224,6 +1351,8 @@ channel_message_table[CHANNELMSG_COUNT] = {
{ CHANNELMSG_19, 0, NULL },
{ CHANNELMSG_20, 0, NULL },
{ CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL },
+ { CHANNELMSG_22, 0, NULL },
+ { CHANNELMSG_TL_CONNECT_RESULT, 0, NULL },
};
/*
@@ -1235,25 +1364,16 @@ void vmbus_onmessage(void *context)
{
struct hv_message *msg = context;
struct vmbus_channel_message_header *hdr;
- int size;
hdr = (struct vmbus_channel_message_header *)msg->u.payload;
- size = msg->header.payload_size;
trace_vmbus_on_message(hdr);
- if (hdr->msgtype >= CHANNELMSG_COUNT) {
- pr_err("Received invalid channel message type %d size %d\n",
- hdr->msgtype, size);
- print_hex_dump_bytes("", DUMP_PREFIX_NONE,
- (unsigned char *)msg->u.payload, size);
- return;
- }
-
- if (channel_message_table[hdr->msgtype].message_handler)
- channel_message_table[hdr->msgtype].message_handler(hdr);
- else
- pr_err("Unhandled channel message type %d\n", hdr->msgtype);
+ /*
+ * vmbus_on_msg_dpc() makes sure the hdr->msgtype here can not go
+ * out of bound and the message_handler pointer can not be NULL.
+ */
+ channel_message_table[hdr->msgtype].message_handler(hdr);
}
/*
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 09829e15d4a0..74e77de89b4f 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -14,6 +14,7 @@
#include <linux/wait.h>
#include <linux/delay.h>
#include <linux/mm.h>
+#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
@@ -26,6 +27,11 @@
struct vmbus_connection vmbus_connection = {
.conn_state = DISCONNECTED,
.next_gpadl_handle = ATOMIC_INIT(0xE1E10),
+
+ .ready_for_suspend_event= COMPLETION_INITIALIZER(
+ vmbus_connection.ready_for_suspend_event),
+ .ready_for_resume_event = COMPLETION_INITIALIZER(
+ vmbus_connection.ready_for_resume_event),
};
EXPORT_SYMBOL_GPL(vmbus_connection);
@@ -35,32 +41,32 @@ EXPORT_SYMBOL_GPL(vmbus_connection);
__u32 vmbus_proto_version;
EXPORT_SYMBOL_GPL(vmbus_proto_version);
-static __u32 vmbus_get_next_version(__u32 current_version)
-{
- switch (current_version) {
- case (VERSION_WIN7):
- return VERSION_WS2008;
-
- case (VERSION_WIN8):
- return VERSION_WIN7;
-
- case (VERSION_WIN8_1):
- return VERSION_WIN8;
-
- case (VERSION_WIN10):
- return VERSION_WIN8_1;
+/*
+ * Table of VMBus versions listed from newest to oldest.
+ */
+static __u32 vmbus_versions[] = {
+ VERSION_WIN10_V5_2,
+ VERSION_WIN10_V5_1,
+ VERSION_WIN10_V5,
+ VERSION_WIN10_V4_1,
+ VERSION_WIN10,
+ VERSION_WIN8_1,
+ VERSION_WIN8,
+ VERSION_WIN7,
+ VERSION_WS2008
+};
- case (VERSION_WIN10_V5):
- return VERSION_WIN10;
+/*
+ * Maximal VMBus protocol version guests can negotiate. Useful to cap the
+ * VMBus version for testing and debugging purpose.
+ */
+static uint max_version = VERSION_WIN10_V5_2;
- case (VERSION_WS2008):
- default:
- return VERSION_INVAL;
- }
-}
+module_param(max_version, uint, S_IRUGO);
+MODULE_PARM_DESC(max_version,
+ "Maximal VMBus protocol version which can be negotiated");
-static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
- __u32 version)
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
{
int ret = 0;
unsigned int cur_cpu;
@@ -76,12 +82,12 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
msg->vmbus_version_requested = version;
/*
- * VMBus protocol 5.0 (VERSION_WIN10_V5) requires that we must use
- * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message,
+ * VMBus protocol 5.0 (VERSION_WIN10_V5) and higher require that we must
+ * use VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message,
* and for subsequent messages, we must use the Message Connection ID
* field in the host-returned Version Response Message. And, with
- * VERSION_WIN10_V5, we don't use msg->interrupt_page, but we tell
- * the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for
+ * VERSION_WIN10_V5 and higher, we don't use msg->interrupt_page, but we
+ * tell the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for
* compatibility.
*
* On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1).
@@ -165,8 +171,8 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
*/
int vmbus_connect(void)
{
- int ret = 0;
struct vmbus_channel_msginfo *msginfo = NULL;
+ int i, ret = 0;
__u32 version;
/* Initialize the vmbus connection */
@@ -202,7 +208,7 @@ int vmbus_connect(void)
* abstraction stuff
*/
vmbus_connection.int_page =
- (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0);
+ (void *)hv_alloc_hyperv_zeroed_page();
if (vmbus_connection.int_page == NULL) {
ret = -ENOMEM;
goto cleanup;
@@ -211,14 +217,14 @@ int vmbus_connect(void)
vmbus_connection.recv_int_page = vmbus_connection.int_page;
vmbus_connection.send_int_page =
(void *)((unsigned long)vmbus_connection.int_page +
- (PAGE_SIZE >> 1));
+ (HV_HYP_PAGE_SIZE >> 1));
/*
* Setup the monitor notification facility. The 1st page for
* parent->child and the 2nd page for child->parent
*/
- vmbus_connection.monitor_pages[0] = (void *)__get_free_pages((GFP_KERNEL|__GFP_ZERO), 0);
- vmbus_connection.monitor_pages[1] = (void *)__get_free_pages((GFP_KERNEL|__GFP_ZERO), 0);
+ vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_zeroed_page();
+ vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_zeroed_page();
if ((vmbus_connection.monitor_pages[0] == NULL) ||
(vmbus_connection.monitor_pages[1] == NULL)) {
ret = -ENOMEM;
@@ -240,21 +246,21 @@ int vmbus_connect(void)
* version.
*/
- version = VERSION_CURRENT;
+ for (i = 0; ; i++) {
+ if (i == ARRAY_SIZE(vmbus_versions))
+ goto cleanup;
+
+ version = vmbus_versions[i];
+ if (version > max_version)
+ continue;
- do {
ret = vmbus_negotiate_version(msginfo, version);
if (ret == -ETIMEDOUT)
goto cleanup;
if (vmbus_connection.conn_state == CONNECTED)
break;
-
- version = vmbus_get_next_version(version);
- } while (version != VERSION_INVAL);
-
- if (version == VERSION_INVAL)
- goto cleanup;
+ }
vmbus_proto_version = version;
pr_info("Vmbus version:%d.%d\n",
@@ -291,12 +297,12 @@ void vmbus_disconnect(void)
destroy_workqueue(vmbus_connection.work_queue);
if (vmbus_connection.int_page) {
- free_pages((unsigned long)vmbus_connection.int_page, 0);
+ hv_free_hyperv_page((unsigned long)vmbus_connection.int_page);
vmbus_connection.int_page = NULL;
}
- free_pages((unsigned long)vmbus_connection.monitor_pages[0], 0);
- free_pages((unsigned long)vmbus_connection.monitor_pages[1], 0);
+ hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[0]);
+ hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[1]);
vmbus_connection.monitor_pages[0] = NULL;
vmbus_connection.monitor_pages[1] = NULL;
}
@@ -357,6 +363,7 @@ void vmbus_on_event(unsigned long data)
trace_vmbus_on_event(channel);
+ hv_debug_delay_test(channel, INTERRUPT_DELAY);
do {
void (*callback_fn)(void *);
@@ -409,7 +416,7 @@ int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep)
case HV_STATUS_INVALID_CONNECTION_ID:
/*
* See vmbus_negotiate_version(): VMBus protocol 5.0
- * requires that we must use
+ * and higher require that we must use
* VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate
* Contact message, but on old hosts that only
* support VMBus protocol 4.0 or lower, here we get
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 6188fb7dda42..6098e0cbdb4b 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -154,7 +154,7 @@ void hv_synic_free(void)
* retrieve the initialized message and event pages. Otherwise, we create and
* initialize the message and event pages.
*/
-int hv_synic_init(unsigned int cpu)
+void hv_synic_enable_regs(unsigned int cpu)
{
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
@@ -196,8 +196,13 @@ int hv_synic_init(unsigned int cpu)
sctrl.enable = 1;
hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_init(unsigned int cpu)
+{
+ hv_synic_enable_regs(cpu);
- hv_stimer_init(cpu);
+ hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);
return 0;
}
@@ -205,20 +210,45 @@ int hv_synic_init(unsigned int cpu)
/*
* hv_synic_cleanup - Cleanup routine for hv_synic_init().
*/
-int hv_synic_cleanup(unsigned int cpu)
+void hv_synic_disable_regs(unsigned int cpu)
{
union hv_synic_sint shared_sint;
union hv_synic_simp simp;
union hv_synic_siefp siefp;
union hv_synic_scontrol sctrl;
+
+ hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+ shared_sint.masked = 1;
+
+ /* Need to correctly cleanup in the case of SMP!!! */
+ /* Disable the interrupt */
+ hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+ hv_get_simp(simp.as_uint64);
+ simp.simp_enabled = 0;
+ simp.base_simp_gpa = 0;
+
+ hv_set_simp(simp.as_uint64);
+
+ hv_get_siefp(siefp.as_uint64);
+ siefp.siefp_enabled = 0;
+ siefp.base_siefp_gpa = 0;
+
+ hv_set_siefp(siefp.as_uint64);
+
+ /* Disable the global synic bit */
+ hv_get_synic_state(sctrl.as_uint64);
+ sctrl.enable = 0;
+ hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_cleanup(unsigned int cpu)
+{
struct vmbus_channel *channel, *sc;
bool channel_found = false;
unsigned long flags;
- hv_get_synic_state(sctrl.as_uint64);
- if (sctrl.enable != 1)
- return -EFAULT;
-
/*
* Search for channels which are bound to the CPU we're about to
* cleanup. In case we find one and vmbus is still connected we need to
@@ -247,31 +277,9 @@ int hv_synic_cleanup(unsigned int cpu)
if (channel_found && vmbus_connection.conn_state == CONNECTED)
return -EBUSY;
- hv_stimer_cleanup(cpu);
-
- hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
- shared_sint.masked = 1;
-
- /* Need to correctly cleanup in the case of SMP!!! */
- /* Disable the interrupt */
- hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
- hv_get_simp(simp.as_uint64);
- simp.simp_enabled = 0;
- simp.base_simp_gpa = 0;
-
- hv_set_simp(simp.as_uint64);
+ hv_stimer_legacy_cleanup(cpu);
- hv_get_siefp(siefp.as_uint64);
- siefp.siefp_enabled = 0;
- siefp.base_siefp_gpa = 0;
-
- hv_set_siefp(siefp.as_uint64);
-
- /* Disable the global synic bit */
- sctrl.enable = 0;
- hv_set_synic_state(sctrl.as_uint64);
+ hv_synic_disable_regs(cpu);
return 0;
}
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 6fb4ea5f0304..a02ce43d778d 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -23,6 +23,9 @@
#include <linux/percpu_counter.h>
#include <linux/hyperv.h>
+#include <asm/hyperv-tlfs.h>
+
+#include <asm/mshyperv.h>
#define CREATE_TRACE_POINTS
#include "hv_trace_balloon.h"
@@ -341,8 +344,6 @@ struct dm_unballoon_response {
*
* mem_range: Memory range to hot add.
*
- * On Linux we currently don't support this since we cannot hot add
- * arbitrary granularity of memory.
*/
struct dm_hot_add {
@@ -457,6 +458,7 @@ struct hot_add_wrk {
struct work_struct wrk;
};
+static bool allow_hibernation;
static bool hot_add = true;
static bool do_hot_add;
/*
@@ -477,7 +479,7 @@ module_param(pressure_report_delay, uint, (S_IRUGO | S_IWUSR));
MODULE_PARM_DESC(pressure_report_delay, "Delay in secs in reporting pressure");
static atomic_t trans_id = ATOMIC_INIT(0);
-static int dm_ring_size = (5 * PAGE_SIZE);
+static int dm_ring_size = 20 * 1024;
/*
* Driver specific state.
@@ -493,10 +495,10 @@ enum hv_dm_state {
};
-static __u8 recv_buffer[PAGE_SIZE];
-static __u8 *send_buffer;
-#define PAGES_IN_2M 512
-#define HA_CHUNK (32 * 1024)
+static __u8 recv_buffer[HV_HYP_PAGE_SIZE];
+static __u8 balloon_up_send_buffer[HV_HYP_PAGE_SIZE];
+#define PAGES_IN_2M (2 * 1024 * 1024 / PAGE_SIZE)
+#define HA_CHUNK (128 * 1024 * 1024 / PAGE_SIZE)
struct hv_dynmem_device {
struct hv_device *dev;
@@ -680,9 +682,7 @@ static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
__ClearPageOffline(pg);
/* This frame is currently backed; online the page. */
- __online_page_set_limits(pg);
- __online_page_increment_counters(pg);
- __online_page_free(pg);
+ generic_online_page(pg, 0);
lockdep_assert_held(&dm_device.ha_lock);
dm_device.num_pages_onlined++;
@@ -1053,8 +1053,12 @@ static void hot_add_req(struct work_struct *dummy)
else
resp.result = 0;
- if (!do_hot_add || (resp.page_count == 0))
- pr_err("Memory hot add failed\n");
+ if (!do_hot_add || resp.page_count == 0) {
+ if (!allow_hibernation)
+ pr_err("Memory hot add failed\n");
+ else
+ pr_info("Ignore hot-add request!\n");
+ }
dm->state = DM_INITIALIZED;
resp.hdr.trans_id = atomic_inc_return(&trans_id);
@@ -1076,7 +1080,7 @@ static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
__u64 *max_page_count = (__u64 *)&info_hdr[1];
pr_info("Max. dynamic memory size: %llu MB\n",
- (*max_page_count) >> (20 - PAGE_SHIFT));
+ (*max_page_count) >> (20 - HV_HYP_PAGE_SHIFT));
}
break;
@@ -1213,12 +1217,9 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm,
unsigned int i, j;
struct page *pg;
- if (num_pages < alloc_unit)
- return 0;
-
- for (i = 0; (i * alloc_unit) < num_pages; i++) {
+ for (i = 0; i < num_pages / alloc_unit; i++) {
if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) >
- PAGE_SIZE)
+ HV_HYP_PAGE_SIZE)
return i * alloc_unit;
/*
@@ -1254,7 +1255,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm,
}
- return num_pages;
+ return i * alloc_unit;
}
static void balloon_up(struct work_struct *dummy)
@@ -1269,31 +1270,27 @@ static void balloon_up(struct work_struct *dummy)
long avail_pages;
unsigned long floor;
- /* The host balloons pages in 2M granularity. */
- WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0);
-
/*
* We will attempt 2M allocations. However, if we fail to
- * allocate 2M chunks, we will go back to 4k allocations.
+ * allocate 2M chunks, we will go back to PAGE_SIZE allocations.
*/
- alloc_unit = 512;
+ alloc_unit = PAGES_IN_2M;
avail_pages = si_mem_available();
floor = compute_balloon_floor();
- /* Refuse to balloon below the floor, keep the 2M granularity. */
+ /* Refuse to balloon below the floor. */
if (avail_pages < num_pages || avail_pages - num_pages < floor) {
pr_warn("Balloon request will be partially fulfilled. %s\n",
avail_pages < num_pages ? "Not enough memory." :
"Balloon floor reached.");
num_pages = avail_pages > floor ? (avail_pages - floor) : 0;
- num_pages -= num_pages % PAGES_IN_2M;
}
while (!done) {
- bl_resp = (struct dm_balloon_response *)send_buffer;
- memset(send_buffer, 0, PAGE_SIZE);
+ memset(balloon_up_send_buffer, 0, HV_HYP_PAGE_SIZE);
+ bl_resp = (struct dm_balloon_response *)balloon_up_send_buffer;
bl_resp->hdr.type = DM_BALLOON_RESPONSE;
bl_resp->hdr.size = sizeof(struct dm_balloon_response);
bl_resp->more_pages = 1;
@@ -1491,7 +1488,7 @@ static void balloon_onchannelcallback(void *context)
memset(recv_buffer, 0, sizeof(recv_buffer));
vmbus_recvpacket(dev->channel, recv_buffer,
- PAGE_SIZE, &recvlen, &requestid);
+ HV_HYP_PAGE_SIZE, &recvlen, &requestid);
if (recvlen > 0) {
dm_msg = (struct dm_message *)recv_buffer;
@@ -1509,6 +1506,11 @@ static void balloon_onchannelcallback(void *context)
break;
case DM_BALLOON_REQUEST:
+ if (allow_hibernation) {
+ pr_info("Ignore balloon-up request!\n");
+ break;
+ }
+
if (dm->state == DM_BALLOON_UP)
pr_warn("Currently ballooning\n");
bal_msg = (struct dm_balloon *)recv_buffer;
@@ -1518,6 +1520,11 @@ static void balloon_onchannelcallback(void *context)
break;
case DM_UNBALLOON_REQUEST:
+ if (allow_hibernation) {
+ pr_info("Ignore balloon-down request!\n");
+ break;
+ }
+
dm->state = DM_BALLOON_DOWN;
balloon_down(dm,
(struct dm_unballoon_request *)recv_buffer);
@@ -1564,58 +1571,18 @@ static void balloon_onchannelcallback(void *context)
}
-static int balloon_probe(struct hv_device *dev,
- const struct hv_vmbus_device_id *dev_id)
+static int balloon_connect_vsp(struct hv_device *dev)
{
- int ret;
- unsigned long t;
struct dm_version_request version_req;
struct dm_capabilities cap_msg;
-
-#ifdef CONFIG_MEMORY_HOTPLUG
- do_hot_add = hot_add;
-#else
- do_hot_add = false;
-#endif
-
- /*
- * First allocate a send buffer.
- */
-
- send_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!send_buffer)
- return -ENOMEM;
+ unsigned long t;
+ int ret;
ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
- balloon_onchannelcallback, dev);
-
+ balloon_onchannelcallback, dev);
if (ret)
- goto probe_error0;
-
- dm_device.dev = dev;
- dm_device.state = DM_INITIALIZING;
- dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
- init_completion(&dm_device.host_event);
- init_completion(&dm_device.config_event);
- INIT_LIST_HEAD(&dm_device.ha_region_list);
- spin_lock_init(&dm_device.ha_lock);
- INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
- INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
- dm_device.host_specified_ha_region = false;
-
- dm_device.thread =
- kthread_run(dm_thread_func, &dm_device, "hv_balloon");
- if (IS_ERR(dm_device.thread)) {
- ret = PTR_ERR(dm_device.thread);
- goto probe_error1;
- }
-
-#ifdef CONFIG_MEMORY_HOTPLUG
- set_online_page_callback(&hv_online_page);
- register_memory_notifier(&hv_memory_nb);
-#endif
+ return ret;
- hv_set_drvdata(dev, &dm_device);
/*
* Initiate the hand shake with the host and negotiate
* a version that the host can support. We start with the
@@ -1631,16 +1598,15 @@ static int balloon_probe(struct hv_device *dev,
dm_device.version = version_req.version.version;
ret = vmbus_sendpacket(dev->channel, &version_req,
- sizeof(struct dm_version_request),
- (unsigned long)NULL,
- VM_PKT_DATA_INBAND, 0);
+ sizeof(struct dm_version_request),
+ (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
if (ret)
- goto probe_error2;
+ goto out;
t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
if (t == 0) {
ret = -ETIMEDOUT;
- goto probe_error2;
+ goto out;
}
/*
@@ -1648,8 +1614,8 @@ static int balloon_probe(struct hv_device *dev,
* fail the probe function.
*/
if (dm_device.state == DM_INIT_ERROR) {
- ret = -ETIMEDOUT;
- goto probe_error2;
+ ret = -EPROTO;
+ goto out;
}
pr_info("Using Dynamic Memory protocol version %u.%u\n",
@@ -1664,6 +1630,11 @@ static int balloon_probe(struct hv_device *dev,
cap_msg.hdr.size = sizeof(struct dm_capabilities);
cap_msg.hdr.trans_id = atomic_inc_return(&trans_id);
+ /*
+ * When hibernation (i.e. virtual ACPI S4 state) is enabled, the host
+ * currently still requires the bits to be set, so we have to add code
+ * to fail the host's hot-add and balloon up/down requests, if any.
+ */
cap_msg.caps.cap_bits.balloon = 1;
cap_msg.caps.cap_bits.hot_add = 1;
@@ -1682,16 +1653,15 @@ static int balloon_probe(struct hv_device *dev,
cap_msg.max_page_number = -1;
ret = vmbus_sendpacket(dev->channel, &cap_msg,
- sizeof(struct dm_capabilities),
- (unsigned long)NULL,
- VM_PKT_DATA_INBAND, 0);
+ sizeof(struct dm_capabilities),
+ (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
if (ret)
- goto probe_error2;
+ goto out;
t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
if (t == 0) {
ret = -ETIMEDOUT;
- goto probe_error2;
+ goto out;
}
/*
@@ -1699,25 +1669,71 @@ static int balloon_probe(struct hv_device *dev,
* fail the probe function.
*/
if (dm_device.state == DM_INIT_ERROR) {
- ret = -ETIMEDOUT;
- goto probe_error2;
+ ret = -EPROTO;
+ goto out;
}
+ return 0;
+out:
+ vmbus_close(dev->channel);
+ return ret;
+}
+
+static int balloon_probe(struct hv_device *dev,
+ const struct hv_vmbus_device_id *dev_id)
+{
+ int ret;
+
+ allow_hibernation = hv_is_hibernation_supported();
+ if (allow_hibernation)
+ hot_add = false;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+ do_hot_add = hot_add;
+#else
+ do_hot_add = false;
+#endif
+ dm_device.dev = dev;
+ dm_device.state = DM_INITIALIZING;
+ dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
+ init_completion(&dm_device.host_event);
+ init_completion(&dm_device.config_event);
+ INIT_LIST_HEAD(&dm_device.ha_region_list);
+ spin_lock_init(&dm_device.ha_lock);
+ INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
+ INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
+ dm_device.host_specified_ha_region = false;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+ set_online_page_callback(&hv_online_page);
+ register_memory_notifier(&hv_memory_nb);
+#endif
+
+ hv_set_drvdata(dev, &dm_device);
+
+ ret = balloon_connect_vsp(dev);
+ if (ret != 0)
+ return ret;
+
dm_device.state = DM_INITIALIZED;
- last_post_time = jiffies;
+
+ dm_device.thread =
+ kthread_run(dm_thread_func, &dm_device, "hv_balloon");
+ if (IS_ERR(dm_device.thread)) {
+ ret = PTR_ERR(dm_device.thread);
+ goto probe_error;
+ }
return 0;
-probe_error2:
+probe_error:
+ dm_device.state = DM_INIT_ERROR;
+ dm_device.thread = NULL;
+ vmbus_close(dev->channel);
#ifdef CONFIG_MEMORY_HOTPLUG
+ unregister_memory_notifier(&hv_memory_nb);
restore_online_page_callback(&hv_online_page);
#endif
- kthread_stop(dm_device.thread);
-
-probe_error1:
- vmbus_close(dev->channel);
-probe_error0:
- kfree(send_buffer);
return ret;
}
@@ -1734,12 +1750,11 @@ static int balloon_remove(struct hv_device *dev)
cancel_work_sync(&dm->balloon_wrk.wrk);
cancel_work_sync(&dm->ha_wrk.wrk);
- vmbus_close(dev->channel);
kthread_stop(dm->thread);
- kfree(send_buffer);
+ vmbus_close(dev->channel);
#ifdef CONFIG_MEMORY_HOTPLUG
- restore_online_page_callback(&hv_online_page);
unregister_memory_notifier(&hv_memory_nb);
+ restore_online_page_callback(&hv_online_page);
#endif
spin_lock_irqsave(&dm_device.ha_lock, flags);
list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {
@@ -1755,6 +1770,59 @@ static int balloon_remove(struct hv_device *dev)
return 0;
}
+static int balloon_suspend(struct hv_device *hv_dev)
+{
+ struct hv_dynmem_device *dm = hv_get_drvdata(hv_dev);
+
+ tasklet_disable(&hv_dev->channel->callback_event);
+
+ cancel_work_sync(&dm->balloon_wrk.wrk);
+ cancel_work_sync(&dm->ha_wrk.wrk);
+
+ if (dm->thread) {
+ kthread_stop(dm->thread);
+ dm->thread = NULL;
+ vmbus_close(hv_dev->channel);
+ }
+
+ tasklet_enable(&hv_dev->channel->callback_event);
+
+ return 0;
+
+}
+
+static int balloon_resume(struct hv_device *dev)
+{
+ int ret;
+
+ dm_device.state = DM_INITIALIZING;
+
+ ret = balloon_connect_vsp(dev);
+
+ if (ret != 0)
+ goto out;
+
+ dm_device.thread =
+ kthread_run(dm_thread_func, &dm_device, "hv_balloon");
+ if (IS_ERR(dm_device.thread)) {
+ ret = PTR_ERR(dm_device.thread);
+ dm_device.thread = NULL;
+ goto close_channel;
+ }
+
+ dm_device.state = DM_INITIALIZED;
+ return 0;
+close_channel:
+ vmbus_close(dev->channel);
+out:
+ dm_device.state = DM_INIT_ERROR;
+#ifdef CONFIG_MEMORY_HOTPLUG
+ unregister_memory_notifier(&hv_memory_nb);
+ restore_online_page_callback(&hv_online_page);
+#endif
+ return ret;
+}
+
static const struct hv_vmbus_device_id id_table[] = {
/* Dynamic Memory Class ID */
/* 525074DC-8985-46e2-8057-A307DC18A502 */
@@ -1769,6 +1837,8 @@ static struct hv_driver balloon_drv = {
.id_table = id_table,
.probe = balloon_probe,
.remove = balloon_remove,
+ .suspend = balloon_suspend,
+ .resume = balloon_resume,
.driver = {
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
},
diff --git a/drivers/hv/hv_debugfs.c b/drivers/hv/hv_debugfs.c
new file mode 100644
index 000000000000..8a2878573582
--- /dev/null
+++ b/drivers/hv/hv_debugfs.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Authors:
+ * Branden Bonaby <brandonbonaby94@gmail.com>
+ */
+
+#include <linux/hyperv.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+
+#include "hyperv_vmbus.h"
+
+struct dentry *hv_debug_root;
+
+static int hv_debugfs_delay_get(void *data, u64 *val)
+{
+ *val = *(u32 *)data;
+ return 0;
+}
+
+static int hv_debugfs_delay_set(void *data, u64 val)
+{
+ if (val > 1000)
+ return -EINVAL;
+ *(u32 *)data = val;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(hv_debugfs_delay_fops, hv_debugfs_delay_get,
+ hv_debugfs_delay_set, "%llu\n");
+
+static int hv_debugfs_state_get(void *data, u64 *val)
+{
+ *val = *(bool *)data;
+ return 0;
+}
+
+static int hv_debugfs_state_set(void *data, u64 val)
+{
+ if (val == 1)
+ *(bool *)data = true;
+ else if (val == 0)
+ *(bool *)data = false;
+ else
+ return -EINVAL;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(hv_debugfs_state_fops, hv_debugfs_state_get,
+ hv_debugfs_state_set, "%llu\n");
+
+/* Setup delay files to store test values */
+static int hv_debug_delay_files(struct hv_device *dev, struct dentry *root)
+{
+ struct vmbus_channel *channel = dev->channel;
+ char *buffer = "fuzz_test_buffer_interrupt_delay";
+ char *message = "fuzz_test_message_delay";
+ int *buffer_val = &channel->fuzz_testing_interrupt_delay;
+ int *message_val = &channel->fuzz_testing_message_delay;
+ struct dentry *buffer_file, *message_file;
+
+ buffer_file = debugfs_create_file(buffer, 0644, root,
+ buffer_val,
+ &hv_debugfs_delay_fops);
+ if (IS_ERR(buffer_file)) {
+ pr_debug("debugfs_hyperv: file %s not created\n", buffer);
+ return PTR_ERR(buffer_file);
+ }
+
+ message_file = debugfs_create_file(message, 0644, root,
+ message_val,
+ &hv_debugfs_delay_fops);
+ if (IS_ERR(message_file)) {
+ pr_debug("debugfs_hyperv: file %s not created\n", message);
+ return PTR_ERR(message_file);
+ }
+
+ return 0;
+}
+
+/* Setup test state value for vmbus device */
+static int hv_debug_set_test_state(struct hv_device *dev, struct dentry *root)
+{
+ struct vmbus_channel *channel = dev->channel;
+ bool *state = &channel->fuzz_testing_state;
+ char *status = "fuzz_test_state";
+ struct dentry *test_state;
+
+ test_state = debugfs_create_file(status, 0644, root,
+ state,
+ &hv_debugfs_state_fops);
+ if (IS_ERR(test_state)) {
+ pr_debug("debugfs_hyperv: file %s not created\n", status);
+ return PTR_ERR(test_state);
+ }
+
+ return 0;
+}
+
+/* Bind hv device to a dentry for debugfs */
+static void hv_debug_set_dir_dentry(struct hv_device *dev, struct dentry *root)
+{
+ if (hv_debug_root)
+ dev->debug_dir = root;
+}
+
+/* Create all test dentry's and names for fuzz testing */
+int hv_debug_add_dev_dir(struct hv_device *dev)
+{
+ const char *device = dev_name(&dev->device);
+ char *delay_name = "delay";
+ struct dentry *delay, *dev_root;
+ int ret;
+
+ if (!IS_ERR(hv_debug_root)) {
+ dev_root = debugfs_create_dir(device, hv_debug_root);
+ if (IS_ERR(dev_root)) {
+ pr_debug("debugfs_hyperv: hyperv/%s/ not created\n",
+ device);
+ return PTR_ERR(dev_root);
+ }
+ hv_debug_set_test_state(dev, dev_root);
+ hv_debug_set_dir_dentry(dev, dev_root);
+ delay = debugfs_create_dir(delay_name, dev_root);
+
+ if (IS_ERR(delay)) {
+ pr_debug("debugfs_hyperv: hyperv/%s/%s/ not created\n",
+ device, delay_name);
+ return PTR_ERR(delay);
+ }
+ ret = hv_debug_delay_files(dev, delay);
+
+ return ret;
+ }
+ pr_debug("debugfs_hyperv: hyperv/ not in root debugfs path\n");
+ return PTR_ERR(hv_debug_root);
+}
+
+/* Remove dentry associated with released hv device */
+void hv_debug_rm_dev_dir(struct hv_device *dev)
+{
+ if (!IS_ERR(hv_debug_root))
+ debugfs_remove_recursive(dev->debug_dir);
+}
+
+/* Remove all dentrys associated with vmbus testing */
+void hv_debug_rm_all_dir(void)
+{
+ debugfs_remove_recursive(hv_debug_root);
+}
+
+/* Delay buffer/message reads on a vmbus channel */
+void hv_debug_delay_test(struct vmbus_channel *channel, enum delay delay_type)
+{
+ struct vmbus_channel *test_channel = channel->primary_channel ?
+ channel->primary_channel :
+ channel;
+ bool state = test_channel->fuzz_testing_state;
+
+ if (state) {
+ if (delay_type == 0)
+ udelay(test_channel->fuzz_testing_interrupt_delay);
+ else
+ udelay(test_channel->fuzz_testing_message_delay);
+ }
+}
+
+/* Initialize top dentry for vmbus testing */
+int hv_debug_init(void)
+{
+ hv_debug_root = debugfs_create_dir("hyperv", NULL);
+ if (IS_ERR(hv_debug_root)) {
+ pr_debug("debugfs_hyperv: hyperv/ not created\n");
+ return PTR_ERR(hv_debug_root);
+ }
+ return 0;
+}
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 7e30ae0635cc..bb9ba3f7c794 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -13,6 +13,7 @@
#include <linux/workqueue.h>
#include <linux/hyperv.h>
#include <linux/sched.h>
+#include <asm/hyperv-tlfs.h>
#include "hyperv_vmbus.h"
#include "hv_utils_transport.h"
@@ -234,7 +235,7 @@ void hv_fcopy_onchannelcallback(void *context)
if (fcopy_transaction.state > HVUTIL_READY)
return;
- vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 2, &recvlen,
+ vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 2, &recvlen,
&requestid);
if (recvlen <= 0)
return;
@@ -345,9 +346,61 @@ int hv_fcopy_init(struct hv_util_service *srv)
return 0;
}
+static void hv_fcopy_cancel_work(void)
+{
+ cancel_delayed_work_sync(&fcopy_timeout_work);
+ cancel_work_sync(&fcopy_send_work);
+}
+
+int hv_fcopy_pre_suspend(void)
+{
+ struct vmbus_channel *channel = fcopy_transaction.recv_channel;
+ struct hv_fcopy_hdr *fcopy_msg;
+
+ /*
+ * Fake a CANCEL_FCOPY message for the user space daemon in case the
+ * daemon is in the middle of copying some file. It doesn't matter if
+ * there is already a message pending to be delivered to the user
+ * space since we force fcopy_transaction.state to be HVUTIL_READY, so
+ * the user space daemon's write() will fail with EINVAL (see
+ * fcopy_on_msg()), and the daemon will reset the device by closing
+ * and re-opening it.
+ */
+ fcopy_msg = kzalloc(sizeof(*fcopy_msg), GFP_KERNEL);
+ if (!fcopy_msg)
+ return -ENOMEM;
+
+ tasklet_disable(&channel->callback_event);
+
+ fcopy_msg->operation = CANCEL_FCOPY;
+
+ hv_fcopy_cancel_work();
+
+ /* We don't care about the return value. */
+ hvutil_transport_send(hvt, fcopy_msg, sizeof(*fcopy_msg), NULL);
+
+ kfree(fcopy_msg);
+
+ fcopy_transaction.state = HVUTIL_READY;
+
+ /* tasklet_enable() will be called in hv_fcopy_pre_resume(). */
+ return 0;
+}
+
+int hv_fcopy_pre_resume(void)
+{
+ struct vmbus_channel *channel = fcopy_transaction.recv_channel;
+
+ tasklet_enable(&channel->callback_event);
+
+ return 0;
+}
+
void hv_fcopy_deinit(void)
{
fcopy_transaction.state = HVUTIL_DEVICE_DYING;
- cancel_delayed_work_sync(&fcopy_timeout_work);
+
+ hv_fcopy_cancel_work();
+
hvutil_transport_destroy(hvt);
}
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index 5054d1105236..e74b144b8f3d 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -27,6 +27,7 @@
#include <linux/connector.h>
#include <linux/workqueue.h>
#include <linux/hyperv.h>
+#include <asm/hyperv-tlfs.h>
#include "hyperv_vmbus.h"
#include "hv_utils_transport.h"
@@ -661,7 +662,7 @@ void hv_kvp_onchannelcallback(void *context)
if (kvp_transaction.state > HVUTIL_READY)
return;
- vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 4, &recvlen,
+ vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 4, &recvlen,
&requestid);
if (recvlen > 0) {
@@ -757,11 +758,50 @@ hv_kvp_init(struct hv_util_service *srv)
return 0;
}
-void hv_kvp_deinit(void)
+static void hv_kvp_cancel_work(void)
{
- kvp_transaction.state = HVUTIL_DEVICE_DYING;
cancel_delayed_work_sync(&kvp_host_handshake_work);
cancel_delayed_work_sync(&kvp_timeout_work);
cancel_work_sync(&kvp_sendkey_work);
+}
+
+int hv_kvp_pre_suspend(void)
+{
+ struct vmbus_channel *channel = kvp_transaction.recv_channel;
+
+ tasklet_disable(&channel->callback_event);
+
+ /*
+ * If there is a pending transtion, it's unnecessary to tell the host
+ * that the transaction will fail, because that is implied when
+ * util_suspend() calls vmbus_close() later.
+ */
+ hv_kvp_cancel_work();
+
+ /*
+ * Forece the state to READY to handle the ICMSGTYPE_NEGOTIATE message
+ * later. The user space daemon may go out of order and its write()
+ * may fail with EINVAL: this doesn't matter since the daemon will
+ * reset the device by closing and re-opening it.
+ */
+ kvp_transaction.state = HVUTIL_READY;
+ return 0;
+}
+
+int hv_kvp_pre_resume(void)
+{
+ struct vmbus_channel *channel = kvp_transaction.recv_channel;
+
+ tasklet_enable(&channel->callback_event);
+
+ return 0;
+}
+
+void hv_kvp_deinit(void)
+{
+ kvp_transaction.state = HVUTIL_DEVICE_DYING;
+
+ hv_kvp_cancel_work();
+
hvutil_transport_destroy(hvt);
}
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index 20ba95b75a94..1c75b38f0d6d 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -12,6 +12,7 @@
#include <linux/connector.h>
#include <linux/workqueue.h>
#include <linux/hyperv.h>
+#include <asm/hyperv-tlfs.h>
#include "hyperv_vmbus.h"
#include "hv_utils_transport.h"
@@ -297,7 +298,7 @@ void hv_vss_onchannelcallback(void *context)
if (vss_transaction.state > HVUTIL_READY)
return;
- vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 2, &recvlen,
+ vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 2, &recvlen,
&requestid);
if (recvlen > 0) {
@@ -378,10 +379,61 @@ hv_vss_init(struct hv_util_service *srv)
return 0;
}
-void hv_vss_deinit(void)
+static void hv_vss_cancel_work(void)
{
- vss_transaction.state = HVUTIL_DEVICE_DYING;
cancel_delayed_work_sync(&vss_timeout_work);
cancel_work_sync(&vss_handle_request_work);
+}
+
+int hv_vss_pre_suspend(void)
+{
+ struct vmbus_channel *channel = vss_transaction.recv_channel;
+ struct hv_vss_msg *vss_msg;
+
+ /*
+ * Fake a THAW message for the user space daemon in case the daemon
+ * has frozen the file systems. It doesn't matter if there is already
+ * a message pending to be delivered to the user space since we force
+ * vss_transaction.state to be HVUTIL_READY, so the user space daemon's
+ * write() will fail with EINVAL (see vss_on_msg()), and the daemon
+ * will reset the device by closing and re-opening it.
+ */
+ vss_msg = kzalloc(sizeof(*vss_msg), GFP_KERNEL);
+ if (!vss_msg)
+ return -ENOMEM;
+
+ tasklet_disable(&channel->callback_event);
+
+ vss_msg->vss_hdr.operation = VSS_OP_THAW;
+
+ /* Cancel any possible pending work. */
+ hv_vss_cancel_work();
+
+ /* We don't care about the return value. */
+ hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL);
+
+ kfree(vss_msg);
+
+ vss_transaction.state = HVUTIL_READY;
+
+ /* tasklet_enable() will be called in hv_vss_pre_resume(). */
+ return 0;
+}
+
+int hv_vss_pre_resume(void)
+{
+ struct vmbus_channel *channel = vss_transaction.recv_channel;
+
+ tasklet_enable(&channel->callback_event);
+
+ return 0;
+}
+
+void hv_vss_deinit(void)
+{
+ vss_transaction.state = HVUTIL_DEVICE_DYING;
+
+ hv_vss_cancel_work();
+
hvutil_transport_destroy(hvt);
}
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index e32681ee7b9f..92ee0fe4c919 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -24,6 +24,10 @@
#define SD_MAJOR 3
#define SD_MINOR 0
+#define SD_MINOR_1 1
+#define SD_MINOR_2 2
+#define SD_VERSION_3_1 (SD_MAJOR << 16 | SD_MINOR_1)
+#define SD_VERSION_3_2 (SD_MAJOR << 16 | SD_MINOR_2)
#define SD_VERSION (SD_MAJOR << 16 | SD_MINOR)
#define SD_MAJOR_1 1
@@ -50,8 +54,10 @@ static int sd_srv_version;
static int ts_srv_version;
static int hb_srv_version;
-#define SD_VER_COUNT 2
+#define SD_VER_COUNT 4
static const int sd_versions[] = {
+ SD_VERSION_3_2,
+ SD_VERSION_3_1,
SD_VERSION,
SD_VERSION_1
};
@@ -75,18 +81,56 @@ static const int fw_versions[] = {
UTIL_WS2K8_FW_VERSION
};
+/*
+ * Send the "hibernate" udev event in a thread context.
+ */
+struct hibernate_work_context {
+ struct work_struct work;
+ struct hv_device *dev;
+};
+
+static struct hibernate_work_context hibernate_context;
+static bool hibernation_supported;
+
+static void send_hibernate_uevent(struct work_struct *work)
+{
+ char *uevent_env[2] = { "EVENT=hibernate", NULL };
+ struct hibernate_work_context *ctx;
+
+ ctx = container_of(work, struct hibernate_work_context, work);
+
+ kobject_uevent_env(&ctx->dev->device.kobj, KOBJ_CHANGE, uevent_env);
+
+ pr_info("Sent hibernation uevent\n");
+}
+
+static int hv_shutdown_init(struct hv_util_service *srv)
+{
+ struct vmbus_channel *channel = srv->channel;
+
+ INIT_WORK(&hibernate_context.work, send_hibernate_uevent);
+ hibernate_context.dev = channel->device_obj;
+
+ hibernation_supported = hv_is_hibernation_supported();
+
+ return 0;
+}
+
static void shutdown_onchannelcallback(void *context);
static struct hv_util_service util_shutdown = {
.util_cb = shutdown_onchannelcallback,
+ .util_init = hv_shutdown_init,
};
static int hv_timesync_init(struct hv_util_service *srv);
+static int hv_timesync_pre_suspend(void);
static void hv_timesync_deinit(void);
static void timesync_onchannelcallback(void *context);
static struct hv_util_service util_timesynch = {
.util_cb = timesync_onchannelcallback,
.util_init = hv_timesync_init,
+ .util_pre_suspend = hv_timesync_pre_suspend,
.util_deinit = hv_timesync_deinit,
};
@@ -98,18 +142,24 @@ static struct hv_util_service util_heartbeat = {
static struct hv_util_service util_kvp = {
.util_cb = hv_kvp_onchannelcallback,
.util_init = hv_kvp_init,
+ .util_pre_suspend = hv_kvp_pre_suspend,
+ .util_pre_resume = hv_kvp_pre_resume,
.util_deinit = hv_kvp_deinit,
};
static struct hv_util_service util_vss = {
.util_cb = hv_vss_onchannelcallback,
.util_init = hv_vss_init,
+ .util_pre_suspend = hv_vss_pre_suspend,
+ .util_pre_resume = hv_vss_pre_resume,
.util_deinit = hv_vss_deinit,
};
static struct hv_util_service util_fcopy = {
.util_cb = hv_fcopy_onchannelcallback,
.util_init = hv_fcopy_init,
+ .util_pre_suspend = hv_fcopy_pre_suspend,
+ .util_pre_resume = hv_fcopy_pre_resume,
.util_deinit = hv_fcopy_deinit,
};
@@ -118,17 +168,27 @@ static void perform_shutdown(struct work_struct *dummy)
orderly_poweroff(true);
}
+static void perform_restart(struct work_struct *dummy)
+{
+ orderly_reboot();
+}
+
/*
* Perform the shutdown operation in a thread context.
*/
static DECLARE_WORK(shutdown_work, perform_shutdown);
+/*
+ * Perform the restart operation in a thread context.
+ */
+static DECLARE_WORK(restart_work, perform_restart);
+
static void shutdown_onchannelcallback(void *context)
{
struct vmbus_channel *channel = context;
+ struct work_struct *work = NULL;
u32 recvlen;
u64 requestid;
- bool execute_shutdown = false;
u8 *shut_txf_buf = util_shutdown.recv_buffer;
struct shutdown_msg_data *shutdown_msg;
@@ -136,7 +196,7 @@ static void shutdown_onchannelcallback(void *context)
struct icmsg_hdr *icmsghdrp;
vmbus_recvpacket(channel, shut_txf_buf,
- PAGE_SIZE, &recvlen, &requestid);
+ HV_HYP_PAGE_SIZE, &recvlen, &requestid);
if (recvlen > 0) {
icmsghdrp = (struct icmsg_hdr *)&shut_txf_buf[
@@ -157,19 +217,37 @@ static void shutdown_onchannelcallback(void *context)
sizeof(struct vmbuspipe_hdr) +
sizeof(struct icmsg_hdr)];
+ /*
+ * shutdown_msg->flags can be 0(shut down), 2(reboot),
+ * or 4(hibernate). It may bitwise-OR 1, which means
+ * performing the request by force. Linux always tries
+ * to perform the request by force.
+ */
switch (shutdown_msg->flags) {
case 0:
case 1:
icmsghdrp->status = HV_S_OK;
- execute_shutdown = true;
-
+ work = &shutdown_work;
pr_info("Shutdown request received -"
" graceful shutdown initiated\n");
break;
+ case 2:
+ case 3:
+ icmsghdrp->status = HV_S_OK;
+ work = &restart_work;
+ pr_info("Restart request received -"
+ " graceful restart initiated\n");
+ break;
+ case 4:
+ case 5:
+ pr_info("Hibernation request received\n");
+ icmsghdrp->status = hibernation_supported ?
+ HV_S_OK : HV_E_FAIL;
+ if (hibernation_supported)
+ work = &hibernate_context.work;
+ break;
default:
icmsghdrp->status = HV_E_FAIL;
- execute_shutdown = false;
-
pr_info("Shutdown request received -"
" Invalid request\n");
break;
@@ -184,8 +262,8 @@ static void shutdown_onchannelcallback(void *context)
VM_PKT_DATA_INBAND, 0);
}
- if (execute_shutdown == true)
- schedule_work(&shutdown_work);
+ if (work)
+ schedule_work(work);
}
/*
@@ -211,7 +289,7 @@ static struct timespec64 hv_get_adj_host_time(void)
unsigned long flags;
spin_lock_irqsave(&host_ts.lock, flags);
- reftime = hyperv_cs->read(hyperv_cs);
+ reftime = hv_read_reference_counter();
newtime = host_ts.host_time + (reftime - host_ts.ref_time);
ts = ns_to_timespec64((newtime - WLTIMEDELTA) * 100);
spin_unlock_irqrestore(&host_ts.lock, flags);
@@ -250,7 +328,7 @@ static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 adj_flags)
*/
spin_lock_irqsave(&host_ts.lock, flags);
- cur_reftime = hyperv_cs->read(hyperv_cs);
+ cur_reftime = hv_read_reference_counter();
host_ts.host_time = hosttime;
host_ts.ref_time = cur_reftime;
@@ -284,7 +362,7 @@ static void timesync_onchannelcallback(void *context)
u8 *time_txf_buf = util_timesynch.recv_buffer;
vmbus_recvpacket(channel, time_txf_buf,
- PAGE_SIZE, &recvlen, &requestid);
+ HV_HYP_PAGE_SIZE, &recvlen, &requestid);
if (recvlen > 0) {
icmsghdrp = (struct icmsg_hdr *)&time_txf_buf[
@@ -315,7 +393,7 @@ static void timesync_onchannelcallback(void *context)
sizeof(struct vmbuspipe_hdr) +
sizeof(struct icmsg_hdr)];
adj_guesttime(timedatap->parenttime,
- hyperv_cs->read(hyperv_cs),
+ hv_read_reference_counter(),
timedatap->flags);
}
}
@@ -346,7 +424,7 @@ static void heartbeat_onchannelcallback(void *context)
while (1) {
vmbus_recvpacket(channel, hbeat_txf_buf,
- PAGE_SIZE, &recvlen, &requestid);
+ HV_HYP_PAGE_SIZE, &recvlen, &requestid);
if (!recvlen)
break;
@@ -390,7 +468,7 @@ static int util_probe(struct hv_device *dev,
(struct hv_util_service *)dev_id->driver_data;
int ret;
- srv->recv_buffer = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+ srv->recv_buffer = kmalloc(HV_HYP_PAGE_SIZE * 4, GFP_KERNEL);
if (!srv->recv_buffer)
return -ENOMEM;
srv->channel = dev->channel;
@@ -413,8 +491,9 @@ static int util_probe(struct hv_device *dev,
hv_set_drvdata(dev, srv);
- ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0,
- srv->util_cb, dev->channel);
+ ret = vmbus_open(dev->channel, 4 * HV_HYP_PAGE_SIZE,
+ 4 * HV_HYP_PAGE_SIZE, NULL, 0, srv->util_cb,
+ dev->channel);
if (ret)
goto error;
@@ -440,6 +519,44 @@ static int util_remove(struct hv_device *dev)
return 0;
}
+/*
+ * When we're in util_suspend(), all the userspace processes have been frozen
+ * (refer to hibernate() -> freeze_processes()). The userspace is thawed only
+ * after the whole resume procedure, including util_resume(), finishes.
+ */
+static int util_suspend(struct hv_device *dev)
+{
+ struct hv_util_service *srv = hv_get_drvdata(dev);
+ int ret = 0;
+
+ if (srv->util_pre_suspend) {
+ ret = srv->util_pre_suspend();
+ if (ret)
+ return ret;
+ }
+
+ vmbus_close(dev->channel);
+
+ return 0;
+}
+
+static int util_resume(struct hv_device *dev)
+{
+ struct hv_util_service *srv = hv_get_drvdata(dev);
+ int ret = 0;
+
+ if (srv->util_pre_resume) {
+ ret = srv->util_pre_resume();
+ if (ret)
+ return ret;
+ }
+
+ ret = vmbus_open(dev->channel, 4 * HV_HYP_PAGE_SIZE,
+ 4 * HV_HYP_PAGE_SIZE, NULL, 0, srv->util_cb,
+ dev->channel);
+ return ret;
+}
+
static const struct hv_vmbus_device_id id_table[] = {
/* Shutdown guid */
{ HV_SHUTDOWN_GUID,
@@ -476,6 +593,8 @@ static struct hv_driver util_drv = {
.id_table = id_table,
.probe = util_probe,
.remove = util_remove,
+ .suspend = util_suspend,
+ .resume = util_resume,
.driver = {
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
},
@@ -523,7 +642,7 @@ static struct ptp_clock *hv_ptp_clock;
static int hv_timesync_init(struct hv_util_service *srv)
{
/* TimeSync requires Hyper-V clocksource. */
- if (!hyperv_cs)
+ if (!hv_read_reference_counter)
return -ENODEV;
spin_lock_init(&host_ts.lock);
@@ -545,11 +664,23 @@ static int hv_timesync_init(struct hv_util_service *srv)
return 0;
}
+static void hv_timesync_cancel_work(void)
+{
+ cancel_work_sync(&adj_time_work);
+}
+
+static int hv_timesync_pre_suspend(void)
+{
+ hv_timesync_cancel_work();
+ return 0;
+}
+
static void hv_timesync_deinit(void)
{
if (hv_ptp_clock)
ptp_clock_unregister(hv_ptp_clock);
- cancel_work_sync(&adj_time_work);
+
+ hv_timesync_cancel_work();
}
static int __init init_hyperv_utils(void)
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 362e70e9d145..f5fa3b3c9baf 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -146,8 +146,6 @@ struct hv_context {
*/
u64 guestid;
- void *tsc_page;
-
struct hv_per_cpu_context __percpu *cpu_context;
/*
@@ -171,8 +169,10 @@ extern int hv_synic_alloc(void);
extern void hv_synic_free(void);
+extern void hv_synic_enable_regs(unsigned int cpu);
extern int hv_synic_init(unsigned int cpu);
+extern void hv_synic_disable_regs(unsigned int cpu);
extern int hv_synic_cleanup(unsigned int cpu);
/* Interface */
@@ -192,11 +192,11 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
u64 *requestid, bool raw);
/*
- * Maximum channels is determined by the size of the interrupt page
- * which is PAGE_SIZE. 1/2 of PAGE_SIZE is for send endpoint interrupt
- * and the other is receive endpoint interrupt
+ * The Maximum number of channels (16348) is determined by the size of the
+ * interrupt page, which is HV_HYP_PAGE_SIZE. 1/2 of HV_HYP_PAGE_SIZE is to
+ * send endpoint interrupts, and the other is to receive endpoint interrupts.
*/
-#define MAX_NUM_CHANNELS ((PAGE_SIZE >> 1) << 3) /* 16348 channels */
+#define MAX_NUM_CHANNELS ((HV_HYP_PAGE_SIZE >> 1) << 3)
/* The value here must be in multiple of 32 */
/* TODO: Need to make this configurable */
@@ -258,6 +258,32 @@ struct vmbus_connection {
struct workqueue_struct *work_queue;
struct workqueue_struct *handle_primary_chan_wq;
struct workqueue_struct *handle_sub_chan_wq;
+
+ /*
+ * The number of sub-channels and hv_sock channels that should be
+ * cleaned up upon suspend: sub-channels will be re-created upon
+ * resume, and hv_sock channels should not survive suspend.
+ */
+ atomic_t nr_chan_close_on_suspend;
+ /*
+ * vmbus_bus_suspend() waits for "nr_chan_close_on_suspend" to
+ * drop to zero.
+ */
+ struct completion ready_for_suspend_event;
+
+ /*
+ * The number of primary channels that should be "fixed up"
+ * upon resume: these channels are re-offered upon resume, and some
+ * fields of the channel offers (i.e. child_relid and connection_id)
+ * can change, so the old offermsg must be fixed up, before the resume
+ * callbacks of the VSC drivers start to further touch the channels.
+ */
+ atomic_t nr_chan_fixup_on_resume;
+ /*
+ * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to
+ * drop to zero.
+ */
+ struct completion ready_for_resume_event;
};
@@ -272,6 +298,8 @@ struct vmbus_msginfo {
extern struct vmbus_connection vmbus_connection;
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version);
+
static inline void vmbus_send_interrupt(u32 relid)
{
sync_set_bit(relid, vmbus_connection.send_int_page);
@@ -324,14 +352,20 @@ void vmbus_on_msg_dpc(unsigned long data);
int hv_kvp_init(struct hv_util_service *srv);
void hv_kvp_deinit(void);
+int hv_kvp_pre_suspend(void);
+int hv_kvp_pre_resume(void);
void hv_kvp_onchannelcallback(void *context);
int hv_vss_init(struct hv_util_service *srv);
void hv_vss_deinit(void);
+int hv_vss_pre_suspend(void);
+int hv_vss_pre_resume(void);
void hv_vss_onchannelcallback(void *context);
int hv_fcopy_init(struct hv_util_service *srv);
void hv_fcopy_deinit(void);
+int hv_fcopy_pre_suspend(void);
+int hv_fcopy_pre_resume(void);
void hv_fcopy_onchannelcallback(void *context);
void vmbus_initiate_unload(bool crash);
@@ -357,4 +391,35 @@ enum hvutil_device_state {
HVUTIL_DEVICE_DYING, /* driver unload is in progress */
};
+enum delay {
+ INTERRUPT_DELAY = 0,
+ MESSAGE_DELAY = 1,
+};
+
+#ifdef CONFIG_HYPERV_TESTING
+
+int hv_debug_add_dev_dir(struct hv_device *dev);
+void hv_debug_rm_dev_dir(struct hv_device *dev);
+void hv_debug_rm_all_dir(void);
+int hv_debug_init(void);
+void hv_debug_delay_test(struct vmbus_channel *channel, enum delay delay_type);
+
+#else /* CONFIG_HYPERV_TESTING */
+
+static inline void hv_debug_rm_dev_dir(struct hv_device *dev) {};
+static inline void hv_debug_rm_all_dir(void) {};
+static inline void hv_debug_delay_test(struct vmbus_channel *channel,
+ enum delay delay_type) {};
+static inline int hv_debug_init(void)
+{
+ return -1;
+}
+
+static inline int hv_debug_add_dev_dir(struct hv_device *dev)
+{
+ return -1;
+}
+
+#endif /* CONFIG_HYPERV_TESTING */
+
#endif /* _HYPERV_VMBUS_H */
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 9a03b163cbbd..356e22159e83 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -396,6 +396,7 @@ struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
struct hv_ring_buffer_info *rbi = &channel->inbound;
struct vmpacket_descriptor *desc;
+ hv_debug_delay_test(channel, MESSAGE_DELAY);
if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
return NULL;
@@ -421,6 +422,7 @@ __hv_pkt_iter_next(struct vmbus_channel *channel,
u32 packetlen = desc->len8 << 3;
u32 dsize = rbi->ring_datasize;
+ hv_debug_delay_test(channel, MESSAGE_DELAY);
/* bump offset to next potential packet */
rbi->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
if (rbi->priv_read_index >= dsize)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index ebd35fc35290..029378c27421 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -24,12 +24,14 @@
#include <linux/sched/task_stack.h>
#include <asm/mshyperv.h>
+#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/ptrace.h>
#include <linux/screen_info.h>
#include <linux/kdebug.h>
#include <linux/efi.h>
#include <linux/random.h>
+#include <linux/syscore_ops.h>
#include <clocksource/hyperv_timer.h>
#include "hyperv_vmbus.h"
@@ -77,7 +79,7 @@ static struct notifier_block hyperv_panic_block = {
static const char *fb_mmio_name = "fb_range";
static struct resource *fb_mmio;
static struct resource *hyperv_mmio;
-static DEFINE_SEMAPHORE(hyperv_mmio_lock);
+static DEFINE_MUTEX(hyperv_mmio_lock);
static int vmbus_exists(void)
{
@@ -910,6 +912,45 @@ static void vmbus_shutdown(struct device *child_device)
drv->shutdown(dev);
}
+#ifdef CONFIG_PM_SLEEP
+/*
+ * vmbus_suspend - Suspend a vmbus device
+ */
+static int vmbus_suspend(struct device *child_device)
+{
+ struct hv_driver *drv;
+ struct hv_device *dev = device_to_hv_device(child_device);
+
+ /* The device may not be attached yet */
+ if (!child_device->driver)
+ return 0;
+
+ drv = drv_to_hv_drv(child_device->driver);
+ if (!drv->suspend)
+ return -EOPNOTSUPP;
+
+ return drv->suspend(dev);
+}
+
+/*
+ * vmbus_resume - Resume a vmbus device
+ */
+static int vmbus_resume(struct device *child_device)
+{
+ struct hv_driver *drv;
+ struct hv_device *dev = device_to_hv_device(child_device);
+
+ /* The device may not be attached yet */
+ if (!child_device->driver)
+ return 0;
+
+ drv = drv_to_hv_drv(child_device->driver);
+ if (!drv->resume)
+ return -EOPNOTSUPP;
+
+ return drv->resume(dev);
+}
+#endif /* CONFIG_PM_SLEEP */
/*
* vmbus_device_release - Final callback release of the vmbus child device
@@ -919,12 +960,22 @@ static void vmbus_device_release(struct device *device)
struct hv_device *hv_dev = device_to_hv_device(device);
struct vmbus_channel *channel = hv_dev->channel;
+ hv_debug_rm_dev_dir(hv_dev);
+
mutex_lock(&vmbus_connection.channel_mutex);
hv_process_channel_removal(channel);
mutex_unlock(&vmbus_connection.channel_mutex);
kfree(hv_dev);
}
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS: see the comment before vmbus_bus_pm.
+ */
+static const struct dev_pm_ops vmbus_pm = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume)
+};
+
/* The one and only one */
static struct bus_type hv_bus = {
.name = "vmbus",
@@ -935,6 +986,7 @@ static struct bus_type hv_bus = {
.uevent = vmbus_uevent,
.dev_groups = vmbus_dev_groups,
.drv_groups = vmbus_drv_groups,
+ .pm = &vmbus_pm,
};
struct onmessage_work_context {
@@ -981,6 +1033,10 @@ void vmbus_on_msg_dpc(unsigned long data)
}
entry = &channel_message_table[hdr->msgtype];
+
+ if (!entry->message_handler)
+ goto msg_handled;
+
if (entry->handler_type == VMHT_BLOCKING) {
ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
if (ctx == NULL)
@@ -1022,6 +1078,43 @@ msg_handled:
vmbus_signal_eom(msg, message_type);
}
+#ifdef CONFIG_PM_SLEEP
+/*
+ * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
+ * hibernation, because hv_sock connections can not persist across hibernation.
+ */
+static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
+{
+ struct onmessage_work_context *ctx;
+ struct vmbus_channel_rescind_offer *rescind;
+
+ WARN_ON(!is_hvsock_channel(channel));
+
+ /*
+ * sizeof(*ctx) is small and the allocation should really not fail,
+ * otherwise the state of the hv_sock connections ends up in limbo.
+ */
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL);
+
+ /*
+ * So far, these are not really used by Linux. Just set them to the
+ * reasonable values conforming to the definitions of the fields.
+ */
+ ctx->msg.header.message_type = 1;
+ ctx->msg.header.payload_size = sizeof(*rescind);
+
+ /* These values are actually used by Linux. */
+ rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.u.payload;
+ rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
+ rescind->child_relid = channel->offermsg.child_relid;
+
+ INIT_WORK(&ctx->work, vmbus_onmessage_work);
+
+ queue_work_on(vmbus_connection.connect_cpu,
+ vmbus_connection.work_queue,
+ &ctx->work);
+}
+#endif /* CONFIG_PM_SLEEP */
/*
* Direct callback for channels using other deferred processing
@@ -1186,7 +1279,7 @@ static void hv_kmsg_dump(struct kmsg_dumper *dumper,
* Write dump contents to the page. No need to synchronize; panic should
* be single-threaded.
*/
- kmsg_dump_get_buffer(dumper, true, hv_panic_page, PAGE_SIZE,
+ kmsg_dump_get_buffer(dumper, true, hv_panic_page, HV_HYP_PAGE_SIZE,
&bytes_written);
if (bytes_written)
hyperv_report_panic_msg(panic_pa, bytes_written);
@@ -1253,10 +1346,6 @@ static int vmbus_bus_init(void)
if (ret)
goto err_alloc;
- ret = hv_stimer_alloc(VMBUS_MESSAGE_SINT);
- if (ret < 0)
- goto err_alloc;
-
/*
* Initialize the per-cpu interrupt state and stimer state.
* Then connect to the host.
@@ -1290,7 +1379,7 @@ static int vmbus_bus_init(void)
*/
hv_get_crash_ctl(hyperv_crash_ctl);
if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) {
- hv_panic_page = (void *)get_zeroed_page(GFP_KERNEL);
+ hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page();
if (hv_panic_page) {
ret = kmsg_dump_register(&hv_kmsg_dumper);
if (ret)
@@ -1313,13 +1402,12 @@ static int vmbus_bus_init(void)
err_connect:
cpuhp_remove_state(hyperv_cpuhp_online);
err_cpuhp:
- hv_stimer_free();
-err_alloc:
hv_synic_free();
+err_alloc:
hv_remove_vmbus_irq();
bus_unregister(&hv_bus);
- free_page((unsigned long)hv_panic_page);
+ hv_free_hyperv_page((unsigned long)hv_panic_page);
unregister_sysctl_table(hv_ctl_table_hdr);
hv_ctl_table_hdr = NULL;
return ret;
@@ -1727,6 +1815,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
pr_err("Unable to register primary channeln");
goto err_kset_unregister;
}
+ hv_debug_add_dev_dir(child_device_obj);
return 0;
@@ -1928,7 +2017,7 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
int retval;
retval = -ENXIO;
- down(&hyperv_mmio_lock);
+ mutex_lock(&hyperv_mmio_lock);
/*
* If overlaps with frame buffers are allowed, then first attempt to
@@ -1975,7 +2064,7 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
}
exit:
- up(&hyperv_mmio_lock);
+ mutex_unlock(&hyperv_mmio_lock);
return retval;
}
EXPORT_SYMBOL_GPL(vmbus_allocate_mmio);
@@ -1992,7 +2081,7 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size)
{
struct resource *iter;
- down(&hyperv_mmio_lock);
+ mutex_lock(&hyperv_mmio_lock);
for (iter = hyperv_mmio; iter; iter = iter->sibling) {
if ((iter->start >= start + size) || (iter->end <= start))
continue;
@@ -2000,7 +2089,7 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size)
__release_region(iter, start, size);
}
release_mem_region(start, size);
- up(&hyperv_mmio_lock);
+ mutex_unlock(&hyperv_mmio_lock);
}
EXPORT_SYMBOL_GPL(vmbus_free_mmio);
@@ -2042,6 +2131,130 @@ acpi_walk_err:
return ret_val;
}
+#ifdef CONFIG_PM_SLEEP
+static int vmbus_bus_suspend(struct device *dev)
+{
+ struct vmbus_channel *channel, *sc;
+ unsigned long flags;
+
+ while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
+ /*
+ * We wait here until the completion of any channel
+ * offers that are currently in progress.
+ */
+ msleep(1);
+ }
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ if (!is_hvsock_channel(channel))
+ continue;
+
+ vmbus_force_channel_rescinded(channel);
+ }
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ /*
+ * Wait until all the sub-channels and hv_sock channels have been
+ * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
+ * they would conflict with the new sub-channels that will be created
+ * in the resume path. hv_sock channels should also be destroyed, but
+ * a hv_sock channel of an established hv_sock connection can not be
+ * really destroyed since it may still be referenced by the userspace
+ * application, so we just force the hv_sock channel to be rescinded
+ * by vmbus_force_channel_rescinded(), and the userspace application
+ * will thoroughly destroy the channel after hibernation.
+ *
+ * Note: the counter nr_chan_close_on_suspend may never go above 0 if
+ * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM.
+ */
+ if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
+ wait_for_completion(&vmbus_connection.ready_for_suspend_event);
+
+ WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0);
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ /*
+ * Invalidate the field. Upon resume, vmbus_onoffer() will fix
+ * up the field, and the other fields (if necessary).
+ */
+ channel->offermsg.child_relid = INVALID_RELID;
+
+ if (is_hvsock_channel(channel)) {
+ if (!channel->rescind) {
+ pr_err("hv_sock channel not rescinded!\n");
+ WARN_ON_ONCE(1);
+ }
+ continue;
+ }
+
+ spin_lock_irqsave(&channel->lock, flags);
+ list_for_each_entry(sc, &channel->sc_list, sc_list) {
+ pr_err("Sub-channel not deleted!\n");
+ WARN_ON_ONCE(1);
+ }
+ spin_unlock_irqrestore(&channel->lock, flags);
+
+ atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
+ }
+
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ vmbus_initiate_unload(false);
+
+ vmbus_connection.conn_state = DISCONNECTED;
+
+ /* Reset the event for the next resume. */
+ reinit_completion(&vmbus_connection.ready_for_resume_event);
+
+ return 0;
+}
+
+static int vmbus_bus_resume(struct device *dev)
+{
+ struct vmbus_channel_msginfo *msginfo;
+ size_t msgsize;
+ int ret;
+
+ /*
+ * We only use the 'vmbus_proto_version', which was in use before
+ * hibernation, to re-negotiate with the host.
+ */
+ if (!vmbus_proto_version) {
+ pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
+ return -EINVAL;
+ }
+
+ msgsize = sizeof(*msginfo) +
+ sizeof(struct vmbus_channel_initiate_contact);
+
+ msginfo = kzalloc(msgsize, GFP_KERNEL);
+
+ if (msginfo == NULL)
+ return -ENOMEM;
+
+ ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);
+
+ kfree(msginfo);
+
+ if (ret != 0)
+ return ret;
+
+ WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
+
+ vmbus_request_offers();
+
+ wait_for_completion(&vmbus_connection.ready_for_resume_event);
+
+ /* Reset the event for the next suspend. */
+ reinit_completion(&vmbus_connection.ready_for_suspend_event);
+
+ return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
static const struct acpi_device_id vmbus_acpi_device_ids[] = {
{"VMBUS", 0},
{"VMBus", 0},
@@ -2049,6 +2262,19 @@ static const struct acpi_device_id vmbus_acpi_device_ids[] = {
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS, otherwise NIC SR-IOV can not work, because the
+ * "pci_dev_pm_ops" uses the "noirq" callbacks: in the resume path, the
+ * pci "noirq" restore callback runs before "non-noirq" callbacks (see
+ * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() ->
+ * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's
+ * resume callback must also run via the "noirq" callbacks.
+ */
+static const struct dev_pm_ops vmbus_bus_pm = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_bus_suspend, vmbus_bus_resume)
+};
+
static struct acpi_driver vmbus_acpi_driver = {
.name = "vmbus",
.ids = vmbus_acpi_device_ids,
@@ -2056,6 +2282,7 @@ static struct acpi_driver vmbus_acpi_driver = {
.add = vmbus_acpi_add,
.remove = vmbus_acpi_remove,
},
+ .drv.pm = &vmbus_bus_pm,
};
static void hv_kexec_handler(void)
@@ -2082,10 +2309,54 @@ static void hv_crash_handler(struct pt_regs *regs)
vmbus_connection.conn_state = DISCONNECTED;
cpu = smp_processor_id();
hv_stimer_cleanup(cpu);
- hv_synic_cleanup(cpu);
+ hv_synic_disable_regs(cpu);
hyperv_cleanup();
};
+static int hv_synic_suspend(void)
+{
+ /*
+ * When we reach here, all the non-boot CPUs have been offlined.
+ * If we're in a legacy configuration where stimer Direct Mode is
+ * not enabled, the stimers on the non-boot CPUs have been unbound
+ * in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() ->
+ * hv_stimer_cleanup() -> clockevents_unbind_device().
+ *
+ * hv_synic_suspend() only runs on CPU0 with interrupts disabled.
+ * Here we do not call hv_stimer_legacy_cleanup() on CPU0 because:
+ * 1) it's unnecessary as interrupts remain disabled between
+ * syscore_suspend() and syscore_resume(): see create_image() and
+ * resume_target_kernel()
+ * 2) the stimer on CPU0 is automatically disabled later by
+ * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
+ * -> clockevents_shutdown() -> ... -> hv_ce_shutdown()
+ * 3) a warning would be triggered if we call
+ * clockevents_unbind_device(), which may sleep, in an
+ * interrupts-disabled context.
+ */
+
+ hv_synic_disable_regs(0);
+
+ return 0;
+}
+
+static void hv_synic_resume(void)
+{
+ hv_synic_enable_regs(0);
+
+ /*
+ * Note: we don't need to call hv_stimer_init(0), because the timer
+ * on CPU0 is not unbound in hv_synic_suspend(), and the timer is
+ * automatically re-enabled in timekeeping_resume().
+ */
+}
+
+/* The callbacks run only on CPU0, with irqs_disabled. */
+static struct syscore_ops hv_synic_syscore_ops = {
+ .suspend = hv_synic_suspend,
+ .resume = hv_synic_resume,
+};
+
static int __init hv_acpi_init(void)
{
int ret, t;
@@ -2108,6 +2379,7 @@ static int __init hv_acpi_init(void)
ret = -ETIMEDOUT;
goto cleanup;
}
+ hv_debug_init();
ret = vmbus_bus_init();
if (ret)
@@ -2116,6 +2388,8 @@ static int __init hv_acpi_init(void)
hv_setup_kexec_handler(hv_kexec_handler);
hv_setup_crash_handler(hv_crash_handler);
+ register_syscore_ops(&hv_synic_syscore_ops);
+
return 0;
cleanup:
@@ -2128,6 +2402,8 @@ static void __exit vmbus_exit(void)
{
int cpu;
+ unregister_syscore_ops(&hv_synic_syscore_ops);
+
hv_remove_kexec_handler();
hv_remove_crash_handler();
vmbus_connection.conn_state = DISCONNECTED;
@@ -2140,6 +2416,8 @@ static void __exit vmbus_exit(void)
tasklet_kill(&hv_cpu->msg_dpc);
}
+ hv_debug_rm_all_dir();
+
vmbus_free_channels();
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
OpenPOWER on IntegriCloud