Diffstat (limited to 'drivers/hv')
-rw-r--r-- | drivers/hv/channel.c      | 125
-rw-r--r-- | drivers/hv/channel_mgmt.c | 223
-rw-r--r-- | drivers/hv/connection.c   |  40
-rw-r--r-- | drivers/hv/hv.c           |  34
-rw-r--r-- | drivers/hv/hv_balloon.c   | 143
-rw-r--r-- | drivers/hv/hv_util.c      |  13
-rw-r--r-- | drivers/hv/hyperv_vmbus.h |  31
-rw-r--r-- | drivers/hv/vmbus_drv.c    | 136
8 files changed, 473 insertions, 272 deletions
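
The headline interface change in this series is the split of vmbus_sendpacket() and vmbus_sendpacket_pagebuffer() into *_ctl variants whose kick_q argument lets a sender defer host notification while batching packets. A minimal usage sketch, assuming an already-open channel; the helper name, payload arrays, and request-id scheme are illustrative only, not part of the patch:

	/* Queue several in-band packets; signal the host only once. */
	static int send_batch(struct vmbus_channel *chan,
			      void *bufs[], u32 lens[], int n)
	{
		int i, ret = 0;

		for (i = 0; i < n && ret == 0; i++) {
			bool last = (i == n - 1);

			ret = vmbus_sendpacket_ctl(chan, bufs[i], lens[i],
				(u64)(unsigned long)bufs[i],
				VM_PKT_DATA_INBAND,
				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED,
				last);	/* kick_q: notify host on last packet */
		}
		return ret;
	}

Note that even with kick_q false the host is still signalled when the ring-buffer write fails, per the comment in the hunk below.
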
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 2978f5ee8d2a..54da66dc7d16 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -71,7 +71,8 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, struct vmbus_channel_msginfo *open_info = NULL; void *in, *out; unsigned long flags; - int ret, t, err = 0; + int ret, err = 0; + unsigned long t; spin_lock_irqsave(&newchannel->lock, flags); if (newchannel->state == CHANNEL_OPEN_STATE) { @@ -89,9 +90,10 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, out = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, get_order(send_ringbuffer_size + recv_ringbuffer_size)); - if (!out) - return -ENOMEM; - + if (!out) { + err = -ENOMEM; + goto error0; + } in = (void *)((unsigned long)out + send_ringbuffer_size); @@ -135,7 +137,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, GFP_KERNEL); if (!open_info) { err = -ENOMEM; - goto error0; + goto error_gpadl; } init_completion(&open_info->waitevent); @@ -151,7 +153,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, if (userdatalen > MAX_USER_DEFINED_BYTES) { err = -EINVAL; - goto error0; + goto error_gpadl; } if (userdatalen) @@ -195,10 +197,14 @@ error1: list_del(&open_info->msglistentry); spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); +error_gpadl: + vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle); + error0: free_pages((unsigned long)out, get_order(send_ringbuffer_size + recv_ringbuffer_size)); kfree(open_info); + newchannel->state = CHANNEL_OPEN_STATE; return err; } EXPORT_SYMBOL_GPL(vmbus_open); @@ -534,6 +540,12 @@ static int vmbus_close_internal(struct vmbus_channel *channel) free_pages((unsigned long)channel->ringbuffer_pages, get_order(channel->ringbuffer_pagecount * PAGE_SIZE)); + /* + * If the channel has been rescinded; process device removal. + */ + if (channel->rescind) + hv_process_channel_removal(channel, + channel->offermsg.child_relid); return ret; } @@ -569,23 +581,9 @@ void vmbus_close(struct vmbus_channel *channel) } EXPORT_SYMBOL_GPL(vmbus_close); -/** - * vmbus_sendpacket() - Send the specified buffer on the given channel - * @channel: Pointer to vmbus_channel structure. - * @buffer: Pointer to the buffer you want to receive the data into. - * @bufferlen: Maximum size of what the the buffer will hold - * @requestid: Identifier of the request - * @type: Type of packet that is being send e.g. negotiate, time - * packet etc. - * - * Sends data in @buffer directly to hyper-v via the vmbus - * This will send the data unparsed to hyper-v. - * - * Mainly used by Hyper-V drivers. - */ -int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, +int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, u32 bufferlen, u64 requestid, - enum vmbus_packet_type type, u32 flags) + enum vmbus_packet_type type, u32 flags, bool kick_q) { struct vmpacket_descriptor desc; u32 packetlen = sizeof(struct vmpacket_descriptor) + bufferlen; @@ -613,21 +611,61 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); - if (ret == 0 && signal) + /* + * Signalling the host is conditional on many factors: + * 1. The ring state changed from being empty to non-empty. + * This is tracked by the variable "signal". + * 2. The variable kick_q tracks if more data will be placed + * on the ring. We will not signal if more data is + * to be placed. 
+ * + * If we cannot write to the ring-buffer; signal the host + * even if we may not have written anything. This is a rare + * enough condition that it should not matter. + */ + if (((ret == 0) && kick_q && signal) || (ret)) vmbus_setevent(channel); return ret; } +EXPORT_SYMBOL(vmbus_sendpacket_ctl); + +/** + * vmbus_sendpacket() - Send the specified buffer on the given channel + * @channel: Pointer to vmbus_channel structure. + * @buffer: Pointer to the buffer you want to receive the data into. + * @bufferlen: Maximum size of what the the buffer will hold + * @requestid: Identifier of the request + * @type: Type of packet that is being send e.g. negotiate, time + * packet etc. + * + * Sends data in @buffer directly to hyper-v via the vmbus + * This will send the data unparsed to hyper-v. + * + * Mainly used by Hyper-V drivers. + */ +int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, + u32 bufferlen, u64 requestid, + enum vmbus_packet_type type, u32 flags) +{ + return vmbus_sendpacket_ctl(channel, buffer, bufferlen, requestid, + type, flags, true); +} EXPORT_SYMBOL(vmbus_sendpacket); /* - * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer - * packets using a GPADL Direct packet type. + * vmbus_sendpacket_pagebuffer_ctl - Send a range of single-page buffer + * packets using a GPADL Direct packet type. This interface allows you + * to control notifying the host. This will be useful for sending + * batched data. Also the sender can control the send flags + * explicitly. */ -int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, +int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, struct hv_page_buffer pagebuffers[], u32 pagecount, void *buffer, u32 bufferlen, - u64 requestid) + u64 requestid, + u32 flags, + bool kick_q) { int ret; int i; @@ -655,7 +693,7 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, /* Setup the descriptor */ desc.type = VM_PKT_DATA_USING_GPA_DIRECT; - desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; + desc.flags = flags; desc.dataoffset8 = descsize >> 3; /* in 8-bytes grandularity */ desc.length8 = (u16)(packetlen_aligned >> 3); desc.transactionid = requestid; @@ -676,11 +714,40 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); - if (ret == 0 && signal) + /* + * Signalling the host is conditional on many factors: + * 1. The ring state changed from being empty to non-empty. + * This is tracked by the variable "signal". + * 2. The variable kick_q tracks if more data will be placed + * on the ring. We will not signal if more data is + * to be placed. + * + * If we cannot write to the ring-buffer; signal the host + * even if we may not have written anything. This is a rare + * enough condition that it should not matter. + */ + if (((ret == 0) && kick_q && signal) || (ret)) vmbus_setevent(channel); return ret; } +EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer_ctl); + +/* + * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer + * packets using a GPADL Direct packet type. 
+ */ +int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, + struct hv_page_buffer pagebuffers[], + u32 pagecount, void *buffer, u32 bufferlen, + u64 requestid) +{ + u32 flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; + return vmbus_sendpacket_pagebuffer_ctl(channel, pagebuffers, pagecount, + buffer, bufferlen, requestid, + flags, true); + +} EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer); /* diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 3736f71bdec5..0eeb1b3bc048 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -32,12 +32,6 @@ #include "hyperv_vmbus.h" -struct vmbus_channel_message_table_entry { - enum vmbus_channel_message_type message_type; - void (*message_handler)(struct vmbus_channel_message_header *msg); -}; - - /** * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message * @icmsghdrp: Pointer to msg header structure @@ -139,54 +133,29 @@ EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp); */ static struct vmbus_channel *alloc_channel(void) { + static atomic_t chan_num = ATOMIC_INIT(0); struct vmbus_channel *channel; channel = kzalloc(sizeof(*channel), GFP_ATOMIC); if (!channel) return NULL; + channel->id = atomic_inc_return(&chan_num); spin_lock_init(&channel->inbound_lock); spin_lock_init(&channel->lock); INIT_LIST_HEAD(&channel->sc_list); INIT_LIST_HEAD(&channel->percpu_list); - channel->controlwq = create_workqueue("hv_vmbus_ctl"); - if (!channel->controlwq) { - kfree(channel); - return NULL; - } - return channel; } /* - * release_hannel - Release the vmbus channel object itself - */ -static void release_channel(struct work_struct *work) -{ - struct vmbus_channel *channel = container_of(work, - struct vmbus_channel, - work); - - destroy_workqueue(channel->controlwq); - - kfree(channel); -} - -/* * free_channel - Release the resources used by the vmbus channel object */ static void free_channel(struct vmbus_channel *channel) { - - /* - * We have to release the channel's workqueue/thread in the vmbus's - * workqueue/thread context - * ie we can't destroy ourselves. 
- */ - INIT_WORK(&channel->work, release_channel); - queue_work(vmbus_connection.work_queue, &channel->work); + kfree(channel); } static void percpu_channel_enq(void *arg) @@ -204,33 +173,21 @@ static void percpu_channel_deq(void *arg) list_del(&channel->percpu_list); } -/* - * vmbus_process_rescind_offer - - * Rescind the offer by initiating a device removal - */ -static void vmbus_process_rescind_offer(struct work_struct *work) + +void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) { - struct vmbus_channel *channel = container_of(work, - struct vmbus_channel, - work); + struct vmbus_channel_relid_released msg; unsigned long flags; struct vmbus_channel *primary_channel; - struct vmbus_channel_relid_released msg; - struct device *dev; - - if (channel->device_obj) { - dev = get_device(&channel->device_obj->device); - if (dev) { - vmbus_device_unregister(channel->device_obj); - put_device(dev); - } - } memset(&msg, 0, sizeof(struct vmbus_channel_relid_released)); - msg.child_relid = channel->offermsg.child_relid; + msg.child_relid = relid; msg.header.msgtype = CHANNELMSG_RELID_RELEASED; vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released)); + if (channel == NULL) + return; + if (channel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(channel->target_cpu, @@ -259,7 +216,6 @@ void vmbus_free_channels(void) list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { vmbus_device_unregister(channel->device_obj); - kfree(channel->device_obj); free_channel(channel); } } @@ -268,15 +224,11 @@ void vmbus_free_channels(void) * vmbus_process_offer - Process the offer by creating a channel/device * associated with this offer */ -static void vmbus_process_offer(struct work_struct *work) +static void vmbus_process_offer(struct vmbus_channel *newchannel) { - struct vmbus_channel *newchannel = container_of(work, - struct vmbus_channel, - work); struct vmbus_channel *channel; bool fnew = true; bool enq = false; - int ret; unsigned long flags; /* Make sure this is a new offer */ @@ -335,10 +287,11 @@ static void vmbus_process_offer(struct work_struct *work) } newchannel->state = CHANNEL_OPEN_STATE; + channel->num_sc++; if (channel->sc_creation_callback != NULL) channel->sc_creation_callback(newchannel); - goto done_init_rescind; + return; } goto err_free_chan; @@ -361,33 +314,35 @@ static void vmbus_process_offer(struct work_struct *work) &newchannel->offermsg.offer.if_instance, newchannel); if (!newchannel->device_obj) - goto err_free_chan; + goto err_deq_chan; /* * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. 
*/ - ret = vmbus_device_register(newchannel->device_obj); - if (ret != 0) { + if (vmbus_device_register(newchannel->device_obj) != 0) { pr_err("unable to add child device object (relid %d)\n", - newchannel->offermsg.child_relid); - - spin_lock_irqsave(&vmbus_connection.channel_lock, flags); - list_del(&newchannel->listentry); - spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); + newchannel->offermsg.child_relid); kfree(newchannel->device_obj); - goto err_free_chan; + goto err_deq_chan; } -done_init_rescind: - spin_lock_irqsave(&newchannel->lock, flags); - /* The next possible work is rescind handling */ - INIT_WORK(&newchannel->work, vmbus_process_rescind_offer); - /* Check if rescind offer was already received */ - if (newchannel->rescind) - queue_work(newchannel->controlwq, &newchannel->work); - spin_unlock_irqrestore(&newchannel->lock, flags); return; + +err_deq_chan: + spin_lock_irqsave(&vmbus_connection.channel_lock, flags); + list_del(&newchannel->listentry); + spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); + + if (newchannel->target_cpu != get_cpu()) { + put_cpu(); + smp_call_function_single(newchannel->target_cpu, + percpu_channel_deq, newchannel, true); + } else { + percpu_channel_deq(newchannel); + put_cpu(); + } + err_free_chan: free_channel(newchannel); } @@ -411,6 +366,8 @@ static const struct hv_vmbus_device_id hp_devs[] = { { HV_SCSI_GUID, }, /* Network */ { HV_NIC_GUID, }, + /* NetworkDirect Guest RDMA */ + { HV_ND_GUID, }, }; @@ -511,8 +468,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) newchannel->monitor_grp = (u8)offer->monitorid / 32; newchannel->monitor_bit = (u8)offer->monitorid % 32; - INIT_WORK(&newchannel->work, vmbus_process_offer); - queue_work(newchannel->controlwq, &newchannel->work); + vmbus_process_offer(newchannel); } /* @@ -525,28 +481,34 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) struct vmbus_channel_rescind_offer *rescind; struct vmbus_channel *channel; unsigned long flags; + struct device *dev; rescind = (struct vmbus_channel_rescind_offer *)hdr; channel = relid2channel(rescind->child_relid); - if (channel == NULL) - /* Just return here, no channel found */ + if (channel == NULL) { + hv_process_channel_removal(NULL, rescind->child_relid); return; + } spin_lock_irqsave(&channel->lock, flags); channel->rescind = true; - /* - * channel->work.func != vmbus_process_rescind_offer means we are still - * processing offer request and the rescind offer processing should be - * postponed. It will be done at the very end of vmbus_process_offer() - * as rescind flag is being checked there. - */ - if (channel->work.func == vmbus_process_rescind_offer) - /* work is initialized for vmbus_process_rescind_offer() from - * vmbus_process_offer() where the channel got created */ - queue_work(channel->controlwq, &channel->work); - spin_unlock_irqrestore(&channel->lock, flags); + + if (channel->device_obj) { + /* + * We will have to unregister this device from the + * driver core. 
+ */ + dev = get_device(&channel->device_obj->device); + if (dev) { + vmbus_device_unregister(channel->device_obj); + put_device(dev); + } + } else { + hv_process_channel_removal(channel, + channel->offermsg.child_relid); + } } /* @@ -731,25 +693,25 @@ static void vmbus_onversion_response( } /* Channel message dispatch table */ -static struct vmbus_channel_message_table_entry +struct vmbus_channel_message_table_entry channel_message_table[CHANNELMSG_COUNT] = { - {CHANNELMSG_INVALID, NULL}, - {CHANNELMSG_OFFERCHANNEL, vmbus_onoffer}, - {CHANNELMSG_RESCIND_CHANNELOFFER, vmbus_onoffer_rescind}, - {CHANNELMSG_REQUESTOFFERS, NULL}, - {CHANNELMSG_ALLOFFERS_DELIVERED, vmbus_onoffers_delivered}, - {CHANNELMSG_OPENCHANNEL, NULL}, - {CHANNELMSG_OPENCHANNEL_RESULT, vmbus_onopen_result}, - {CHANNELMSG_CLOSECHANNEL, NULL}, - {CHANNELMSG_GPADL_HEADER, NULL}, - {CHANNELMSG_GPADL_BODY, NULL}, - {CHANNELMSG_GPADL_CREATED, vmbus_ongpadl_created}, - {CHANNELMSG_GPADL_TEARDOWN, NULL}, - {CHANNELMSG_GPADL_TORNDOWN, vmbus_ongpadl_torndown}, - {CHANNELMSG_RELID_RELEASED, NULL}, - {CHANNELMSG_INITIATE_CONTACT, NULL}, - {CHANNELMSG_VERSION_RESPONSE, vmbus_onversion_response}, - {CHANNELMSG_UNLOAD, NULL}, + {CHANNELMSG_INVALID, 0, NULL}, + {CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer}, + {CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind}, + {CHANNELMSG_REQUESTOFFERS, 0, NULL}, + {CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered}, + {CHANNELMSG_OPENCHANNEL, 0, NULL}, + {CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result}, + {CHANNELMSG_CLOSECHANNEL, 0, NULL}, + {CHANNELMSG_GPADL_HEADER, 0, NULL}, + {CHANNELMSG_GPADL_BODY, 0, NULL}, + {CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created}, + {CHANNELMSG_GPADL_TEARDOWN, 0, NULL}, + {CHANNELMSG_GPADL_TORNDOWN, 1, vmbus_ongpadl_torndown}, + {CHANNELMSG_RELID_RELEASED, 0, NULL}, + {CHANNELMSG_INITIATE_CONTACT, 0, NULL}, + {CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response}, + {CHANNELMSG_UNLOAD, 0, NULL}, }; /* @@ -787,7 +749,7 @@ int vmbus_request_offers(void) { struct vmbus_channel_message_header *msg; struct vmbus_channel_msginfo *msginfo; - int ret, t; + int ret; msginfo = kmalloc(sizeof(*msginfo) + sizeof(struct vmbus_channel_message_header), @@ -795,8 +757,6 @@ int vmbus_request_offers(void) if (!msginfo) return -ENOMEM; - init_completion(&msginfo->waitevent); - msg = (struct vmbus_channel_message_header *)msginfo->msg; msg->msgtype = CHANNELMSG_REQUESTOFFERS; @@ -810,14 +770,6 @@ int vmbus_request_offers(void) goto cleanup; } - t = wait_for_completion_timeout(&msginfo->waitevent, 5*HZ); - if (t == 0) { - ret = -ETIMEDOUT; - goto cleanup; - } - - - cleanup: kfree(msginfo); @@ -826,9 +778,8 @@ cleanup: /* * Retrieve the (sub) channel on which to send an outgoing request. - * When a primary channel has multiple sub-channels, we choose a - * channel whose VCPU binding is closest to the VCPU on which - * this call is being made. + * When a primary channel has multiple sub-channels, we try to + * distribute the load equally amongst all available channels. 
*/ struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary) { @@ -836,11 +787,19 @@ struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary) int cur_cpu; struct vmbus_channel *cur_channel; struct vmbus_channel *outgoing_channel = primary; - int cpu_distance, new_cpu_distance; + int next_channel; + int i = 1; if (list_empty(&primary->sc_list)) return outgoing_channel; + next_channel = primary->next_oc++; + + if (next_channel > (primary->num_sc)) { + primary->next_oc = 0; + return outgoing_channel; + } + cur_cpu = hv_context.vp_index[get_cpu()]; put_cpu(); list_for_each_safe(cur, tmp, &primary->sc_list) { @@ -851,18 +810,10 @@ struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary) if (cur_channel->target_vp == cur_cpu) return cur_channel; - cpu_distance = ((outgoing_channel->target_vp > cur_cpu) ? - (outgoing_channel->target_vp - cur_cpu) : - (cur_cpu - outgoing_channel->target_vp)); - - new_cpu_distance = ((cur_channel->target_vp > cur_cpu) ? - (cur_channel->target_vp - cur_cpu) : - (cur_cpu - cur_channel->target_vp)); - - if (cpu_distance < new_cpu_distance) - continue; + if (i == next_channel) + return cur_channel; - outgoing_channel = cur_channel; + i++; } return outgoing_channel; diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index a63a795300b9..b27220a425f4 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -216,10 +216,21 @@ int vmbus_connect(void) cleanup: pr_err("Unable to connect to host\n"); + vmbus_connection.conn_state = DISCONNECTED; + vmbus_disconnect(); + + kfree(msginfo); - if (vmbus_connection.work_queue) + return ret; +} + +void vmbus_disconnect(void) +{ + if (vmbus_connection.work_queue) { + drain_workqueue(vmbus_connection.work_queue); destroy_workqueue(vmbus_connection.work_queue); + } if (vmbus_connection.int_page) { free_pages((unsigned long)vmbus_connection.int_page, 0); @@ -230,10 +241,6 @@ cleanup: free_pages((unsigned long)vmbus_connection.monitor_pages[1], 0); vmbus_connection.monitor_pages[0] = NULL; vmbus_connection.monitor_pages[1] = NULL; - - kfree(msginfo); - - return ret; } /* @@ -311,10 +318,8 @@ static void process_chn_event(u32 relid) */ channel = pcpu_relid2channel(relid); - if (!channel) { - pr_err("channel not found for relid - %u\n", relid); + if (!channel) return; - } /* * A channel once created is persistent even when there @@ -349,10 +354,7 @@ static void process_chn_event(u32 relid) else bytes_to_read = 0; } while (read_state && (bytes_to_read != 0)); - } else { - pr_err("no channel callback for relid - %u\n", relid); } - } /* @@ -420,6 +422,7 @@ int vmbus_post_msg(void *buffer, size_t buflen) union hv_connection_id conn_id; int ret = 0; int retries = 0; + u32 msec = 1; conn_id.asu32 = 0; conn_id.u.id = VMBUS_MESSAGE_CONNECTION_ID; @@ -429,13 +432,20 @@ int vmbus_post_msg(void *buffer, size_t buflen) * insufficient resources. Retry the operation a couple of * times before giving up. */ - while (retries < 10) { + while (retries < 20) { ret = hv_post_message(conn_id, 1, buffer, buflen); switch (ret) { + case HV_STATUS_INVALID_CONNECTION_ID: + /* + * We could get this if we send messages too + * frequently. 
+ */ + ret = -EAGAIN; + break; + case HV_STATUS_INSUFFICIENT_MEMORY: case HV_STATUS_INSUFFICIENT_BUFFERS: ret = -ENOMEM; - case -ENOMEM: break; case HV_STATUS_SUCCESS: return ret; @@ -445,7 +455,9 @@ int vmbus_post_msg(void *buffer, size_t buflen) } retries++; - msleep(100); + msleep(msec); + if (msec < 2048) + msec *= 2; } return ret; } diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 50e51a51ff8b..d3943bceecc3 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -312,7 +312,11 @@ static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu) dev->features = CLOCK_EVT_FEAT_ONESHOT; dev->cpumask = cpumask_of(cpu); dev->rating = 1000; - dev->owner = THIS_MODULE; + /* + * Avoid settint dev->owner = THIS_MODULE deliberately as doing so will + * result in clockevents_config_and_register() taking additional + * references to the hv_vmbus module making it impossible to unload. + */ dev->set_mode = hv_ce_setmode; dev->set_next_event = hv_ce_set_next_event; @@ -470,6 +474,20 @@ void hv_synic_init(void *arg) } /* + * hv_synic_clockevents_cleanup - Cleanup clockevent devices + */ +void hv_synic_clockevents_cleanup(void) +{ + int cpu; + + if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)) + return; + + for_each_online_cpu(cpu) + clockevents_unbind_device(hv_context.clk_evt[cpu], cpu); +} + +/* * hv_synic_cleanup - Cleanup routine for hv_synic_init(). */ void hv_synic_cleanup(void *arg) @@ -477,11 +495,17 @@ void hv_synic_cleanup(void *arg) union hv_synic_sint shared_sint; union hv_synic_simp simp; union hv_synic_siefp siefp; + union hv_synic_scontrol sctrl; int cpu = smp_processor_id(); if (!hv_context.synic_initialized) return; + /* Turn off clockevent device */ + if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) + hv_ce_setmode(CLOCK_EVT_MODE_SHUTDOWN, + hv_context.clk_evt[cpu]); + rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); shared_sint.masked = 1; @@ -502,6 +526,10 @@ void hv_synic_cleanup(void *arg) wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64); - free_page((unsigned long)hv_context.synic_message_page[cpu]); - free_page((unsigned long)hv_context.synic_event_page[cpu]); + /* Disable the global synic bit */ + rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); + sctrl.enable = 0; + wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); + + hv_synic_free_cpu(cpu); } diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index ff169386b2c7..cb5b7dc9797f 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -428,14 +428,13 @@ struct dm_info_msg { * currently hot added. We hot add in multiples of 128M * chunks; it is possible that we may not be able to bring * online all the pages in the region. The range - * covered_start_pfn : covered_end_pfn defines the pages that can + * covered_end_pfn defines the pages that can * be brough online. */ struct hv_hotadd_state { struct list_head list; unsigned long start_pfn; - unsigned long covered_start_pfn; unsigned long covered_end_pfn; unsigned long ha_end_pfn; unsigned long end_pfn; @@ -503,6 +502,8 @@ struct hv_dynmem_device { * Number of pages we have currently ballooned out. */ unsigned int num_pages_ballooned; + unsigned int num_pages_onlined; + unsigned int num_pages_added; /* * State to manage the ballooning (up) operation. @@ -534,7 +535,6 @@ struct hv_dynmem_device { struct task_struct *thread; struct mutex ha_region_mutex; - struct completion waiter_event; /* * A list of hot-add regions. 
@@ -554,46 +554,32 @@ static struct hv_dynmem_device dm_device; static void post_status(struct hv_dynmem_device *dm); #ifdef CONFIG_MEMORY_HOTPLUG -static void acquire_region_mutex(bool trylock) -{ - if (trylock) { - reinit_completion(&dm_device.waiter_event); - while (!mutex_trylock(&dm_device.ha_region_mutex)) - wait_for_completion(&dm_device.waiter_event); - } else { - mutex_lock(&dm_device.ha_region_mutex); - } -} - -static void release_region_mutex(bool trylock) -{ - if (trylock) { - mutex_unlock(&dm_device.ha_region_mutex); - } else { - mutex_unlock(&dm_device.ha_region_mutex); - complete(&dm_device.waiter_event); - } -} - static int hv_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { + struct memory_notify *mem = (struct memory_notify *)v; + switch (val) { case MEM_GOING_ONLINE: - acquire_region_mutex(true); + mutex_lock(&dm_device.ha_region_mutex); break; case MEM_ONLINE: + dm_device.num_pages_onlined += mem->nr_pages; case MEM_CANCEL_ONLINE: - release_region_mutex(true); + mutex_unlock(&dm_device.ha_region_mutex); if (dm_device.ha_waiting) { dm_device.ha_waiting = false; complete(&dm_device.ol_waitevent); } break; - case MEM_GOING_OFFLINE: case MEM_OFFLINE: + mutex_lock(&dm_device.ha_region_mutex); + dm_device.num_pages_onlined -= mem->nr_pages; + mutex_unlock(&dm_device.ha_region_mutex); + break; + case MEM_GOING_OFFLINE: case MEM_CANCEL_OFFLINE: break; } @@ -646,7 +632,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, init_completion(&dm_device.ol_waitevent); dm_device.ha_waiting = true; - release_region_mutex(false); + mutex_unlock(&dm_device.ha_region_mutex); nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn)); ret = add_memory(nid, PFN_PHYS((start_pfn)), (HA_CHUNK << PAGE_SHIFT)); @@ -665,6 +651,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, } has->ha_end_pfn -= HA_CHUNK; has->covered_end_pfn -= processed_pfn; + mutex_lock(&dm_device.ha_region_mutex); break; } @@ -675,7 +662,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, * have not been "onlined" within the allowed time. */ wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ); - acquire_region_mutex(false); + mutex_lock(&dm_device.ha_region_mutex); post_status(&dm_device); } @@ -691,8 +678,7 @@ static void hv_online_page(struct page *pg) list_for_each(cur, &dm_device.ha_region_list) { has = list_entry(cur, struct hv_hotadd_state, list); - cur_start_pgp = (unsigned long) - pfn_to_page(has->covered_start_pfn); + cur_start_pgp = (unsigned long)pfn_to_page(has->start_pfn); cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn); if (((unsigned long)pg >= cur_start_pgp) && @@ -704,7 +690,6 @@ static void hv_online_page(struct page *pg) __online_page_set_limits(pg); __online_page_increment_counters(pg); __online_page_free(pg); - has->covered_start_pfn++; } } } @@ -748,10 +733,9 @@ static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) * is, update it. */ - if (has->covered_end_pfn != start_pfn) { + if (has->covered_end_pfn != start_pfn) has->covered_end_pfn = start_pfn; - has->covered_start_pfn = start_pfn; - } + return true; } @@ -794,9 +778,18 @@ static unsigned long handle_pg_range(unsigned long pg_start, pgs_ol = has->ha_end_pfn - start_pfn; if (pgs_ol > pfn_cnt) pgs_ol = pfn_cnt; - hv_bring_pgs_online(start_pfn, pgs_ol); + + /* + * Check if the corresponding memory block is already + * online by checking its last previously backed page. 
+ * In case it is we need to bring rest (which was not + * backed previously) online too. + */ + if (start_pfn > has->start_pfn && + !PageReserved(pfn_to_page(start_pfn - 1))) + hv_bring_pgs_online(start_pfn, pgs_ol); + has->covered_end_pfn += pgs_ol; - has->covered_start_pfn += pgs_ol; pfn_cnt -= pgs_ol; } @@ -857,7 +850,6 @@ static unsigned long process_hot_add(unsigned long pg_start, list_add_tail(&ha_region->list, &dm_device.ha_region_list); ha_region->start_pfn = rg_start; ha_region->ha_end_pfn = rg_start; - ha_region->covered_start_pfn = pg_start; ha_region->covered_end_pfn = pg_start; ha_region->end_pfn = rg_start + rg_size; } @@ -886,7 +878,7 @@ static void hot_add_req(struct work_struct *dummy) resp.hdr.size = sizeof(struct dm_hot_add_response); #ifdef CONFIG_MEMORY_HOTPLUG - acquire_region_mutex(false); + mutex_lock(&dm_device.ha_region_mutex); pg_start = dm->ha_wrk.ha_page_range.finfo.start_page; pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt; @@ -918,7 +910,9 @@ static void hot_add_req(struct work_struct *dummy) if (do_hot_add) resp.page_count = process_hot_add(pg_start, pfn_cnt, rg_start, rg_sz); - release_region_mutex(false); + + dm->num_pages_added += resp.page_count; + mutex_unlock(&dm_device.ha_region_mutex); #endif /* * The result field of the response structure has the @@ -982,8 +976,8 @@ static unsigned long compute_balloon_floor(void) * 128 72 (1/2) * 512 168 (1/4) * 2048 360 (1/8) - * 8192 768 (1/16) - * 32768 1536 (1/32) + * 8192 744 (1/16) + * 32768 1512 (1/32) */ if (totalram_pages < MB2PAGES(128)) min_pages = MB2PAGES(8) + (totalram_pages >> 1); @@ -992,9 +986,9 @@ static unsigned long compute_balloon_floor(void) else if (totalram_pages < MB2PAGES(2048)) min_pages = MB2PAGES(104) + (totalram_pages >> 3); else if (totalram_pages < MB2PAGES(8192)) - min_pages = MB2PAGES(256) + (totalram_pages >> 4); + min_pages = MB2PAGES(232) + (totalram_pages >> 4); else - min_pages = MB2PAGES(512) + (totalram_pages >> 5); + min_pages = MB2PAGES(488) + (totalram_pages >> 5); #undef MB2PAGES return min_pages; } @@ -1031,17 +1025,21 @@ static void post_status(struct hv_dynmem_device *dm) status.hdr.trans_id = atomic_inc_return(&trans_id); /* - * The host expects the guest to report free memory. - * Further, the host expects the pressure information to - * include the ballooned out pages. - * For a given amount of memory that we are managing, we - * need to compute a floor below which we should not balloon. - * Compute this and add it to the pressure report. + * The host expects the guest to report free and committed memory. + * Furthermore, the host expects the pressure information to include + * the ballooned out pages. For a given amount of memory that we are + * managing we need to compute a floor below which we should not + * balloon. Compute this and add it to the pressure report. + * We also need to report all offline pages (num_pages_added - + * num_pages_onlined) as committed to the host, otherwise it can try + * asking us to balloon them out. */ status.num_avail = val.freeram; status.num_committed = vm_memory_committed() + - dm->num_pages_ballooned + - compute_balloon_floor(); + dm->num_pages_ballooned + + (dm->num_pages_added > dm->num_pages_onlined ? 
+ dm->num_pages_added - dm->num_pages_onlined : 0) + + compute_balloon_floor(); /* * If our transaction ID is no longer current, just don't @@ -1083,11 +1081,12 @@ static void free_balloon_pages(struct hv_dynmem_device *dm, -static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages, - struct dm_balloon_response *bl_resp, int alloc_unit, - bool *alloc_error) +static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, + unsigned int num_pages, + struct dm_balloon_response *bl_resp, + int alloc_unit) { - int i = 0; + unsigned int i = 0; struct page *pg; if (num_pages < alloc_unit) @@ -1106,11 +1105,8 @@ static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages, __GFP_NOMEMALLOC | __GFP_NOWARN, get_order(alloc_unit << PAGE_SHIFT)); - if (!pg) { - *alloc_error = true; + if (!pg) return i * alloc_unit; - } - dm->num_pages_ballooned += alloc_unit; @@ -1137,14 +1133,15 @@ static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages, static void balloon_up(struct work_struct *dummy) { - int num_pages = dm_device.balloon_wrk.num_pages; - int num_ballooned = 0; + unsigned int num_pages = dm_device.balloon_wrk.num_pages; + unsigned int num_ballooned = 0; struct dm_balloon_response *bl_resp; int alloc_unit; int ret; - bool alloc_error; bool done = false; int i; + struct sysinfo val; + unsigned long floor; /* The host balloons pages in 2M granularity. */ WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0); @@ -1155,6 +1152,15 @@ static void balloon_up(struct work_struct *dummy) */ alloc_unit = 512; + si_meminfo(&val); + floor = compute_balloon_floor(); + + /* Refuse to balloon below the floor, keep the 2M granularity. */ + if (val.freeram < num_pages || val.freeram - num_pages < floor) { + num_pages = val.freeram > floor ? 
(val.freeram - floor) : 0; + num_pages -= num_pages % PAGES_IN_2M; + } + while (!done) { bl_resp = (struct dm_balloon_response *)send_buffer; memset(send_buffer, 0, PAGE_SIZE); @@ -1164,18 +1170,15 @@ static void balloon_up(struct work_struct *dummy) num_pages -= num_ballooned; - alloc_error = false; num_ballooned = alloc_balloon_pages(&dm_device, num_pages, - bl_resp, alloc_unit, - &alloc_error); + bl_resp, alloc_unit); if (alloc_unit != 1 && num_ballooned == 0) { alloc_unit = 1; continue; } - if ((alloc_unit == 1 && alloc_error) || - (num_ballooned == num_pages)) { + if (num_ballooned == 0 || num_ballooned == num_pages) { bl_resp->more_pages = 0; done = true; dm_device.state = DM_INITIALIZED; @@ -1414,7 +1417,8 @@ static void balloon_onchannelcallback(void *context) static int balloon_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { - int ret, t; + int ret; + unsigned long t; struct dm_version_request version_req; struct dm_capabilities cap_msg; @@ -1439,7 +1443,6 @@ static int balloon_probe(struct hv_device *dev, dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7; init_completion(&dm_device.host_event); init_completion(&dm_device.config_event); - init_completion(&dm_device.waiter_event); INIT_LIST_HEAD(&dm_device.ha_region_list); mutex_init(&dm_device.ha_region_mutex); INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up); diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 3b9c9ef0deb8..7994ec2e4151 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -340,12 +340,8 @@ static int util_probe(struct hv_device *dev, set_channel_read_state(dev->channel, false); - ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0, - srv->util_cb, dev->channel); - if (ret) - goto error; - hv_set_drvdata(dev, srv); + /* * Based on the host; initialize the framework and * service version numbers we will negotiate. @@ -365,6 +361,11 @@ static int util_probe(struct hv_device *dev, hb_srv_version = HB_VERSION; } + ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0, + srv->util_cb, dev->channel); + if (ret) + goto error; + return 0; error: @@ -379,9 +380,9 @@ static int util_remove(struct hv_device *dev) { struct hv_util_service *srv = hv_get_drvdata(dev); - vmbus_close(dev->channel); if (srv->util_deinit) srv->util_deinit(); + vmbus_close(dev->channel); kfree(srv->recv_buffer); return 0; diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 44b1c9424712..887287ad411f 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -49,6 +49,17 @@ enum hv_cpuid_function { HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005, }; +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE 0x400 + +#define HV_X64_MSR_CRASH_P0 0x40000100 +#define HV_X64_MSR_CRASH_P1 0x40000101 +#define HV_X64_MSR_CRASH_P2 0x40000102 +#define HV_X64_MSR_CRASH_P3 0x40000103 +#define HV_X64_MSR_CRASH_P4 0x40000104 +#define HV_X64_MSR_CRASH_CTL 0x40000105 + +#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) + /* Define version of the synthetic interrupt controller. */ #define HV_SYNIC_VERSION (1) @@ -572,6 +583,8 @@ extern void hv_synic_init(void *irqarg); extern void hv_synic_cleanup(void *arg); +extern void hv_synic_clockevents_cleanup(void); + /* * Host version information. */ @@ -672,6 +685,23 @@ struct vmbus_msginfo { extern struct vmbus_connection vmbus_connection; +enum vmbus_message_handler_type { + /* The related handler can sleep. */ + VMHT_BLOCKING = 0, + + /* The related handler must NOT sleep. 
*/ + VMHT_NON_BLOCKING = 1, +}; + +struct vmbus_channel_message_table_entry { + enum vmbus_channel_message_type message_type; + enum vmbus_message_handler_type handler_type; + void (*message_handler)(struct vmbus_channel_message_header *msg); +}; + +extern struct vmbus_channel_message_table_entry + channel_message_table[CHANNELMSG_COUNT]; + /* General vmbus interface */ struct hv_device *vmbus_device_create(const uuid_le *type, @@ -692,6 +722,7 @@ void vmbus_free_channels(void); /* Connection interface */ int vmbus_connect(void); +void vmbus_disconnect(void); int vmbus_post_msg(void *buffer, size_t buflen); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index f518b8d7a5b5..c85235e9f245 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -33,9 +33,12 @@ #include <linux/hyperv.h> #include <linux/kernel_stat.h> #include <linux/clockchips.h> +#include <linux/cpu.h> #include <asm/hyperv.h> #include <asm/hypervisor.h> #include <asm/mshyperv.h> +#include <linux/notifier.h> +#include <linux/ptrace.h> #include "hyperv_vmbus.h" static struct acpi_device *hv_acpi_dev; @@ -44,6 +47,31 @@ static struct tasklet_struct msg_dpc; static struct completion probe_event; static int irq; + +static int hyperv_panic_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct pt_regs *regs; + + regs = current_pt_regs(); + + wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip); + wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax); + wrmsrl(HV_X64_MSR_CRASH_P2, regs->bx); + wrmsrl(HV_X64_MSR_CRASH_P3, regs->cx); + wrmsrl(HV_X64_MSR_CRASH_P4, regs->dx); + + /* + * Let Hyper-V know there is crash data available + */ + wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); + return NOTIFY_DONE; +} + +static struct notifier_block hyperv_panic_block = { + .notifier_call = hyperv_panic_event, +}; + struct resource hyperv_mmio = { .name = "hyperv mmio", .flags = IORESOURCE_MEM, @@ -507,14 +535,26 @@ static int vmbus_probe(struct device *child_device) */ static int vmbus_remove(struct device *child_device) { - struct hv_driver *drv = drv_to_hv_drv(child_device->driver); + struct hv_driver *drv; struct hv_device *dev = device_to_hv_device(child_device); - - if (drv->remove) - drv->remove(dev); - else - pr_err("remove not set for driver %s\n", - dev_name(child_device)); + u32 relid = dev->channel->offermsg.child_relid; + + if (child_device->driver) { + drv = drv_to_hv_drv(child_device->driver); + if (drv->remove) + drv->remove(dev); + else { + hv_process_channel_removal(dev->channel, relid); + pr_err("remove not set for driver %s\n", + dev_name(child_device)); + } + } else { + /* + * We don't have a driver for this device; deal with the + * rescind message by removing the channel. 
+ */ + hv_process_channel_removal(dev->channel, relid); + } return 0; } @@ -573,6 +613,10 @@ static void vmbus_onmessage_work(struct work_struct *work) { struct onmessage_work_context *ctx; + /* Do not process messages if we're in DISCONNECTED state */ + if (vmbus_connection.conn_state == DISCONNECTED) + return; + ctx = container_of(work, struct onmessage_work_context, work); vmbus_onmessage(&ctx->msg); @@ -613,21 +657,36 @@ static void vmbus_on_msg_dpc(unsigned long data) void *page_addr = hv_context.synic_message_page[cpu]; struct hv_message *msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; + struct vmbus_channel_message_header *hdr; + struct vmbus_channel_message_table_entry *entry; struct onmessage_work_context *ctx; while (1) { - if (msg->header.message_type == HVMSG_NONE) { + if (msg->header.message_type == HVMSG_NONE) /* no msg */ break; - } else { + + hdr = (struct vmbus_channel_message_header *)msg->u.payload; + + if (hdr->msgtype >= CHANNELMSG_COUNT) { + WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype); + goto msg_handled; + } + + entry = &channel_message_table[hdr->msgtype]; + if (entry->handler_type == VMHT_BLOCKING) { ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC); if (ctx == NULL) continue; + INIT_WORK(&ctx->work, vmbus_onmessage_work); memcpy(&ctx->msg, msg, sizeof(*msg)); + queue_work(vmbus_connection.work_queue, &ctx->work); - } + } else + entry->message_handler(hdr); +msg_handled: msg->header.message_type = HVMSG_NONE; /* @@ -704,6 +763,39 @@ static void vmbus_isr(void) } } +#ifdef CONFIG_HOTPLUG_CPU +static int hyperv_cpu_disable(void) +{ + return -ENOSYS; +} + +static void hv_cpu_hotplug_quirk(bool vmbus_loaded) +{ + static void *previous_cpu_disable; + + /* + * Offlining a CPU when running on newer hypervisors (WS2012R2, Win8, + * ...) is not supported at this moment as channel interrupts are + * distributed across all of them. + */ + + if ((vmbus_proto_version == VERSION_WS2008) || + (vmbus_proto_version == VERSION_WIN7)) + return; + + if (vmbus_loaded) { + previous_cpu_disable = smp_ops.cpu_disable; + smp_ops.cpu_disable = hyperv_cpu_disable; + pr_notice("CPU offlining is not supported by hypervisor\n"); + } else if (previous_cpu_disable) + smp_ops.cpu_disable = previous_cpu_disable; +} +#else +static void hv_cpu_hotplug_quirk(bool vmbus_loaded) +{ +} +#endif + /* * vmbus_bus_init -Main vmbus driver initialization routine. 
* @@ -744,6 +836,16 @@ static int vmbus_bus_init(int irq) if (ret) goto err_alloc; + hv_cpu_hotplug_quirk(true); + + /* + * Only register if the crash MSRs are available + */ + if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + atomic_notifier_chain_register(&panic_notifier_list, + &hyperv_panic_block); + } + vmbus_request_offers(); return 0; @@ -840,10 +942,8 @@ int vmbus_device_register(struct hv_device *child_device_obj) { int ret = 0; - static atomic_t device_num = ATOMIC_INIT(0); - - dev_set_name(&child_device_obj->device, "vmbus_0_%d", - atomic_inc_return(&device_num)); + dev_set_name(&child_device_obj->device, "vmbus_%d", + child_device_obj->channel->id); child_device_obj->device.bus = &hv_bus; child_device_obj->device.parent = &hv_acpi_dev->dev; @@ -992,11 +1092,19 @@ cleanup: static void __exit vmbus_exit(void) { + int cpu; + + vmbus_connection.conn_state = DISCONNECTED; + hv_synic_clockevents_cleanup(); hv_remove_vmbus_irq(); vmbus_free_channels(); bus_unregister(&hv_bus); hv_cleanup(); + for_each_online_cpu(cpu) + smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); acpi_bus_unregister_driver(&vmbus_acpi_driver); + hv_cpu_hotplug_quirk(false); + vmbus_disconnect(); } |
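
A closing sanity check of the corrected balloon-floor table in hv_balloon.c: the piecewise function is continuous at its breakpoints, and the new 744/1512 entries follow from the updated 232 and 488 MB constants (the old 768/1536 entries matched the previous 256 and 512). A standalone sketch of the same formula, expressed in megabytes for checking the table against the code:

	/* Mirror of compute_balloon_floor(), in MB, for table checking. */
	static unsigned long floor_mb(unsigned long totalram_mb)
	{
		if (totalram_mb < 128)
			return 8 + totalram_mb / 2;
		else if (totalram_mb < 512)
			return 40 + totalram_mb / 4;	/* 128 MB -> 72 */
		else if (totalram_mb < 2048)
			return 104 + totalram_mb / 8;	/* 512 MB -> 168 */
		else if (totalram_mb < 8192)
			return 232 + totalram_mb / 16;	/* 2048 MB -> 360 */
		else
			return 488 + totalram_mb / 32;	/* 8192 MB -> 744,
							   32768 MB -> 1512 */
	}
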