xen/arm: reimplement xen_dma_unmap_page & friends

xen_dma_unmap_page, xen_dma_sync_single_for_cpu and xen_dma_sync_single_for_device are currently implemented by calling into the corresponding generic ARM implementation of these functions. In order to do this, firstly the dma_addr_t handle, that on Xen is a machine address, needs to be translated into a physical address. The operation is expensive and inaccurate, given that a single machine address can correspond to multiple physical addresses in one domain, because the same page can be granted multiple times by the frontend. To avoid this problem, we introduce a Xen specific implementation of xen_dma_unmap_page, xen_dma_sync_single_for_cpu and xen_dma_sync_single_for_device, that can operate on machine addresses directly. The new implementation relies on the fact that the hypervisor creates a second p2m mapping of any grant pages at physical address == machine address of the page for dom0. Therefore we can access memory at physical address == dma_addr_r handle and perform the cache flushing there. Some cache maintenance operations require a virtual address. Instead of using ioremap_cache, that is not safe in interrupt context, we allocate a per-cpu PAGE_KERNEL scratch page and we manually update the pte for it. arm64 doesn't need cache maintenance operations on unmap for now. Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> Tested-by: Denis Schneider <v1ne2go@gmail.com>
author: Stefano Stabellini <stefano.stabellini@eu.citrix.com> 2014-09-10 22:49:41 +0000
committer: Stefano Stabellini <stefano.stabellini@eu.citrix.com> 2014-09-11 18:11:53 +0000
commit: 340720be32d458ee11d1117719a8e4b6b82981b2 (patch)
tree: 11489bb337f841f9bf3cce1a77771c2412c566c2 /arch/arm/xen
parent: 5ebc77de83c7b74543de774afa7395b3b790e65e (diff)
download: talos-op-linux-340720be32d458ee11d1117719a8e4b6b82981b2.tar.gz
talos-op-linux-340720be32d458ee11d1117719a8e4b6b82981b2.zip
2 files changed, 203 insertions, 1 deletions
diff --git a/arch/arm/xen/Makefile b/arch/arm/xen/Makefile
index 12969523414c..1f85bfe6b470 100644
--- a/arch/arm/xen/Makefile
+++ b/arch/arm/xen/Makefile
@@ -1 +1 @@
-obj-y		:= enlighten.o hypercall.o grant-table.o p2m.o mm.o
+obj-y		:= enlighten.o hypercall.o grant-table.o p2m.o mm.o mm32.o
diff --git a/arch/arm/xen/mm32.c b/arch/arm/xen/mm32.c
new file mode 100644
index 000000000000..3b99860fd7ae
--- /dev/null
+++ b/arch/arm/xen/mm32.c
@@ -0,0 +1,202 @@
+#include <linux/cpu.h>
+#include <linux/dma-mapping.h>
+#include <linux/gfp.h>
+#include <linux/highmem.h>
+
+#include <xen/features.h>
+
+static DEFINE_PER_CPU(unsigned long, xen_mm32_scratch_virt);
+static DEFINE_PER_CPU(pte_t *, xen_mm32_scratch_ptep);
+
+static int alloc_xen_mm32_scratch_page(int cpu)
+{
+	struct page *page;
+	unsigned long virt;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	if (per_cpu(xen_mm32_scratch_ptep, cpu) != NULL)
+		return 0;
+
+	page = alloc_page(GFP_KERNEL);
+	if (page == NULL) {
+		pr_warn("Failed to allocate xen_mm32_scratch_page for cpu %d\n", cpu);
+		return -ENOMEM;
+	}
+
+	virt = (unsigned long)__va(page_to_phys(page));
+	pmdp = pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt);
+	ptep = pte_offset_kernel(pmdp, virt);
+
+	per_cpu(xen_mm32_scratch_virt, cpu) = virt;
+	per_cpu(xen_mm32_scratch_ptep, cpu) = ptep;
+
+	return 0;
+}
+
+static int xen_mm32_cpu_notify(struct notifier_block *self,
+				    unsigned long action, void *hcpu)
+{
+	int cpu = (long)hcpu;
+	switch (action) {
+	case CPU_UP_PREPARE:
+		if (alloc_xen_mm32_scratch_page(cpu))
+			return NOTIFY_BAD;
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block xen_mm32_cpu_notifier = {
+	.notifier_call	= xen_mm32_cpu_notify,
+};
+
+static void* xen_mm32_remap_page(dma_addr_t handle)
+{
+	unsigned long virt = get_cpu_var(xen_mm32_scratch_virt);
+	pte_t *ptep = __get_cpu_var(xen_mm32_scratch_ptep);
+
+	*ptep = pfn_pte(handle >> PAGE_SHIFT, PAGE_KERNEL);
+	local_flush_tlb_kernel_page(virt);
+
+	return (void*)virt;
+}
+
+static void xen_mm32_unmap(void *vaddr)
+{
+	put_cpu_var(xen_mm32_scratch_virt);
+}
+
+
+/* functions called by SWIOTLB */
+
+static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
+	size_t size, enum dma_data_direction dir,
+	void (*op)(const void *, size_t, int))
+{
+	unsigned long pfn;
+	size_t left = size;
+
+	pfn = (handle >> PAGE_SHIFT) + offset / PAGE_SIZE;
+	offset %= PAGE_SIZE;
+
+	do {
+		size_t len = left;
+		void *vaddr;
+	
+		if (!pfn_valid(pfn))
+		{
+			/* Cannot map the page, we don't know its physical address.
+			 * Return and hope for the best */
+			if (!xen_feature(XENFEAT_grant_map_identity))
+				return;
+			vaddr = xen_mm32_remap_page(handle) + offset;
+			op(vaddr, len, dir);
+			xen_mm32_unmap(vaddr - offset);
+		} else {
+			struct page *page = pfn_to_page(pfn);
+
+			if (PageHighMem(page)) {
+				if (len + offset > PAGE_SIZE)
+					len = PAGE_SIZE - offset;
+
+				if (cache_is_vipt_nonaliasing()) {
+					vaddr = kmap_atomic(page);
+					op(vaddr + offset, len, dir);
+					kunmap_atomic(vaddr);
+				} else {
+					vaddr = kmap_high_get(page);
+					if (vaddr) {
+						op(vaddr + offset, len, dir);
+						kunmap_high(page);
+					}
+				}
+			} else {
+				vaddr = page_address(page) + offset;
+				op(vaddr, len, dir);
+			}
+		}
+
+		offset = 0;
+		pfn++;
+		left -= len;
+	} while (left);
+}
+
+static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+	/* Cannot use __dma_page_dev_to_cpu because we don't have a
+	 * struct page for handle */
+
+	if (dir != DMA_TO_DEVICE)
+		outer_inv_range(handle, handle + size);
+
+	dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_unmap_area);
+}
+
+static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir)
+{
+
+	dma_cache_maint(handle & PAGE_MASK, handle & ~PAGE_MASK, size, dir, dmac_map_area);
+
+	if (dir == DMA_FROM_DEVICE) {
+		outer_inv_range(handle, handle + size);
+	} else {
+		outer_clean_range(handle, handle + size);
+	}
+}
+
+void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir,
+		struct dma_attrs *attrs)
+
+{
+	if (!__generic_dma_ops(hwdev)->unmap_page)
+		return;
+	if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+		return;
+
+	__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
+}
+
+void xen_dma_sync_single_for_cpu(struct device *hwdev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	if (!__generic_dma_ops(hwdev)->sync_single_for_cpu)
+		return;
+	__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
+}
+
+void xen_dma_sync_single_for_device(struct device *hwdev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	if (!__generic_dma_ops(hwdev)->sync_single_for_device)
+		return;
+	__xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
+}
+
+int __init xen_mm32_init(void)
+{
+	int cpu;
+
+	if (!xen_initial_domain())
+		return 0;
+
+	register_cpu_notifier(&xen_mm32_cpu_notifier);
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		if (alloc_xen_mm32_scratch_page(cpu)) {
+			put_online_cpus();
+			unregister_cpu_notifier(&xen_mm32_cpu_notifier);
+			return -ENOMEM;
+		}
+	}
+	put_online_cpus();
+
+	return 0;
+}
+arch_initcall(xen_mm32_init);
author	Stefano Stabellini <stefano.stabellini@eu.citrix.com>	2014-09-10 22:49:41 +0000
committer	Stefano Stabellini <stefano.stabellini@eu.citrix.com>	2014-09-11 18:11:53 +0000
commit	340720be32d458ee11d1117719a8e4b6b82981b2 (patch)
tree	11489bb337f841f9bf3cce1a77771c2412c566c2 /arch/arm/xen
parent	5ebc77de83c7b74543de774afa7395b3b790e65e (diff)
download	talos-op-linux-340720be32d458ee11d1117719a8e4b6b82981b2.tar.gz talos-op-linux-340720be32d458ee11d1117719a8e4b6b82981b2.zip