Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git

author: Steve French <sfrench@us.ibm.com> 2005-11-09 14:33:22 -0800
committer: Steve French <sfrench@us.ibm.com> 2005-11-09 14:33:22 -0800
commit: e82b3aec8d508d2a925a4c766e97f16b7c4dfb1b (patch)
tree: 69d5685ef0c194f651a03e30bff14628b4d45400 /lib
parent: ec58ef03284f0bfa50a04982b74c8c2325a0758e (diff)
parent: ad8f76be48d817b48222411ae16a7dfe257bdb24 (diff)
download: talos-op-linux-e82b3aec8d508d2a925a4c766e97f16b7c4dfb1b.tar.gz
talos-op-linux-e82b3aec8d508d2a925a4c766e97f16b7c4dfb1b.zip
7 files changed, 909 insertions, 71 deletions
diff --git a/lib/Makefile b/lib/Makefile
index 44a46750690a..8535f4d7d1c3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -44,6 +44,8 @@ obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
 obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
 obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
 
+obj-$(CONFIG_SWIOTLB) += swiotlb.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index d1c057e71b68..88511c3805ad 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -281,35 +281,60 @@ int radix_tree_insert(struct radix_tree_root *root,
 }
 EXPORT_SYMBOL(radix_tree_insert);
 
-/**
- *	radix_tree_lookup    -    perform lookup operation on a radix tree
- *	@root:		radix tree root
- *	@index:		index key
- *
- *	Lookup the item at the position @index in the radix tree @root.
- */
-void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
+static inline void **__lookup_slot(struct radix_tree_root *root,
+				   unsigned long index)
 {
 	unsigned int height, shift;
-	struct radix_tree_node *slot;
+	struct radix_tree_node **slot;
 
 	height = root->height;
 	if (index > radix_tree_maxindex(height))
 		return NULL;
 
 	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
-	slot = root->rnode;
+	slot = &root->rnode;
 
 	while (height > 0) {
-		if (slot == NULL)
+		if (*slot == NULL)
 			return NULL;
 
-		slot = slot->slots[(index >> shift) & RADIX_TREE_MAP_MASK];
+		slot = (struct radix_tree_node **)
+			((*slot)->slots +
+				((index >> shift) & RADIX_TREE_MAP_MASK));
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
 	}
 
-	return slot;
+	return (void **)slot;
+}
+
+/**
+ *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
+ *	@root:		radix tree root
+ *	@index:		index key
+ *
+ *	Lookup the slot corresponding to the position @index in the radix tree
+ *	@root. This is useful for update-if-exists operations.
+ */
+void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
+{
+	return __lookup_slot(root, index);
+}
+EXPORT_SYMBOL(radix_tree_lookup_slot);
+
+/**
+ *	radix_tree_lookup    -    perform lookup operation on a radix tree
+ *	@root:		radix tree root
+ *	@index:		index key
+ *
+ *	Lookup the item at the position @index in the radix tree @root.
+ */
+void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
+{
+	void **slot;
+
+	slot = __lookup_slot(root, index);
+	return slot != NULL ? *slot : NULL;
 }
 EXPORT_SYMBOL(radix_tree_lookup);
 
diff --git a/lib/reed_solomon/Makefile b/lib/reed_solomon/Makefile
index 747a2de29346..c3d7136827ed 100644
--- a/lib/reed_solomon/Makefile
+++ b/lib/reed_solomon/Makefile
@@ -1,5 +1,5 @@
 #
-# This is a modified version of reed solomon lib, 
+# This is a modified version of reed solomon lib,
 #
 
 obj-$(CONFIG_REED_SOLOMON) += reed_solomon.o
diff --git a/lib/reed_solomon/decode_rs.c b/lib/reed_solomon/decode_rs.c
index d401decd6289..a58df56f09b6 100644
--- a/lib/reed_solomon/decode_rs.c
+++ b/lib/reed_solomon/decode_rs.c
@@ -1,22 +1,22 @@
-/* 
+/*
  * lib/reed_solomon/decode_rs.c
  *
  * Overview:
  *   Generic Reed Solomon encoder / decoder library
- *   
+ *
  * Copyright 2002, Phil Karn, KA9Q
  * May be used under the terms of the GNU General Public License (GPL)
  *
  * Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de)
  *
- * $Id: decode_rs.c,v 1.6 2004/10/22 15:41:47 gleixner Exp $
+ * $Id: decode_rs.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $
  *
  */
 
-/* Generic data width independent code which is included by the 
+/* Generic data width independent code which is included by the
  * wrappers.
  */
-{ 
+{
 	int deg_lambda, el, deg_omega;
 	int i, j, r, k, pad;
 	int nn = rs->nn;
@@ -41,9 +41,9 @@
 	pad = nn - nroots - len;
 	if (pad < 0 || pad >= nn)
 		return -ERANGE;
-		
+
 	/* Does the caller provide the syndrome ? */
-	if (s != NULL) 
+	if (s != NULL)
 		goto decode;
 
 	/* form the syndromes; i.e., evaluate data(x) at roots of
@@ -54,11 +54,11 @@
 	for (j = 1; j < len; j++) {
 		for (i = 0; i < nroots; i++) {
 			if (syn[i] == 0) {
-				syn[i] = (((uint16_t) data[j]) ^ 
+				syn[i] = (((uint16_t) data[j]) ^
 					  invmsk) & msk;
 			} else {
 				syn[i] = ((((uint16_t) data[j]) ^
-					   invmsk) & msk) ^ 
+					   invmsk) & msk) ^
 					alpha_to[rs_modnn(rs, index_of[syn[i]] +
 						       (fcr + i) * prim)];
 			}
@@ -70,7 +70,7 @@
 			if (syn[i] == 0) {
 				syn[i] = ((uint16_t) par[j]) & msk;
 			} else {
-				syn[i] = (((uint16_t) par[j]) & msk) ^ 
+				syn[i] = (((uint16_t) par[j]) & msk) ^
 					alpha_to[rs_modnn(rs, index_of[syn[i]] +
 						       (fcr+i)*prim)];
 			}
@@ -99,14 +99,14 @@
 
 	if (no_eras > 0) {
 		/* Init lambda to be the erasure locator polynomial */
-		lambda[1] = alpha_to[rs_modnn(rs, 
+		lambda[1] = alpha_to[rs_modnn(rs,
 					      prim * (nn - 1 - eras_pos[0]))];
 		for (i = 1; i < no_eras; i++) {
 			u = rs_modnn(rs, prim * (nn - 1 - eras_pos[i]));
 			for (j = i + 1; j > 0; j--) {
 				tmp = index_of[lambda[j - 1]];
 				if (tmp != nn) {
-					lambda[j] ^= 
+					lambda[j] ^=
 						alpha_to[rs_modnn(rs, u + tmp)];
 				}
 			}
@@ -127,8 +127,8 @@
 		discr_r = 0;
 		for (i = 0; i < r; i++) {
 			if ((lambda[i] != 0) && (s[r - i - 1] != nn)) {
-				discr_r ^= 
-					alpha_to[rs_modnn(rs, 
+				discr_r ^=
+					alpha_to[rs_modnn(rs,
 							  index_of[lambda[i]] +
 							  s[r - i - 1])];
 			}
@@ -143,7 +143,7 @@
 			t[0] = lambda[0];
 			for (i = 0; i < nroots; i++) {
 				if (b[i] != nn) {
-					t[i + 1] = lambda[i + 1] ^ 
+					t[i + 1] = lambda[i + 1] ^
 						alpha_to[rs_modnn(rs, discr_r +
 								  b[i])];
 				} else
@@ -229,7 +229,7 @@
 		num1 = 0;
 		for (i = deg_omega; i >= 0; i--) {
 			if (omega[i] != nn)
-				num1 ^= alpha_to[rs_modnn(rs, omega[i] + 
+				num1 ^= alpha_to[rs_modnn(rs, omega[i] +
 							i * root[j])];
 		}
 		num2 = alpha_to[rs_modnn(rs, root[j] * (fcr - 1) + nn)];
@@ -239,13 +239,13 @@
 		 * lambda_pr of lambda[i] */
 		for (i = min(deg_lambda, nroots - 1) & ~1; i >= 0; i -= 2) {
 			if (lambda[i + 1] != nn) {
-				den ^= alpha_to[rs_modnn(rs, lambda[i + 1] + 
+				den ^= alpha_to[rs_modnn(rs, lambda[i + 1] +
 						       i * root[j])];
 			}
 		}
 		/* Apply error to data */
 		if (num1 != 0 && loc[j] >= pad) {
-			uint16_t cor = alpha_to[rs_modnn(rs,index_of[num1] + 
+			uint16_t cor = alpha_to[rs_modnn(rs,index_of[num1] +
 						       index_of[num2] +
 						       nn - index_of[den])];
 			/* Store the error correction pattern, if a
diff --git a/lib/reed_solomon/encode_rs.c b/lib/reed_solomon/encode_rs.c
index 237bf65ae886..0b5b1a6728ec 100644
--- a/lib/reed_solomon/encode_rs.c
+++ b/lib/reed_solomon/encode_rs.c
@@ -1,19 +1,19 @@
-/* 
+/*
  * lib/reed_solomon/encode_rs.c
  *
  * Overview:
  *   Generic Reed Solomon encoder / decoder library
- *   
+ *
  * Copyright 2002, Phil Karn, KA9Q
  * May be used under the terms of the GNU General Public License (GPL)
  *
  * Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de)
  *
- * $Id: encode_rs.c,v 1.4 2004/10/22 15:41:47 gleixner Exp $
+ * $Id: encode_rs.c,v 1.5 2005/11/07 11:14:59 gleixner Exp $
  *
  */
 
-/* Generic data width independent code which is included by the 
+/* Generic data width independent code which is included by the
  * wrappers.
  * int encode_rsX (struct rs_control *rs, uintX_t *data, int len, uintY_t *par)
  */
@@ -35,16 +35,16 @@
 	for (i = 0; i < len; i++) {
 		fb = index_of[((((uint16_t) data[i])^invmsk) & msk) ^ par[0]];
 		/* feedback term is non-zero */
-		if (fb != nn) {	
+		if (fb != nn) {
 			for (j = 1; j < nroots; j++) {
-				par[j] ^= alpha_to[rs_modnn(rs, fb + 
+				par[j] ^= alpha_to[rs_modnn(rs, fb +
 							 genpoly[nroots - j])];
 			}
 		}
 		/* Shift */
 		memmove(&par[0], &par[1], sizeof(uint16_t) * (nroots - 1));
 		if (fb != nn) {
-			par[nroots - 1] = alpha_to[rs_modnn(rs, 
+			par[nroots - 1] = alpha_to[rs_modnn(rs,
 							    fb + genpoly[0])];
 		} else {
 			par[nroots - 1] = 0;
diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c
index 6604e3b1940c..f5fef948a415 100644
--- a/lib/reed_solomon/reed_solomon.c
+++ b/lib/reed_solomon/reed_solomon.c
@@ -1,22 +1,22 @@
-/* 
+/*
  * lib/reed_solomon/rslib.c
  *
  * Overview:
  *   Generic Reed Solomon encoder / decoder library
- *   
+ *
  * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de)
  *
  * Reed Solomon code lifted from reed solomon library written by Phil Karn
  * Copyright 2002 Phil Karn, KA9Q
  *
- * $Id: rslib.c,v 1.5 2004/10/22 15:41:47 gleixner Exp $
+ * $Id: rslib.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  *
  * Description:
- *	
+ *
  * The generic Reed Solomon library provides runtime configurable
  * encoding / decoding of RS codes.
  * Each user must call init_rs to get a pointer to a rs_control
@@ -25,11 +25,11 @@
  * If a structure is generated then the polynomial arrays for
  * fast encoding / decoding are built. This can take some time so
  * make sure not to call this function from a time critical path.
- * Usually a module / driver should initialize the necessary 
+ * Usually a module / driver should initialize the necessary
  * rs_control structure on module / driver init and release it
  * on exit.
- * The encoding puts the calculated syndrome into a given syndrome 
- * buffer. 
+ * The encoding puts the calculated syndrome into a given syndrome
+ * buffer.
  * The decoding is a two step process. The first step calculates
  * the syndrome over the received (data + syndrome) and calls the
  * second stage, which does the decoding / error correction itself.
@@ -51,7 +51,7 @@ static LIST_HEAD (rslist);
 /* Protection for the list */
 static DECLARE_MUTEX(rslistlock);
 
-/** 
+/**
  * rs_init - Initialize a Reed-Solomon codec
  *
  * @symsize:	symbol size, bits (1-8)
@@ -63,7 +63,7 @@ static DECLARE_MUTEX(rslistlock);
  * Allocate a control structure and the polynom arrays for faster
  * en/decoding. Fill the arrays according to the given parameters
  */
-static struct rs_control *rs_init(int symsize, int gfpoly, int fcr, 
+static struct rs_control *rs_init(int symsize, int gfpoly, int fcr,
 				   int prim, int nroots)
 {
 	struct rs_control *rs;
@@ -124,15 +124,15 @@ static struct rs_control *rs_init(int symsize, int gfpoly, int fcr,
 		/* Multiply rs->genpoly[] by  @**(root + x) */
 		for (j = i; j > 0; j--) {
 			if (rs->genpoly[j] != 0) {
-				rs->genpoly[j] = rs->genpoly[j -1] ^ 
-					rs->alpha_to[rs_modnn(rs, 
+				rs->genpoly[j] = rs->genpoly[j -1] ^
+					rs->alpha_to[rs_modnn(rs,
 					rs->index_of[rs->genpoly[j]] + root)];
 			} else
 				rs->genpoly[j] = rs->genpoly[j - 1];
 		}
 		/* rs->genpoly[0] can never be zero */
-		rs->genpoly[0] = 
-			rs->alpha_to[rs_modnn(rs, 
+		rs->genpoly[0] =
+			rs->alpha_to[rs_modnn(rs,
 				rs->index_of[rs->genpoly[0]] + root)];
 	}
 	/* convert rs->genpoly[] to index form for quicker encoding */
@@ -153,7 +153,7 @@ errrs:
 }
 
 
-/** 
+/**
  *  free_rs - Free the rs control structure, if its not longer used
  *
  *  @rs:	the control structure which is not longer used by the
@@ -173,19 +173,19 @@ void free_rs(struct rs_control *rs)
 	up(&rslistlock);
 }
 
-/** 
+/**
  * init_rs - Find a matching or allocate a new rs control structure
  *
  *  @symsize:	the symbol size (number of bits)
  *  @gfpoly:	the extended Galois field generator polynomial coefficients,
  *		with the 0th coefficient in the low order bit. The polynomial
  *		must be primitive;
- *  @fcr:  	the first consecutive root of the rs code generator polynomial 
+ *  @fcr:  	the first consecutive root of the rs code generator polynomial
  *		in index form
  *  @prim:	primitive element to generate polynomial roots
  *  @nroots:	RS code generator polynomial degree (number of roots)
  */
-struct rs_control *init_rs(int symsize, int gfpoly, int fcr, int prim, 
+struct rs_control *init_rs(int symsize, int gfpoly, int fcr, int prim,
 			   int nroots)
 {
 	struct list_head	*tmp;
@@ -198,9 +198,9 @@ struct rs_control *init_rs(int symsize, int gfpoly, int fcr, int prim,
     		return NULL;
 	if (prim <= 0 || prim >= (1<<symsize))
     		return NULL;
-	if (nroots < 0 || nroots >= (1<<symsize) || nroots > 8)
+	if (nroots < 0 || nroots >= (1<<symsize))
 		return NULL;
-	
+
 	down(&rslistlock);
 
 	/* Walk through the list and look for a matching entry */
@@ -211,9 +211,9 @@ struct rs_control *init_rs(int symsize, int gfpoly, int fcr, int prim,
 		if (gfpoly != rs->gfpoly)
 			continue;
 		if (fcr != rs->fcr)
-			continue;	
+			continue;
 		if (prim != rs->prim)
-			continue;	
+			continue;
 		if (nroots != rs->nroots)
 			continue;
 		/* We have a matching one already */
@@ -227,18 +227,18 @@ struct rs_control *init_rs(int symsize, int gfpoly, int fcr, int prim,
 		rs->users = 1;
 		list_add(&rs->list, &rslist);
 	}
-out:	
+out:
 	up(&rslistlock);
 	return rs;
 }
 
 #ifdef CONFIG_REED_SOLOMON_ENC8
-/** 
+/**
  *  encode_rs8 - Calculate the parity for data values (8bit data width)
  *
  *  @rs:	the rs control structure
  *  @data:	data field of a given type
- *  @len:	data length 
+ *  @len:	data length
  *  @par:	parity data, must be initialized by caller (usually all 0)
  *  @invmsk:	invert data mask (will be xored on data)
  *
@@ -246,7 +246,7 @@ out:
  *  symbol size > 8. The calling code must take care of encoding of the
  *  syndrome result for storage itself.
  */
-int encode_rs8(struct rs_control *rs, uint8_t *data, int len, uint16_t *par, 
+int encode_rs8(struct rs_control *rs, uint8_t *data, int len, uint16_t *par,
 	       uint16_t invmsk)
 {
 #include "encode_rs.c"
@@ -255,7 +255,7 @@ EXPORT_SYMBOL_GPL(encode_rs8);
 #endif
 
 #ifdef CONFIG_REED_SOLOMON_DEC8
-/** 
+/**
  *  decode_rs8 - Decode codeword (8bit data width)
  *
  *  @rs:	the rs control structure
@@ -273,7 +273,7 @@ EXPORT_SYMBOL_GPL(encode_rs8);
  *  syndrome result and the received parity before calling this code.
  */
 int decode_rs8(struct rs_control *rs, uint8_t *data, uint16_t *par, int len,
-	       uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk, 
+	       uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
 	       uint16_t *corr)
 {
 #include "decode_rs.c"
@@ -287,13 +287,13 @@ EXPORT_SYMBOL_GPL(decode_rs8);
  *
  *  @rs:	the rs control structure
  *  @data:	data field of a given type
- *  @len:	data length 
+ *  @len:	data length
  *  @par:	parity data, must be initialized by caller (usually all 0)
  *  @invmsk:	invert data mask (will be xored on data, not on parity!)
  *
  *  Each field in the data array contains up to symbol size bits of valid data.
  */
-int encode_rs16(struct rs_control *rs, uint16_t *data, int len, uint16_t *par, 
+int encode_rs16(struct rs_control *rs, uint16_t *data, int len, uint16_t *par,
 	uint16_t invmsk)
 {
 #include "encode_rs.c"
@@ -302,7 +302,7 @@ EXPORT_SYMBOL_GPL(encode_rs16);
 #endif
 
 #ifdef CONFIG_REED_SOLOMON_DEC16
-/** 
+/**
  *  decode_rs16 - Decode codeword (16bit data width)
  *
  *  @rs:	the rs control structure
@@ -312,13 +312,13 @@ EXPORT_SYMBOL_GPL(encode_rs16);
  *  @s:		syndrome data field (if NULL, syndrome is calculated)
  *  @no_eras:	number of erasures
  *  @eras_pos:	position of erasures, can be NULL
- *  @invmsk:	invert data mask (will be xored on data, not on parity!) 
+ *  @invmsk:	invert data mask (will be xored on data, not on parity!)
  *  @corr:	buffer to store correction bitmask on eras_pos
  *
  *  Each field in the data array contains up to symbol size bits of valid data.
  */
 int decode_rs16(struct rs_control *rs, uint16_t *data, uint16_t *par, int len,
-		uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk, 
+		uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
 		uint16_t *corr)
 {
 #include "decode_rs.c"
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
new file mode 100644
index 000000000000..57216f3544ca
--- /dev/null
+++ b/lib/swiotlb.c
@@ -0,0 +1,811 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * This implementation is for IA-64 and EM64T platforms that do not support
+ * I/O TLBs (aka DMA address translation hardware).
+ * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
+ * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
+ * Copyright (C) 2000, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 03/05/07 davidm	Switch from PCI-DMA to generic device DMA API.
+ * 00/12/13 davidm	Rename to swiotlb.c and add mark_clean() to avoid
+ *			unnecessary i-cache flushing.
+ * 04/07/.. ak		Better overflow handling. Assorted fixes.
+ * 05/09/10 linville	Add support for syncing ranges, support syncing for
+ *			DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
+ */
+
+#include <linux/cache.h>
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+
+#include <asm/io.h>
+#include <asm/dma.h>
+#include <asm/scatterlist.h>
+
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+#define OFFSET(val,align) ((unsigned long)	\
+	                   ( (val) & ( (align) - 1)))
+
+#define SG_ENT_VIRT_ADDRESS(sg)	(page_address((sg)->page) + (sg)->offset)
+#define SG_ENT_PHYS_ADDRESS(SG)	virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))
+
+/*
+ * Maximum allowable number of contiguous slabs to map,
+ * must be a power of 2.  What is the appropriate value ?
+ * The complexity of {map,unmap}_single is linearly dependent on this value.
+ */
+#define IO_TLB_SEGSIZE	128
+
+/*
+ * log of the size of each IO TLB slab.  The number of slabs is command line
+ * controllable.
+ */
+#define IO_TLB_SHIFT 11
+
+#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
+
+/*
+ * Minimum IO TLB size to bother booting with.  Systems with mainly
+ * 64bit capable cards will only lightly use the swiotlb.  If we can't
+ * allocate a contiguous 1MB, we're probably in trouble anyway.
+ */
+#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
+
+/*
+ * Enumeration for sync targets
+ */
+enum dma_sync_target {
+	SYNC_FOR_CPU = 0,
+	SYNC_FOR_DEVICE = 1,
+};
+
+int swiotlb_force;
+
+/*
+ * Used to do a quick range check in swiotlb_unmap_single and
+ * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
+ * API.
+ */
+static char *io_tlb_start, *io_tlb_end;
+
+/*
+ * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and
+ * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
+ */
+static unsigned long io_tlb_nslabs;
+
+/*
+ * When the IOMMU overflows we return a fallback buffer. This sets the size.
+ */
+static unsigned long io_tlb_overflow = 32*1024;
+
+void *io_tlb_overflow_buffer;
+
+/*
+ * This is a free list describing the number of free entries available from
+ * each index
+ */
+static unsigned int *io_tlb_list;
+static unsigned int io_tlb_index;
+
+/*
+ * We need to save away the original address corresponding to a mapped entry
+ * for the sync operations.
+ */
+static unsigned char **io_tlb_orig_addr;
+
+/*
+ * Protect the above data structures in the map and unmap calls
+ */
+static DEFINE_SPINLOCK(io_tlb_lock);
+
+static int __init
+setup_io_tlb_npages(char *str)
+{
+	if (isdigit(*str)) {
+		io_tlb_nslabs = simple_strtoul(str, &str, 0);
+		/* avoid tail segment of size < IO_TLB_SEGSIZE */
+		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	}
+	if (*str == ',')
+		++str;
+	if (!strcmp(str, "force"))
+		swiotlb_force = 1;
+	return 1;
+}
+__setup("swiotlb=", setup_io_tlb_npages);
+/* make io_tlb_overflow tunable too? */
+
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the DMA API.
+ */
+void
+swiotlb_init_with_default_size (size_t default_size)
+{
+	unsigned long i;
+
+	if (!io_tlb_nslabs) {
+		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	}
+
+	/*
+	 * Get IO TLB memory from the low pages
+	 */
+	io_tlb_start = alloc_bootmem_low_pages_limit(io_tlb_nslabs *
+					     (1 << IO_TLB_SHIFT), 0x100000000);
+	if (!io_tlb_start)
+		panic("Cannot allocate SWIOTLB buffer");
+	io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
+
+	/*
+	 * Allocate and initialize the free list array.  This array is used
+	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
+	 * between io_tlb_start and io_tlb_end.
+	 */
+	io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
+	for (i = 0; i < io_tlb_nslabs; i++)
+ 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+	io_tlb_index = 0;
+	io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
+
+	/*
+	 * Get the overflow emergency buffer
+	 */
+	io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
+	printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
+	       virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
+}
+
+void
+swiotlb_init (void)
+{
+	swiotlb_init_with_default_size(64 * (1<<20));	/* default to 64MB */
+}
+
+/*
+ * Systems with larger DMA zones (those that don't support ISA) can
+ * initialize the swiotlb later using the slab allocator if needed.
+ * This should be just like above, but with some error catching.
+ */
+int
+swiotlb_late_init_with_default_size (size_t default_size)
+{
+	unsigned long i, req_nslabs = io_tlb_nslabs;
+	unsigned int order;
+
+	if (!io_tlb_nslabs) {
+		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	}
+
+	/*
+	 * Get IO TLB memory from the low pages
+	 */
+	order = get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
+	io_tlb_nslabs = SLABS_PER_PAGE << order;
+
+	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
+		io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
+		                                        order);
+		if (io_tlb_start)
+			break;
+		order--;
+	}
+
+	if (!io_tlb_start)
+		goto cleanup1;
+
+	if (order != get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))) {
+		printk(KERN_WARNING "Warning: only able to allocate %ld MB "
+		       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
+		io_tlb_nslabs = SLABS_PER_PAGE << order;
+	}
+	io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
+	memset(io_tlb_start, 0, io_tlb_nslabs * (1 << IO_TLB_SHIFT));
+
+	/*
+	 * Allocate and initialize the free list array.  This array is used
+	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
+	 * between io_tlb_start and io_tlb_end.
+	 */
+	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
+	                              get_order(io_tlb_nslabs * sizeof(int)));
+	if (!io_tlb_list)
+		goto cleanup2;
+
+	for (i = 0; i < io_tlb_nslabs; i++)
+ 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+	io_tlb_index = 0;
+
+	io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
+	                           get_order(io_tlb_nslabs * sizeof(char *)));
+	if (!io_tlb_orig_addr)
+		goto cleanup3;
+
+	memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));
+
+	/*
+	 * Get the overflow emergency buffer
+	 */
+	io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
+	                                          get_order(io_tlb_overflow));
+	if (!io_tlb_overflow_buffer)
+		goto cleanup4;
+
+	printk(KERN_INFO "Placing %ldMB software IO TLB between 0x%lx - "
+	       "0x%lx\n", (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20,
+	       virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
+
+	return 0;
+
+cleanup4:
+	free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
+	                                                      sizeof(char *)));
+	io_tlb_orig_addr = NULL;
+cleanup3:
+	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
+	                                                 sizeof(int)));
+	io_tlb_list = NULL;
+	io_tlb_end = NULL;
+cleanup2:
+	free_pages((unsigned long)io_tlb_start, order);
+	io_tlb_start = NULL;
+cleanup1:
+	io_tlb_nslabs = req_nslabs;
+	return -ENOMEM;
+}
+
+static inline int
+address_needs_mapping(struct device *hwdev, dma_addr_t addr)
+{
+	dma_addr_t mask = 0xffffffff;
+	/* If the device has a mask, use it, otherwise default to 32 bits */
+	if (hwdev && hwdev->dma_mask)
+		mask = *hwdev->dma_mask;
+	return (addr & ~mask) != 0;
+}
+
+/*
+ * Allocates bounce buffer and returns its kernel virtual address.
+ */
+static void *
+map_single(struct device *hwdev, char *buffer, size_t size, int dir)
+{
+	unsigned long flags;
+	char *dma_addr;
+	unsigned int nslots, stride, index, wrap;
+	int i;
+
+	/*
+	 * For mappings greater than a page, we limit the stride (and
+	 * hence alignment) to a page size.
+	 */
+	nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+	if (size > PAGE_SIZE)
+		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
+	else
+		stride = 1;
+
+	if (!nslots)
+		BUG();
+
+	/*
+	 * Find suitable number of IO TLB entries size that will fit this
+	 * request and allocate a buffer from that IO TLB pool.
+	 */
+	spin_lock_irqsave(&io_tlb_lock, flags);
+	{
+		wrap = index = ALIGN(io_tlb_index, stride);
+
+		if (index >= io_tlb_nslabs)
+			wrap = index = 0;
+
+		do {
+			/*
+			 * If we find a slot that indicates we have 'nslots'
+			 * number of contiguous buffers, we allocate the
+			 * buffers from that slot and mark the entries as '0'
+			 * indicating unavailable.
+			 */
+			if (io_tlb_list[index] >= nslots) {
+				int count = 0;
+
+				for (i = index; i < (int) (index + nslots); i++)
+					io_tlb_list[i] = 0;
+				for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
+					io_tlb_list[i] = ++count;
+				dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+
+				/*
+				 * Update the indices to avoid searching in
+				 * the next round.
+				 */
+				io_tlb_index = ((index + nslots) < io_tlb_nslabs
+						? (index + nslots) : 0);
+
+				goto found;
+			}
+			index += stride;
+			if (index >= io_tlb_nslabs)
+				index = 0;
+		} while (index != wrap);
+
+		spin_unlock_irqrestore(&io_tlb_lock, flags);
+		return NULL;
+	}
+  found:
+	spin_unlock_irqrestore(&io_tlb_lock, flags);
+
+	/*
+	 * Save away the mapping from the original address to the DMA address.
+	 * This is needed when we sync the memory.  Then we sync the buffer if
+	 * needed.
+	 */
+	io_tlb_orig_addr[index] = buffer;
+	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
+		memcpy(dma_addr, buffer, size);
+
+	return dma_addr;
+}
+
+/*
+ * dma_addr is the kernel virtual address of the bounce buffer to unmap.
+ */
+static void
+unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+{
+	unsigned long flags;
+	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	char *buffer = io_tlb_orig_addr[index];
+
+	/*
+	 * First, sync the memory before unmapping the entry
+	 */
+	if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+		/*
+		 * bounce... copy the data back into the original buffer * and
+		 * delete the bounce buffer.
+		 */
+		memcpy(buffer, dma_addr, size);
+
+	/*
+	 * Return the buffer to the free list by setting the corresponding
+	 * entries to indicate the number of contigous entries available.
+	 * While returning the entries to the free list, we merge the entries
+	 * with slots below and above the pool being returned.
+	 */
+	spin_lock_irqsave(&io_tlb_lock, flags);
+	{
+		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
+			 io_tlb_list[index + nslots] : 0);
+		/*
+		 * Step 1: return the slots to the free list, merging the
+		 * slots with superceeding slots
+		 */
+		for (i = index + nslots - 1; i >= index; i--)
+			io_tlb_list[i] = ++count;
+		/*
+		 * Step 2: merge the returned slots with the preceding slots,
+		 * if available (non zero)
+		 */
+		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
+			io_tlb_list[i] = ++count;
+	}
+	spin_unlock_irqrestore(&io_tlb_lock, flags);
+}
+
+static void
+sync_single(struct device *hwdev, char *dma_addr, size_t size,
+	    int dir, int target)
+{
+	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	char *buffer = io_tlb_orig_addr[index];
+
+	switch (target) {
+	case SYNC_FOR_CPU:
+		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
+			memcpy(buffer, dma_addr, size);
+		else if (dir != DMA_TO_DEVICE)
+			BUG();
+		break;
+	case SYNC_FOR_DEVICE:
+		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+			memcpy(dma_addr, buffer, size);
+		else if (dir != DMA_FROM_DEVICE)
+			BUG();
+		break;
+	default:
+		BUG();
+	}
+}
+
+void *
+swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+		       dma_addr_t *dma_handle, gfp_t flags)
+{
+	unsigned long dev_addr;
+	void *ret;
+	int order = get_order(size);
+
+	/*
+	 * XXX fix me: the DMA API should pass us an explicit DMA mask
+	 * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
+	 * bit range instead of a 16MB one).
+	 */
+	flags |= GFP_DMA;
+
+	ret = (void *)__get_free_pages(flags, order);
+	if (ret && address_needs_mapping(hwdev, virt_to_phys(ret))) {
+		/*
+		 * The allocated memory isn't reachable by the device.
+		 * Fall back on swiotlb_map_single().
+		 */
+		free_pages((unsigned long) ret, order);
+		ret = NULL;
+	}
+	if (!ret) {
+		/*
+		 * We are either out of memory or the device can't DMA
+		 * to GFP_DMA memory; fall back on
+		 * swiotlb_map_single(), which will grab memory from
+		 * the lowest available address range.
+		 */
+		dma_addr_t handle;
+		handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
+		if (dma_mapping_error(handle))
+			return NULL;
+
+		ret = phys_to_virt(handle);
+	}
+
+	memset(ret, 0, size);
+	dev_addr = virt_to_phys(ret);
+
+	/* Confirm address can be DMA'd by device */
+	if (address_needs_mapping(hwdev, dev_addr)) {
+		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016lx\n",
+		       (unsigned long long)*hwdev->dma_mask, dev_addr);
+		panic("swiotlb_alloc_coherent: allocated memory is out of "
+		      "range for device");
+	}
+	*dma_handle = dev_addr;
+	return ret;
+}
+
+void
+swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
+		      dma_addr_t dma_handle)
+{
+	if (!(vaddr >= (void *)io_tlb_start
+                    && vaddr < (void *)io_tlb_end))
+		free_pages((unsigned long) vaddr, get_order(size));
+	else
+		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
+		swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE);
+}
+
+static void
+swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
+{
+	/*
+	 * Ran out of IOMMU space for this operation. This is very bad.
+	 * Unfortunately the drivers cannot handle this operation properly.
+	 * unless they check for dma_mapping_error (most don't)
+	 * When the mapping is small enough return a static buffer to limit
+	 * the damage, or panic when the transfer is too big.
+	 */
+	printk(KERN_ERR "DMA: Out of SW-IOMMU space for %lu bytes at "
+	       "device %s\n", size, dev ? dev->bus_id : "?");
+
+	if (size > io_tlb_overflow && do_panic) {
+		if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
+			panic("DMA: Memory would be corrupted\n");
+		if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
+			panic("DMA: Random memory would be DMAed\n");
+	}
+}
+
+/*
+ * Map a single buffer of the indicated size for DMA in streaming mode.  The
+ * physical address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory until
+ * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
+ */
+dma_addr_t
+swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
+{
+	unsigned long dev_addr = virt_to_phys(ptr);
+	void *map;
+
+	if (dir == DMA_NONE)
+		BUG();
+	/*
+	 * If the pointer passed in happens to be in the device's DMA window,
+	 * we can safely return the device addr and not worry about bounce
+	 * buffering it.
+	 */
+	if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
+		return dev_addr;
+
+	/*
+	 * Oh well, have to allocate and map a bounce buffer.
+	 */
+	map = map_single(hwdev, ptr, size, dir);
+	if (!map) {
+		swiotlb_full(hwdev, size, dir, 1);
+		map = io_tlb_overflow_buffer;
+	}
+
+	dev_addr = virt_to_phys(map);
+
+	/*
+	 * Ensure that the address returned is DMA'ble
+	 */
+	if (address_needs_mapping(hwdev, dev_addr))
+		panic("map_single: bounce buffer is not DMA'ble");
+
+	return dev_addr;
+}
+
+/*
+ * Since DMA is i-cache coherent, any (complete) pages that were written via
+ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
+ * flush them when they get mapped into an executable vm-area.
+ */
+static void
+mark_clean(void *addr, size_t size)
+{
+	unsigned long pg_addr, end;
+
+	pg_addr = PAGE_ALIGN((unsigned long) addr);
+	end = (unsigned long) addr + size;
+	while (pg_addr + PAGE_SIZE <= end) {
+		struct page *page = virt_to_page(pg_addr);
+		set_bit(PG_arch_1, &page->flags);
+		pg_addr += PAGE_SIZE;
+	}
+}
+
+/*
+ * Unmap a single streaming mode DMA translation.  The dma_addr and size must
+ * match what was provided for in a previous swiotlb_map_single call.  All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+void
+swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
+		     int dir)
+{
+	char *dma_addr = phys_to_virt(dev_addr);
+
+	if (dir == DMA_NONE)
+		BUG();
+	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+		unmap_single(hwdev, dma_addr, size, dir);
+	else if (dir == DMA_FROM_DEVICE)
+		mark_clean(dma_addr, size);
+}
+
+/*
+ * Make physical memory consistent for a single streaming mode DMA translation
+ * after a transfer.
+ *
+ * If you perform a swiotlb_map_single() but wish to interrogate the buffer
+ * using the cpu, yet do not wish to teardown the dma mapping, you must
+ * call this function before doing so.  At the next point you give the dma
+ * address back to the card, you must first perform a
+ * swiotlb_dma_sync_for_device, and then the device again owns the buffer
+ */
+static inline void
+swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
+		    size_t size, int dir, int target)
+{
+	char *dma_addr = phys_to_virt(dev_addr);
+
+	if (dir == DMA_NONE)
+		BUG();
+	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+		sync_single(hwdev, dma_addr, size, dir, target);
+	else if (dir == DMA_FROM_DEVICE)
+		mark_clean(dma_addr, size);
+}
+
+void
+swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
+			    size_t size, int dir)
+{
+	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
+}
+
+void
+swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
+			       size_t size, int dir)
+{
+	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
+}
+
+/*
+ * Same as above, but for a sub-range of the mapping.
+ */
+static inline void
+swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
+			  unsigned long offset, size_t size,
+			  int dir, int target)
+{
+	char *dma_addr = phys_to_virt(dev_addr) + offset;
+
+	if (dir == DMA_NONE)
+		BUG();
+	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+		sync_single(hwdev, dma_addr, size, dir, target);
+	else if (dir == DMA_FROM_DEVICE)
+		mark_clean(dma_addr, size);
+}
+
+void
+swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
+				  unsigned long offset, size_t size, int dir)
+{
+	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
+				  SYNC_FOR_CPU);
+}
+
+void
+swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
+				     unsigned long offset, size_t size, int dir)
+{
+	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
+				  SYNC_FOR_DEVICE);
+}
+
+/*
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * This is the scatter-gather version of the above swiotlb_map_single
+ * interface.  Here the scatter gather list elements are each tagged with the
+ * appropriate dma address and length.  They are obtained via
+ * sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ *       DMA address/length pairs than there are SG table elements.
+ *       (for example via virtual mapping capabilities)
+ *       The routine returns the number of addr/length pairs actually
+ *       used, at most nents.
+ *
+ * Device ownership issues as mentioned above for swiotlb_map_single are the
+ * same here.
+ */
+int
+swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
+	       int dir)
+{
+	void *addr;
+	unsigned long dev_addr;
+	int i;
+
+	if (dir == DMA_NONE)
+		BUG();
+
+	for (i = 0; i < nelems; i++, sg++) {
+		addr = SG_ENT_VIRT_ADDRESS(sg);
+		dev_addr = virt_to_phys(addr);
+		if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
+			sg->dma_address = (dma_addr_t) virt_to_phys(map_single(hwdev, addr, sg->length, dir));
+			if (!sg->dma_address) {
+				/* Don't panic here, we expect map_sg users
+				   to do proper error handling. */
+				swiotlb_full(hwdev, sg->length, dir, 0);
+				swiotlb_unmap_sg(hwdev, sg - i, i, dir);
+				sg[0].dma_length = 0;
+				return 0;
+			}
+		} else
+			sg->dma_address = dev_addr;
+		sg->dma_length = sg->length;
+	}
+	return nelems;
+}
+
+/*
+ * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
+ * concerning calls here are the same as for swiotlb_unmap_single() above.
+ */
+void
+swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
+		 int dir)
+{
+	int i;
+
+	if (dir == DMA_NONE)
+		BUG();
+
+	for (i = 0; i < nelems; i++, sg++)
+		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+			unmap_single(hwdev, (void *) phys_to_virt(sg->dma_address), sg->dma_length, dir);
+		else if (dir == DMA_FROM_DEVICE)
+			mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
+}
+
+/*
+ * Make physical memory consistent for a set of streaming mode DMA translations
+ * after a transfer.
+ *
+ * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
+ * and usage.
+ */
+static inline void
+swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sg,
+		int nelems, int dir, int target)
+{
+	int i;
+
+	if (dir == DMA_NONE)
+		BUG();
+
+	for (i = 0; i < nelems; i++, sg++)
+		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+			sync_single(hwdev, (void *) sg->dma_address,
+				    sg->dma_length, dir, target);
+}
+
+void
+swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
+			int nelems, int dir)
+{
+	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
+}
+
+void
+swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
+			   int nelems, int dir)
+{
+	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
+}
+
+int
+swiotlb_dma_mapping_error(dma_addr_t dma_addr)
+{
+	return (dma_addr == virt_to_phys(io_tlb_overflow_buffer));
+}
+
+/*
+ * Return whether the given device DMA address mask can be supported
+ * properly.  For example, if your device can only drive the low 24-bits
+ * during bus mastering, then you would pass 0x00ffffff as the mask to
+ * this function.
+ */
+int
+swiotlb_dma_supported (struct device *hwdev, u64 mask)
+{
+	return (virt_to_phys (io_tlb_end) - 1) <= mask;
+}
+
+EXPORT_SYMBOL(swiotlb_init);
+EXPORT_SYMBOL(swiotlb_map_single);
+EXPORT_SYMBOL(swiotlb_unmap_single);
+EXPORT_SYMBOL(swiotlb_map_sg);
+EXPORT_SYMBOL(swiotlb_unmap_sg);
+EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
+EXPORT_SYMBOL(swiotlb_sync_single_for_device);
+EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
+EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
+EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
+EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
+EXPORT_SYMBOL(swiotlb_dma_mapping_error);
+EXPORT_SYMBOL(swiotlb_alloc_coherent);
+EXPORT_SYMBOL(swiotlb_free_coherent);
+EXPORT_SYMBOL(swiotlb_dma_supported);
author	Steve French <sfrench@us.ibm.com>	2005-11-09 14:33:22 -0800
committer	Steve French <sfrench@us.ibm.com>	2005-11-09 14:33:22 -0800
commit	e82b3aec8d508d2a925a4c766e97f16b7c4dfb1b (patch)
tree	69d5685ef0c194f651a03e30bff14628b4d45400 /lib
parent	ec58ef03284f0bfa50a04982b74c8c2325a0758e (diff)
parent	ad8f76be48d817b48222411ae16a7dfe257bdb24 (diff)
download	talos-op-linux-e82b3aec8d508d2a925a4c766e97f16b7c4dfb1b.tar.gz talos-op-linux-e82b3aec8d508d2a925a4c766e97f16b7c4dfb1b.zip