Merge branch 'next' of git://git.infradead.org/users/vkoul/slave-dma

Pull slave-dmaengine changes from Vinod Koul: "This brings for slave dmaengine: - Change dma notification flag to DMA_COMPLETE from DMA_SUCCESS as dmaengine can only transfer and not verify validaty of dma transfers - Bunch of fixes across drivers: - cppi41 driver fixes from Daniel - 8 channel freescale dma engine support and updated bindings from Hongbo - msx-dma fixes and cleanup by Markus - DMAengine updates from Dan: - Bartlomiej and Dan finalized a rework of the dma address unmap implementation. - In the course of testing 1/ a collection of enhancements to dmatest fell out. Notably basic performance statistics, and fixed / enhanced test control through new module parameters 'run', 'wait', 'noverify', and 'verbose'. Thanks to Andriy and Linus [Walleij] for their review. - Testing the raid related corner cases of 1/ triggered bugs in the recently added 16-source operation support in the ioatdma driver. - Some minor fixes / cleanups to mv_xor and ioatdma" * 'next' of git://git.infradead.org/users/vkoul/slave-dma: (99 commits) dma: mv_xor: Fix mis-usage of mmio 'base' and 'high_base' registers dma: mv_xor: Remove unneeded NULL address check ioat: fix ioat3_irq_reinit ioat: kill msix_single_vector support raid6test: add new corner case for ioatdma driver ioatdma: clean up sed pool kmem_cache ioatdma: fix selection of 16 vs 8 source path ioatdma: fix sed pool selection ioatdma: Fix bug in selftest after removal of DMA_MEMSET. dmatest: verbose mode dmatest: convert to dmaengine_unmap_data dmatest: add a 'wait' parameter dmatest: add basic performance metrics dmatest: add support for skipping verification and random data setup dmatest: use pseudo random numbers dmatest: support xor-only, or pq-only channels in tests dmatest: restore ability to start test at module load and init dmatest: cleanup redundant "dmatest: " prefixes dmatest: replace stored results mechanism, with uniform messages Revert "dmatest: append verify result to results" ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2013-11-20 13:20:24 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-11-20 13:20:24 -0800
commit: e6d69a60b77a6ea8d5f9d41765c7571bb8d45531 (patch)
tree: 4ea3fe7c49a864da2ce7ffb51a703661826dc15d /drivers/dma/edma.c
parent: 5a1efc6e68a095917277459091fafba6a6baef17 (diff)
parent: df12a3178d340319b1955be6b973a4eb84aff754 (diff)
download: blackbird-op-linux-e6d69a60b77a6ea8d5f9d41765c7571bb8d45531.tar.gz
blackbird-op-linux-e6d69a60b77a6ea8d5f9d41765c7571bb8d45531.zip
1 files changed, 286 insertions, 83 deletions
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index bef8a368c8dd..2539ea0cbc63 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -46,14 +46,21 @@
 #define EDMA_CHANS	64
 #endif /* CONFIG_ARCH_DAVINCI_DA8XX */
 
-/* Max of 16 segments per channel to conserve PaRAM slots */
-#define MAX_NR_SG		16
+/*
+ * Max of 20 segments per channel to conserve PaRAM slots
+ * Also note that MAX_NR_SG should be atleast the no.of periods
+ * that are required for ASoC, otherwise DMA prep calls will
+ * fail. Today davinci-pcm is the only user of this driver and
+ * requires atleast 17 slots, so we setup the default to 20.
+ */
+#define MAX_NR_SG		20
 #define EDMA_MAX_SLOTS		MAX_NR_SG
 #define EDMA_DESCRIPTORS	16
 
 struct edma_desc {
 	struct virt_dma_desc		vdesc;
 	struct list_head		node;
+	int				cyclic;
 	int				absync;
 	int				pset_nr;
 	int				processed;
@@ -167,8 +174,13 @@ static void edma_execute(struct edma_chan *echan)
 	 * then setup a link to the dummy slot, this results in all future
 	 * events being absorbed and that's OK because we're done
 	 */
-	if (edesc->processed == edesc->pset_nr)
-		edma_link(echan->slot[nslots-1], echan->ecc->dummy_slot);
+	if (edesc->processed == edesc->pset_nr) {
+		if (edesc->cyclic)
+			edma_link(echan->slot[nslots-1], echan->slot[1]);
+		else
+			edma_link(echan->slot[nslots-1],
+				  echan->ecc->dummy_slot);
+	}
 
 	edma_resume(echan->ch_num);
 
@@ -250,6 +262,117 @@ static int edma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 	return ret;
 }
 
+/*
+ * A PaRAM set configuration abstraction used by other modes
+ * @chan: Channel who's PaRAM set we're configuring
+ * @pset: PaRAM set to initialize and setup.
+ * @src_addr: Source address of the DMA
+ * @dst_addr: Destination address of the DMA
+ * @burst: In units of dev_width, how much to send
+ * @dev_width: How much is the dev_width
+ * @dma_length: Total length of the DMA transfer
+ * @direction: Direction of the transfer
+ */
+static int edma_config_pset(struct dma_chan *chan, struct edmacc_param *pset,
+	dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst,
+	enum dma_slave_buswidth dev_width, unsigned int dma_length,
+	enum dma_transfer_direction direction)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = chan->device->dev;
+	int acnt, bcnt, ccnt, cidx;
+	int src_bidx, dst_bidx, src_cidx, dst_cidx;
+	int absync;
+
+	acnt = dev_width;
+	/*
+	 * If the maxburst is equal to the fifo width, use
+	 * A-synced transfers. This allows for large contiguous
+	 * buffer transfers using only one PaRAM set.
+	 */
+	if (burst == 1) {
+		/*
+		 * For the A-sync case, bcnt and ccnt are the remainder
+		 * and quotient respectively of the division of:
+		 * (dma_length / acnt) by (SZ_64K -1). This is so
+		 * that in case bcnt over flows, we have ccnt to use.
+		 * Note: In A-sync tranfer only, bcntrld is used, but it
+		 * only applies for sg_dma_len(sg) >= SZ_64K.
+		 * In this case, the best way adopted is- bccnt for the
+		 * first frame will be the remainder below. Then for
+		 * every successive frame, bcnt will be SZ_64K-1. This
+		 * is assured as bcntrld = 0xffff in end of function.
+		 */
+		absync = false;
+		ccnt = dma_length / acnt / (SZ_64K - 1);
+		bcnt = dma_length / acnt - ccnt * (SZ_64K - 1);
+		/*
+		 * If bcnt is non-zero, we have a remainder and hence an
+		 * extra frame to transfer, so increment ccnt.
+		 */
+		if (bcnt)
+			ccnt++;
+		else
+			bcnt = SZ_64K - 1;
+		cidx = acnt;
+	} else {
+		/*
+		 * If maxburst is greater than the fifo address_width,
+		 * use AB-synced transfers where A count is the fifo
+		 * address_width and B count is the maxburst. In this
+		 * case, we are limited to transfers of C count frames
+		 * of (address_width * maxburst) where C count is limited
+		 * to SZ_64K-1. This places an upper bound on the length
+		 * of an SG segment that can be handled.
+		 */
+		absync = true;
+		bcnt = burst;
+		ccnt = dma_length / (acnt * bcnt);
+		if (ccnt > (SZ_64K - 1)) {
+			dev_err(dev, "Exceeded max SG segment size\n");
+			return -EINVAL;
+		}
+		cidx = acnt * bcnt;
+	}
+
+	if (direction == DMA_MEM_TO_DEV) {
+		src_bidx = acnt;
+		src_cidx = cidx;
+		dst_bidx = 0;
+		dst_cidx = 0;
+	} else if (direction == DMA_DEV_TO_MEM)  {
+		src_bidx = 0;
+		src_cidx = 0;
+		dst_bidx = acnt;
+		dst_cidx = cidx;
+	} else {
+		dev_err(dev, "%s: direction not implemented yet\n", __func__);
+		return -EINVAL;
+	}
+
+	pset->opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
+	/* Configure A or AB synchronized transfers */
+	if (absync)
+		pset->opt |= SYNCDIM;
+
+	pset->src = src_addr;
+	pset->dst = dst_addr;
+
+	pset->src_dst_bidx = (dst_bidx << 16) | src_bidx;
+	pset->src_dst_cidx = (dst_cidx << 16) | src_cidx;
+
+	pset->a_b_cnt = bcnt << 16 | acnt;
+	pset->ccnt = ccnt;
+	/*
+	 * Only time when (bcntrld) auto reload is required is for
+	 * A-sync case, and in this case, a requirement of reload value
+	 * of SZ_64K-1 only is assured. 'link' is initially set to NULL
+	 * and then later will be populated by edma_execute.
+	 */
+	pset->link_bcntrld = 0xffffffff;
+	return absync;
+}
+
 static struct dma_async_tx_descriptor *edma_prep_slave_sg(
 	struct dma_chan *chan, struct scatterlist *sgl,
 	unsigned int sg_len, enum dma_transfer_direction direction,
@@ -258,23 +381,21 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
 	struct edma_chan *echan = to_edma_chan(chan);
 	struct device *dev = chan->device->dev;
 	struct edma_desc *edesc;
-	dma_addr_t dev_addr;
+	dma_addr_t src_addr = 0, dst_addr = 0;
 	enum dma_slave_buswidth dev_width;
 	u32 burst;
 	struct scatterlist *sg;
-	int acnt, bcnt, ccnt, src, dst, cidx;
-	int src_bidx, dst_bidx, src_cidx, dst_cidx;
-	int i, nslots;
+	int i, nslots, ret;
 
 	if (unlikely(!echan || !sgl || !sg_len))
 		return NULL;
 
 	if (direction == DMA_DEV_TO_MEM) {
-		dev_addr = echan->cfg.src_addr;
+		src_addr = echan->cfg.src_addr;
 		dev_width = echan->cfg.src_addr_width;
 		burst = echan->cfg.src_maxburst;
 	} else if (direction == DMA_MEM_TO_DEV) {
-		dev_addr = echan->cfg.dst_addr;
+		dst_addr = echan->cfg.dst_addr;
 		dev_width = echan->cfg.dst_addr_width;
 		burst = echan->cfg.dst_maxburst;
 	} else {
@@ -307,7 +428,6 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
 			if (echan->slot[i] < 0) {
 				kfree(edesc);
 				dev_err(dev, "Failed to allocate slot\n");
-				kfree(edesc);
 				return NULL;
 			}
 		}
@@ -315,64 +435,21 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
 
 	/* Configure PaRAM sets for each SG */
 	for_each_sg(sgl, sg, sg_len, i) {
-
-		acnt = dev_width;
-
-		/*
-		 * If the maxburst is equal to the fifo width, use
-		 * A-synced transfers. This allows for large contiguous
-		 * buffer transfers using only one PaRAM set.
-		 */
-		if (burst == 1) {
-			edesc->absync = false;
-			ccnt = sg_dma_len(sg) / acnt / (SZ_64K - 1);
-			bcnt = sg_dma_len(sg) / acnt - ccnt * (SZ_64K - 1);
-			if (bcnt)
-				ccnt++;
-			else
-				bcnt = SZ_64K - 1;
-			cidx = acnt;
-		/*
-		 * If maxburst is greater than the fifo address_width,
-		 * use AB-synced transfers where A count is the fifo
-		 * address_width and B count is the maxburst. In this
-		 * case, we are limited to transfers of C count frames
-		 * of (address_width * maxburst) where C count is limited
-		 * to SZ_64K-1. This places an upper bound on the length
-		 * of an SG segment that can be handled.
-		 */
-		} else {
-			edesc->absync = true;
-			bcnt = burst;
-			ccnt = sg_dma_len(sg) / (acnt * bcnt);
-			if (ccnt > (SZ_64K - 1)) {
-				dev_err(dev, "Exceeded max SG segment size\n");
-				kfree(edesc);
-				return NULL;
-			}
-			cidx = acnt * bcnt;
+		/* Get address for each SG */
+		if (direction == DMA_DEV_TO_MEM)
+			dst_addr = sg_dma_address(sg);
+		else
+			src_addr = sg_dma_address(sg);
+
+		ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
+				       dst_addr, burst, dev_width,
+				       sg_dma_len(sg), direction);
+		if (ret < 0) {
+			kfree(edesc);
+			return NULL;
 		}
 
-		if (direction == DMA_MEM_TO_DEV) {
-			src = sg_dma_address(sg);
-			dst = dev_addr;
-			src_bidx = acnt;
-			src_cidx = cidx;
-			dst_bidx = 0;
-			dst_cidx = 0;
-		} else {
-			src = dev_addr;
-			dst = sg_dma_address(sg);
-			src_bidx = 0;
-			src_cidx = 0;
-			dst_bidx = acnt;
-			dst_cidx = cidx;
-		}
-
-		edesc->pset[i].opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
-		/* Configure A or AB synchronized transfers */
-		if (edesc->absync)
-			edesc->pset[i].opt |= SYNCDIM;
+		edesc->absync = ret;
 
 		/* If this is the last in a current SG set of transactions,
 		   enable interrupts so that next set is processed */
@@ -382,17 +459,138 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
 		/* If this is the last set, enable completion interrupt flag */
 		if (i == sg_len - 1)
 			edesc->pset[i].opt |= TCINTEN;
+	}
 
-		edesc->pset[i].src = src;
-		edesc->pset[i].dst = dst;
+	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
 
-		edesc->pset[i].src_dst_bidx = (dst_bidx << 16) | src_bidx;
-		edesc->pset[i].src_dst_cidx = (dst_cidx << 16) | src_cidx;
+static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long tx_flags, void *context)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = chan->device->dev;
+	struct edma_desc *edesc;
+	dma_addr_t src_addr, dst_addr;
+	enum dma_slave_buswidth dev_width;
+	u32 burst;
+	int i, ret, nslots;
+
+	if (unlikely(!echan || !buf_len || !period_len))
+		return NULL;
+
+	if (direction == DMA_DEV_TO_MEM) {
+		src_addr = echan->cfg.src_addr;
+		dst_addr = buf_addr;
+		dev_width = echan->cfg.src_addr_width;
+		burst = echan->cfg.src_maxburst;
+	} else if (direction == DMA_MEM_TO_DEV) {
+		src_addr = buf_addr;
+		dst_addr = echan->cfg.dst_addr;
+		dev_width = echan->cfg.dst_addr_width;
+		burst = echan->cfg.dst_maxburst;
+	} else {
+		dev_err(dev, "%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+		dev_err(dev, "Undefined slave buswidth\n");
+		return NULL;
+	}
+
+	if (unlikely(buf_len % period_len)) {
+		dev_err(dev, "Period should be multiple of Buffer length\n");
+		return NULL;
+	}
+
+	nslots = (buf_len / period_len) + 1;
+
+	/*
+	 * Cyclic DMA users such as audio cannot tolerate delays introduced
+	 * by cases where the number of periods is more than the maximum
+	 * number of SGs the EDMA driver can handle at a time. For DMA types
+	 * such as Slave SGs, such delays are tolerable and synchronized,
+	 * but the synchronization is difficult to achieve with Cyclic and
+	 * cannot be guaranteed, so we error out early.
+	 */
+	if (nslots > MAX_NR_SG)
+		return NULL;
+
+	edesc = kzalloc(sizeof(*edesc) + nslots *
+		sizeof(edesc->pset[0]), GFP_ATOMIC);
+	if (!edesc) {
+		dev_dbg(dev, "Failed to allocate a descriptor\n");
+		return NULL;
+	}
+
+	edesc->cyclic = 1;
+	edesc->pset_nr = nslots;
+
+	dev_dbg(dev, "%s: nslots=%d\n", __func__, nslots);
+	dev_dbg(dev, "%s: period_len=%d\n", __func__, period_len);
+	dev_dbg(dev, "%s: buf_len=%d\n", __func__, buf_len);
+
+	for (i = 0; i < nslots; i++) {
+		/* Allocate a PaRAM slot, if needed */
+		if (echan->slot[i] < 0) {
+			echan->slot[i] =
+				edma_alloc_slot(EDMA_CTLR(echan->ch_num),
+						EDMA_SLOT_ANY);
+			if (echan->slot[i] < 0) {
+				dev_err(dev, "Failed to allocate slot\n");
+				return NULL;
+			}
+		}
+
+		if (i == nslots - 1) {
+			memcpy(&edesc->pset[i], &edesc->pset[0],
+			       sizeof(edesc->pset[0]));
+			break;
+		}
+
+		ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
+				       dst_addr, burst, dev_width, period_len,
+				       direction);
+		if (ret < 0)
+			return NULL;
 
-		edesc->pset[i].a_b_cnt = bcnt << 16 | acnt;
-		edesc->pset[i].ccnt = ccnt;
-		edesc->pset[i].link_bcntrld = 0xffffffff;
+		if (direction == DMA_DEV_TO_MEM)
+			dst_addr += period_len;
+		else
+			src_addr += period_len;
 
+		dev_dbg(dev, "%s: Configure period %d of buf:\n", __func__, i);
+		dev_dbg(dev,
+			"\n pset[%d]:\n"
+			"  chnum\t%d\n"
+			"  slot\t%d\n"
+			"  opt\t%08x\n"
+			"  src\t%08x\n"
+			"  dst\t%08x\n"
+			"  abcnt\t%08x\n"
+			"  ccnt\t%08x\n"
+			"  bidx\t%08x\n"
+			"  cidx\t%08x\n"
+			"  lkrld\t%08x\n",
+			i, echan->ch_num, echan->slot[i],
+			edesc->pset[i].opt,
+			edesc->pset[i].src,
+			edesc->pset[i].dst,
+			edesc->pset[i].a_b_cnt,
+			edesc->pset[i].ccnt,
+			edesc->pset[i].src_dst_bidx,
+			edesc->pset[i].src_dst_cidx,
+			edesc->pset[i].link_bcntrld);
+
+		edesc->absync = ret;
+
+		/*
+		 * Enable interrupts for every period because callback
+		 * has to be called for every period.
+		 */
+		edesc->pset[i].opt |= TCINTEN;
 	}
 
 	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
@@ -406,30 +604,34 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
 	unsigned long flags;
 	struct edmacc_param p;
 
-	/* Pause the channel */
-	edma_pause(echan->ch_num);
+	edesc = echan->edesc;
+
+	/* Pause the channel for non-cyclic */
+	if (!edesc || (edesc && !edesc->cyclic))
+		edma_pause(echan->ch_num);
 
 	switch (ch_status) {
-	case DMA_COMPLETE:
+	case EDMA_DMA_COMPLETE:
 		spin_lock_irqsave(&echan->vchan.lock, flags);
 
-		edesc = echan->edesc;
 		if (edesc) {
-			if (edesc->processed == edesc->pset_nr) {
+			if (edesc->cyclic) {
+				vchan_cyclic_callback(&edesc->vdesc);
+			} else if (edesc->processed == edesc->pset_nr) {
 				dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num);
 				edma_stop(echan->ch_num);
 				vchan_cookie_complete(&edesc->vdesc);
+				edma_execute(echan);
 			} else {
 				dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num);
+				edma_execute(echan);
 			}
-
-			edma_execute(echan);
 		}
 
 		spin_unlock_irqrestore(&echan->vchan.lock, flags);
 
 		break;
-	case DMA_CC_ERROR:
+	case EDMA_DMA_CC_ERROR:
 		spin_lock_irqsave(&echan->vchan.lock, flags);
 
 		edma_read_slot(EDMA_CHAN_SLOT(echan->slot[0]), &p);
@@ -579,7 +781,7 @@ static enum dma_status edma_tx_status(struct dma_chan *chan,
 	unsigned long flags;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_SUCCESS || !txstate)
+	if (ret == DMA_COMPLETE || !txstate)
 		return ret;
 
 	spin_lock_irqsave(&echan->vchan.lock, flags);
@@ -619,6 +821,7 @@ static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma,
 			  struct device *dev)
 {
 	dma->device_prep_slave_sg = edma_prep_slave_sg;
+	dma->device_prep_dma_cyclic = edma_prep_dma_cyclic;
 	dma->device_alloc_chan_resources = edma_alloc_chan_resources;
 	dma->device_free_chan_resources = edma_free_chan_resources;
 	dma->device_issue_pending = edma_issue_pending;
author	Linus Torvalds <torvalds@linux-foundation.org>	2013-11-20 13:20:24 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-11-20 13:20:24 -0800
commit	e6d69a60b77a6ea8d5f9d41765c7571bb8d45531 (patch)
tree	4ea3fe7c49a864da2ce7ffb51a703661826dc15d /drivers/dma/edma.c
parent	5a1efc6e68a095917277459091fafba6a6baef17 (diff)
parent	df12a3178d340319b1955be6b973a4eb84aff754 (diff)
download	blackbird-op-linux-e6d69a60b77a6ea8d5f9d41765c7571bb8d45531.tar.gz blackbird-op-linux-e6d69a60b77a6ea8d5f9d41765c7571bb8d45531.zip