diff options
50 files changed, 5640 insertions, 4852 deletions
diff --git a/Documentation/devicetree/bindings/dma/arm-pl08x.txt b/Documentation/devicetree/bindings/dma/arm-pl08x.txt new file mode 100644 index 000000000000..8a0097a029d3 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/arm-pl08x.txt @@ -0,0 +1,54 @@ +* ARM PrimeCells PL080 and PL081 and derivatives DMA controller + +Required properties: +- compatible: "arm,pl080", "arm,primecell"; + "arm,pl081", "arm,primecell"; +- reg: Address range of the PL08x registers +- interrupt: The PL08x interrupt number +- clocks: The clock running the IP core clock +- clock-names: Must contain "apb_pclk" +- lli-bus-interface-ahb1: if AHB master 1 is eligible for fetching LLIs +- lli-bus-interface-ahb2: if AHB master 2 is eligible for fetching LLIs +- mem-bus-interface-ahb1: if AHB master 1 is eligible for fetching memory contents +- mem-bus-interface-ahb2: if AHB master 2 is eligible for fetching memory contents +- #dma-cells: must be <2>. First cell should contain the DMA request, + second cell should contain either 1 or 2 depending on + which AHB master that is used. + +Optional properties: +- dma-channels: contains the total number of DMA channels supported by the DMAC +- dma-requests: contains the total number of DMA requests supported by the DMAC +- memcpy-burst-size: the size of the bursts for memcpy: 1, 4, 8, 16, 32 + 64, 128 or 256 bytes are legal values +- memcpy-bus-width: the bus width used for memcpy: 8, 16 or 32 are legal + values + +Clients +Required properties: +- dmas: List of DMA controller phandle, request channel and AHB master id +- dma-names: Names of the aforementioned requested channels + +Example: + +dmac0: dma-controller@10130000 { + compatible = "arm,pl080", "arm,primecell"; + reg = <0x10130000 0x1000>; + interrupt-parent = <&vica>; + interrupts = <15>; + clocks = <&hclkdma0>; + clock-names = "apb_pclk"; + lli-bus-interface-ahb1; + lli-bus-interface-ahb2; + mem-bus-interface-ahb2; + memcpy-burst-size = <256>; + memcpy-bus-width = <32>; + #dma-cells = <2>; +}; + +device@40008000 { + ... + dmas = <&dmac0 0 2 + &dmac0 1 2>; + dma-names = "tx", "rx"; + ... +}; diff --git a/Documentation/devicetree/bindings/dma/lpc1850-dmamux.txt b/Documentation/devicetree/bindings/dma/lpc1850-dmamux.txt new file mode 100644 index 000000000000..87740adb2995 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/lpc1850-dmamux.txt @@ -0,0 +1,54 @@ +NXP LPC18xx/43xx DMA MUX (DMA request router) + +Required properties: +- compatible: "nxp,lpc1850-dmamux" +- reg: Memory map for accessing module +- #dma-cells: Should be set to <3>. + * 1st cell contain the master dma request signal + * 2nd cell contain the mux value (0-3) for the peripheral + * 3rd cell contain either 1 or 2 depending on the AHB + master used. +- dma-requests: Number of DMA requests for the mux +- dma-masters: phandle pointing to the DMA controller + +The DMA controller node need to have the following poroperties: +- dma-requests: Number of DMA requests the controller can handle + +Example: + +dmac: dma@40002000 { + compatible = "nxp,lpc1850-gpdma", "arm,pl080", "arm,primecell"; + arm,primecell-periphid = <0x00041080>; + reg = <0x40002000 0x1000>; + interrupts = <2>; + clocks = <&ccu1 CLK_CPU_DMA>; + clock-names = "apb_pclk"; + #dma-cells = <2>; + dma-channels = <8>; + dma-requests = <16>; + lli-bus-interface-ahb1; + lli-bus-interface-ahb2; + mem-bus-interface-ahb1; + mem-bus-interface-ahb2; + memcpy-burst-size = <256>; + memcpy-bus-width = <32>; +}; + +dmamux: dma-mux { + compatible = "nxp,lpc1850-dmamux"; + #dma-cells = <3>; + dma-requests = <64>; + dma-masters = <&dmac>; +}; + +uart0: serial@40081000 { + compatible = "nxp,lpc1850-uart", "ns16550a"; + reg = <0x40081000 0x1000>; + reg-shift = <2>; + interrupts = <24>; + clocks = <&ccu2 CLK_APB0_UART0>, <&ccu1 CLK_CPU_UART0>; + clock-names = "uartclk", "reg"; + dmas = <&dmamux 1 1 2 + &dmamux 2 1 2>; + dma-names = "tx", "rx"; +}; diff --git a/Documentation/devicetree/bindings/dma/mv-xor.txt b/Documentation/devicetree/bindings/dma/mv-xor.txt index cc29c35266e2..276ef815ef32 100644 --- a/Documentation/devicetree/bindings/dma/mv-xor.txt +++ b/Documentation/devicetree/bindings/dma/mv-xor.txt @@ -12,10 +12,13 @@ XOR engine has. Those sub-nodes have the following required properties: - interrupts: interrupt of the XOR channel -And the following optional properties: +The sub-nodes used to contain one or several of the following +properties, but they are now deprecated: - dmacap,memcpy to indicate that the XOR channel is capable of memcpy operations - dmacap,memset to indicate that the XOR channel is capable of memset operations - dmacap,xor to indicate that the XOR channel is capable of xor operations +- dmacap,interrupt to indicate that the XOR channel is capable of + generating interrupts Example: @@ -28,13 +31,8 @@ xor@d0060900 { xor00 { interrupts = <51>; - dmacap,memcpy; - dmacap,xor; }; xor01 { interrupts = <52>; - dmacap,memcpy; - dmacap,xor; - dmacap,memset; }; }; diff --git a/Documentation/devicetree/bindings/dma/sun4i-dma.txt b/Documentation/devicetree/bindings/dma/sun4i-dma.txt new file mode 100644 index 000000000000..f1634a27a830 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/sun4i-dma.txt @@ -0,0 +1,46 @@ +Allwinner A10 DMA Controller + +This driver follows the generic DMA bindings defined in dma.txt. + +Required properties: + +- compatible: Must be "allwinner,sun4i-a10-dma" +- reg: Should contain the registers base address and length +- interrupts: Should contain a reference to the interrupt used by this device +- clocks: Should contain a reference to the parent AHB clock +- #dma-cells : Should be 2, first cell denoting normal or dedicated dma, + second cell holding the request line number. + +Example: + dma: dma-controller@01c02000 { + compatible = "allwinner,sun4i-a10-dma"; + reg = <0x01c02000 0x1000>; + interrupts = <27>; + clocks = <&ahb_gates 6>; + #dma-cells = <2>; + }; + +Clients: + +DMA clients connected to the Allwinner A10 DMA controller must use the +format described in the dma.txt file, using a three-cell specifier for +each channel: a phandle plus two integer cells. +The three cells in order are: + +1. A phandle pointing to the DMA controller. +2. Whether it is using normal (0) or dedicated (1) channels +3. The port ID as specified in the datasheet + +Example: + spi2: spi@01c17000 { + compatible = "allwinner,sun4i-a10-spi"; + reg = <0x01c17000 0x1000>; + interrupts = <0 12 4>; + clocks = <&ahb_gates 22>, <&spi2_clk>; + clock-names = "ahb", "mod"; + dmas = <&dma 1 29>, <&dma 1 28>; + dma-names = "rx", "tx"; + status = "disabled"; + #address-cells = <1>; + #size-cells = <0>; + }; diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt index ca67b0f04c6e..67d4ce4df109 100644 --- a/Documentation/dmaengine/provider.txt +++ b/Documentation/dmaengine/provider.txt @@ -345,12 +345,29 @@ where to put them) that abstracts it away. * DMA_CTRL_ACK - - If set, the transfer can be reused after being completed. - - There is a guarantee the transfer won't be freed until it is acked - by async_tx_ack(). + - If clear, the descriptor cannot be reused by provider until the + client acknowledges receipt, i.e. has has a chance to establish any + dependency chains + - This can be acked by invoking async_tx_ack() + - If set, does not mean descriptor can be reused + + * DMA_CTRL_REUSE + - If set, the descriptor can be reused after being completed. It should + not be freed by provider if this flag is set. + - The descriptor should be prepared for reuse by invoking + dmaengine_desc_set_reuse() which will set DMA_CTRL_REUSE. + - dmaengine_desc_set_reuse() will succeed only when channel support + reusable descriptor as exhibited by capablities - As a consequence, if a device driver wants to skip the dma_map_sg() and dma_unmap_sg() in between 2 transfers, because the DMA'd data wasn't used, it can resubmit the transfer right after its completion. + - Descriptor can be freed in few ways + - Clearing DMA_CTRL_REUSE by invoking dmaengine_desc_clear_reuse() + and submitting for last txn + - Explicitly invoking dmaengine_desc_free(), this can succeed only + when DMA_CTRL_REUSE is already set + - Terminating the channel + General Design Notes -------------------- diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 88d474b78076..0b114c8996af 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -63,9 +63,18 @@ config AMBA_PL08X Platform has a PL08x DMAC device which can provide DMA engine support +config LPC18XX_DMAMUX + bool "NXP LPC18xx/43xx DMA MUX for PL080" + depends on ARCH_LPC18XX || COMPILE_TEST + depends on OF && AMBA_PL08X + select MFD_SYSCON + help + Enable support for DMA on NXP LPC18xx/43xx platforms + with PL080 and multiplexed DMA request lines. + config INTEL_IOATDMA tristate "Intel I/OAT DMA support" - depends on PCI && X86 + depends on PCI && X86_64 select DMA_ENGINE select DMA_ENGINE_RAID select DCA @@ -425,6 +434,17 @@ config XILINX_VDMA channels, Memory Mapped to Stream (MM2S) and Stream to Memory Mapped (S2MM) for the data transfers. +config DMA_SUN4I + tristate "Allwinner A10 DMA SoCs support" + depends on MACH_SUN4I || MACH_SUN5I || MACH_SUN7I || COMPILE_TEST + default (MACH_SUN4I || MACH_SUN5I || MACH_SUN7I) + select DMA_ENGINE + select DMA_OF + select DMA_VIRTUAL_CHANNELS + help + Enable support for the DMA controller present in the sun4i, + sun5i and sun7i Allwinner ARM SoCs. + config DMA_SUN6I tristate "Allwinner A31 SoCs DMA support" depends on MACH_SUN6I || MACH_SUN8I || COMPILE_TEST diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 6a4d6f2827da..e707ff956164 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_TI_EDMA) += edma.o obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o +obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o obj-$(CONFIG_PL330_DMA) += pl330.o obj-$(CONFIG_PCH_DMA) += pch_dma.o obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o @@ -54,5 +55,6 @@ obj-y += xilinx/ obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o +obj-$(CONFIG_DMA_SUN4I) += sun4i-dma.o obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o obj-$(CONFIG_XGENE_DMA) += xgene-dma.o diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 5de3cf453f35..9b42c0588550 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -83,6 +83,8 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/of.h> +#include <linux/of_dma.h> #include <linux/pm_runtime.h> #include <linux/seq_file.h> #include <linux/slab.h> @@ -2030,10 +2032,188 @@ static inline void init_pl08x_debugfs(struct pl08x_driver_data *pl08x) } #endif +#ifdef CONFIG_OF +static struct dma_chan *pl08x_find_chan_id(struct pl08x_driver_data *pl08x, + u32 id) +{ + struct pl08x_dma_chan *chan; + + list_for_each_entry(chan, &pl08x->slave.channels, vc.chan.device_node) { + if (chan->signal == id) + return &chan->vc.chan; + } + + return NULL; +} + +static struct dma_chan *pl08x_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct pl08x_driver_data *pl08x = ofdma->of_dma_data; + struct pl08x_channel_data *data; + struct pl08x_dma_chan *chan; + struct dma_chan *dma_chan; + + if (!pl08x) + return NULL; + + if (dma_spec->args_count != 2) + return NULL; + + dma_chan = pl08x_find_chan_id(pl08x, dma_spec->args[0]); + if (dma_chan) + return dma_get_slave_channel(dma_chan); + + chan = devm_kzalloc(pl08x->slave.dev, sizeof(*chan) + sizeof(*data), + GFP_KERNEL); + if (!chan) + return NULL; + + data = (void *)&chan[1]; + data->bus_id = "(none)"; + data->periph_buses = dma_spec->args[1]; + + chan->cd = data; + chan->host = pl08x; + chan->slave = true; + chan->name = data->bus_id; + chan->state = PL08X_CHAN_IDLE; + chan->signal = dma_spec->args[0]; + chan->vc.desc_free = pl08x_desc_free; + + vchan_init(&chan->vc, &pl08x->slave); + + return dma_get_slave_channel(&chan->vc.chan); +} + +static int pl08x_of_probe(struct amba_device *adev, + struct pl08x_driver_data *pl08x, + struct device_node *np) +{ + struct pl08x_platform_data *pd; + u32 cctl_memcpy = 0; + u32 val; + int ret; + + pd = devm_kzalloc(&adev->dev, sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + /* Eligible bus masters for fetching LLIs */ + if (of_property_read_bool(np, "lli-bus-interface-ahb1")) + pd->lli_buses |= PL08X_AHB1; + if (of_property_read_bool(np, "lli-bus-interface-ahb2")) + pd->lli_buses |= PL08X_AHB2; + if (!pd->lli_buses) { + dev_info(&adev->dev, "no bus masters for LLIs stated, assume all\n"); + pd->lli_buses |= PL08X_AHB1 | PL08X_AHB2; + } + + /* Eligible bus masters for memory access */ + if (of_property_read_bool(np, "mem-bus-interface-ahb1")) + pd->mem_buses |= PL08X_AHB1; + if (of_property_read_bool(np, "mem-bus-interface-ahb2")) + pd->mem_buses |= PL08X_AHB2; + if (!pd->mem_buses) { + dev_info(&adev->dev, "no bus masters for memory stated, assume all\n"); + pd->mem_buses |= PL08X_AHB1 | PL08X_AHB2; + } + + /* Parse the memcpy channel properties */ + ret = of_property_read_u32(np, "memcpy-burst-size", &val); + if (ret) { + dev_info(&adev->dev, "no memcpy burst size specified, using 1 byte\n"); + val = 1; + } + switch (val) { + default: + dev_err(&adev->dev, "illegal burst size for memcpy, set to 1\n"); + /* Fall through */ + case 1: + cctl_memcpy |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 4: + cctl_memcpy |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 8: + cctl_memcpy |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 16: + cctl_memcpy |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 32: + cctl_memcpy |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 64: + cctl_memcpy |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 128: + cctl_memcpy |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 256: + cctl_memcpy |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + } + + ret = of_property_read_u32(np, "memcpy-bus-width", &val); + if (ret) { + dev_info(&adev->dev, "no memcpy bus width specified, using 8 bits\n"); + val = 8; + } + switch (val) { + default: + dev_err(&adev->dev, "illegal bus width for memcpy, set to 8 bits\n"); + /* Fall through */ + case 8: + cctl_memcpy |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case 16: + cctl_memcpy |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case 32: + cctl_memcpy |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + } + + /* This is currently the only thing making sense */ + cctl_memcpy |= PL080_CONTROL_PROT_SYS; + + /* Set up memcpy channel */ + pd->memcpy_channel.bus_id = "memcpy"; + pd->memcpy_channel.cctl_memcpy = cctl_memcpy; + /* Use the buses that can access memory, obviously */ + pd->memcpy_channel.periph_buses = pd->mem_buses; + + pl08x->pd = pd; + + return of_dma_controller_register(adev->dev.of_node, pl08x_of_xlate, + pl08x); +} +#else +static inline int pl08x_of_probe(struct amba_device *adev, + struct pl08x_driver_data *pl08x, + struct device_node *np) +{ + return -EINVAL; +} +#endif + static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) { struct pl08x_driver_data *pl08x; const struct vendor_data *vd = id->data; + struct device_node *np = adev->dev.of_node; u32 tsfr_size; int ret = 0; int i; @@ -2093,9 +2273,15 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) /* Get the platform data */ pl08x->pd = dev_get_platdata(&adev->dev); if (!pl08x->pd) { - dev_err(&adev->dev, "no platform data supplied\n"); - ret = -EINVAL; - goto out_no_platdata; + if (np) { + ret = pl08x_of_probe(adev, pl08x, np); + if (ret) + goto out_no_platdata; + } else { + dev_err(&adev->dev, "no platform data supplied\n"); + ret = -EINVAL; + goto out_no_platdata; + } } /* Assign useful pointers to the driver state */ diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 59892126d175..d313acbb50e0 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -659,14 +659,14 @@ atc_prep_dma_interleaved(struct dma_chan *chan, size_t len = 0; int i; + if (unlikely(!xt || xt->numf != 1 || !xt->frame_size)) + return NULL; + dev_info(chan2dev(chan), "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n", __func__, xt->src_start, xt->dst_start, xt->numf, xt->frame_size, flags); - if (unlikely(!xt || xt->numf != 1 || !xt->frame_size)) - return NULL; - /* * The controller can only "skip" X bytes every Y bytes, so we * need to make sure we are given a template that fit that diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index cf1213de7865..fbf573bcf98e 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -624,12 +624,12 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction direction, unsigned long flags, void *context) { - struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); - struct at_xdmac_desc *first = NULL, *prev = NULL; - struct scatterlist *sg; - int i; - unsigned int xfer_size = 0; - unsigned long irqflags; + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *first = NULL, *prev = NULL; + struct scatterlist *sg; + int i; + unsigned int xfer_size = 0; + unsigned long irqflags; struct dma_async_tx_descriptor *ret = NULL; if (!sgl) @@ -795,10 +795,7 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, list_add_tail(&desc->desc_node, &first->descs_list); } - prev->lld.mbr_nda = first->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", - __func__, prev, &prev->lld.mbr_nda); + at_xdmac_queue_desc(chan, prev, first); first->tx_dma_desc.flags = flags; first->xfer_size = buf_len; first->direction = direction; @@ -1133,7 +1130,7 @@ static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan, * SAMA5D4x), so we can use the same interface for source and dest, * that solves the fact we don't know the direction. */ - u32 chan_cc = AT_XDMAC_CC_DAM_INCREMENTED_AM + u32 chan_cc = AT_XDMAC_CC_DAM_UBS_AM | AT_XDMAC_CC_SAM_INCREMENTED_AM | AT_XDMAC_CC_DIF(0) | AT_XDMAC_CC_SIF(0) @@ -1201,6 +1198,168 @@ at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, return &desc->tx_dma_desc; } +static struct dma_async_tx_descriptor * +at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, int value, + unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *desc, *pdesc = NULL, + *ppdesc = NULL, *first = NULL; + struct scatterlist *sg, *psg = NULL, *ppsg = NULL; + size_t stride = 0, pstride = 0, len = 0; + int i; + + if (!sgl) + return NULL; + + dev_dbg(chan2dev(chan), "%s: sg_len=%d, value=0x%x, flags=0x%lx\n", + __func__, sg_len, value, flags); + + /* Prepare descriptors. */ + for_each_sg(sgl, sg, sg_len, i) { + dev_dbg(chan2dev(chan), "%s: dest=0x%08x, len=%d, pattern=0x%x, flags=0x%lx\n", + __func__, sg_dma_address(sg), sg_dma_len(sg), + value, flags); + desc = at_xdmac_memset_create_desc(chan, atchan, + sg_dma_address(sg), + sg_dma_len(sg), + value); + if (!desc && first) + list_splice_init(&first->descs_list, + &atchan->free_descs_list); + + if (!first) + first = desc; + + /* Update our strides */ + pstride = stride; + if (psg) + stride = sg_dma_address(sg) - + (sg_dma_address(psg) + sg_dma_len(psg)); + + /* + * The scatterlist API gives us only the address and + * length of each elements. + * + * Unfortunately, we don't have the stride, which we + * will need to compute. + * + * That make us end up in a situation like this one: + * len stride len stride len + * +-------+ +-------+ +-------+ + * | N-2 | | N-1 | | N | + * +-------+ +-------+ +-------+ + * + * We need all these three elements (N-2, N-1 and N) + * to actually take the decision on whether we need to + * queue N-1 or reuse N-2. + * + * We will only consider N if it is the last element. + */ + if (ppdesc && pdesc) { + if ((stride == pstride) && + (sg_dma_len(ppsg) == sg_dma_len(psg))) { + dev_dbg(chan2dev(chan), + "%s: desc 0x%p can be merged with desc 0x%p\n", + __func__, pdesc, ppdesc); + + /* + * Increment the block count of the + * N-2 descriptor + */ + at_xdmac_increment_block_count(chan, ppdesc); + ppdesc->lld.mbr_dus = stride; + + /* + * Put back the N-1 descriptor in the + * free descriptor list + */ + list_add_tail(&pdesc->desc_node, + &atchan->free_descs_list); + + /* + * Make our N-1 descriptor pointer + * point to the N-2 since they were + * actually merged. + */ + pdesc = ppdesc; + + /* + * Rule out the case where we don't have + * pstride computed yet (our second sg + * element) + * + * We also want to catch the case where there + * would be a negative stride, + */ + } else if (pstride || + sg_dma_address(sg) < sg_dma_address(psg)) { + /* + * Queue the N-1 descriptor after the + * N-2 + */ + at_xdmac_queue_desc(chan, ppdesc, pdesc); + + /* + * Add the N-1 descriptor to the list + * of the descriptors used for this + * transfer + */ + list_add_tail(&desc->desc_node, + &first->descs_list); + dev_dbg(chan2dev(chan), + "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, desc, first); + } + } + + /* + * If we are the last element, just see if we have the + * same size than the previous element. + * + * If so, we can merge it with the previous descriptor + * since we don't care about the stride anymore. + */ + if ((i == (sg_len - 1)) && + sg_dma_len(ppsg) == sg_dma_len(psg)) { + dev_dbg(chan2dev(chan), + "%s: desc 0x%p can be merged with desc 0x%p\n", + __func__, desc, pdesc); + + /* + * Increment the block count of the N-1 + * descriptor + */ + at_xdmac_increment_block_count(chan, pdesc); + pdesc->lld.mbr_dus = stride; + + /* + * Put back the N descriptor in the free + * descriptor list + */ + list_add_tail(&desc->desc_node, + &atchan->free_descs_list); + } + + /* Update our descriptors */ + ppdesc = pdesc; + pdesc = desc; + + /* Update our scatter pointers */ + ppsg = psg; + psg = sg; + + len += sg_dma_len(sg); + } + + first->tx_dma_desc.cookie = -EBUSY; + first->tx_dma_desc.flags = flags; + first->xfer_size = len; + + return &first->tx_dma_desc; +} + static enum dma_status at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, struct dma_tx_state *txstate) @@ -1734,6 +1893,7 @@ static int at_xdmac_probe(struct platform_device *pdev) dma_cap_set(DMA_INTERLEAVE, atxdmac->dma.cap_mask); dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask); dma_cap_set(DMA_MEMSET, atxdmac->dma.cap_mask); + dma_cap_set(DMA_MEMSET_SG, atxdmac->dma.cap_mask); dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask); /* * Without DMA_PRIVATE the driver is not able to allocate more than @@ -1749,6 +1909,7 @@ static int at_xdmac_probe(struct platform_device *pdev) atxdmac->dma.device_prep_interleaved_dma = at_xdmac_prep_interleaved; atxdmac->dma.device_prep_dma_memcpy = at_xdmac_prep_dma_memcpy; atxdmac->dma.device_prep_dma_memset = at_xdmac_prep_dma_memset; + atxdmac->dma.device_prep_dma_memset_sg = at_xdmac_prep_dma_memset_sg; atxdmac->dma.device_prep_slave_sg = at_xdmac_prep_slave_sg; atxdmac->dma.device_config = at_xdmac_device_config; atxdmac->dma.device_pause = at_xdmac_device_pause; diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index fd22dd36985f..c340ca9bd2b5 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -2730,7 +2730,7 @@ static int __init coh901318_probe(struct platform_device *pdev) * This controller can only access address at even 32bit boundaries, * i.e. 2^2 */ - base->dma_memcpy.copy_align = 2; + base->dma_memcpy.copy_align = DMAENGINE_ALIGN_4_BYTES; err = dma_async_device_register(&base->dma_memcpy); if (err) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index 26d2f0e09ea3..dade7c47ff18 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -145,7 +145,8 @@ struct jz4780_dma_dev { struct jz4780_dma_chan chan[JZ_DMA_NR_CHANNELS]; }; -struct jz4780_dma_data { +struct jz4780_dma_filter_data { + struct device_node *of_node; uint32_t transfer_type; int channel; }; @@ -214,11 +215,25 @@ static void jz4780_dma_desc_free(struct virt_dma_desc *vdesc) kfree(desc); } -static uint32_t jz4780_dma_transfer_size(unsigned long val, int *ord) +static uint32_t jz4780_dma_transfer_size(unsigned long val, uint32_t *shift) { - *ord = ffs(val) - 1; + int ord = ffs(val) - 1; - switch (*ord) { + /* + * 8 byte transfer sizes unsupported so fall back on 4. If it's larger + * than the maximum, just limit it. It is perfectly safe to fall back + * in this way since we won't exceed the maximum burst size supported + * by the device, the only effect is reduced efficiency. This is better + * than refusing to perform the request at all. + */ + if (ord == 3) + ord = 2; + else if (ord > 7) + ord = 7; + + *shift = ord; + + switch (ord) { case 0: return JZ_DMA_SIZE_1_BYTE; case 1: @@ -231,20 +246,17 @@ static uint32_t jz4780_dma_transfer_size(unsigned long val, int *ord) return JZ_DMA_SIZE_32_BYTE; case 6: return JZ_DMA_SIZE_64_BYTE; - case 7: - return JZ_DMA_SIZE_128_BYTE; default: - return -EINVAL; + return JZ_DMA_SIZE_128_BYTE; } } -static uint32_t jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, +static int jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, struct jz4780_dma_hwdesc *desc, dma_addr_t addr, size_t len, enum dma_transfer_direction direction) { struct dma_slave_config *config = &jzchan->config; uint32_t width, maxburst, tsz; - int ord; if (direction == DMA_MEM_TO_DEV) { desc->dcm = JZ_DMA_DCM_SAI; @@ -271,8 +283,8 @@ static uint32_t jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, * divisible by the transfer size, and we must not use more than the * maximum burst specified by the user. */ - tsz = jz4780_dma_transfer_size(addr | len | (width * maxburst), &ord); - jzchan->transfer_shift = ord; + tsz = jz4780_dma_transfer_size(addr | len | (width * maxburst), + &jzchan->transfer_shift); switch (width) { case DMA_SLAVE_BUSWIDTH_1_BYTE: @@ -289,12 +301,14 @@ static uint32_t jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, desc->dcm |= width << JZ_DMA_DCM_SP_SHIFT; desc->dcm |= width << JZ_DMA_DCM_DP_SHIFT; - desc->dtc = len >> ord; + desc->dtc = len >> jzchan->transfer_shift; + return 0; } static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg( struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, - enum dma_transfer_direction direction, unsigned long flags) + enum dma_transfer_direction direction, unsigned long flags, + void *context) { struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_desc *desc; @@ -307,12 +321,11 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg( for (i = 0; i < sg_len; i++) { err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i], - sg_dma_address(&sgl[i]), - sg_dma_len(&sgl[i]), - direction); + sg_dma_address(&sgl[i]), + sg_dma_len(&sgl[i]), + direction); if (err < 0) - return ERR_PTR(err); - + return NULL; desc->desc[i].dcm |= JZ_DMA_DCM_TIE; @@ -354,9 +367,9 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_cyclic( for (i = 0; i < periods; i++) { err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i], buf_addr, - period_len, direction); + period_len, direction); if (err < 0) - return ERR_PTR(err); + return NULL; buf_addr += period_len; @@ -390,15 +403,13 @@ struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy( struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_desc *desc; uint32_t tsz; - int ord; desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY); if (!desc) return NULL; - tsz = jz4780_dma_transfer_size(dest | src | len, &ord); - if (tsz < 0) - return ERR_PTR(tsz); + tsz = jz4780_dma_transfer_size(dest | src | len, + &jzchan->transfer_shift); desc->desc[0].dsa = src; desc->desc[0].dta = dest; @@ -407,7 +418,7 @@ struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy( tsz << JZ_DMA_DCM_TSZ_SHIFT | JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_SP_SHIFT | JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_DP_SHIFT; - desc->desc[0].dtc = len >> ord; + desc->desc[0].dtc = len >> jzchan->transfer_shift; return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags); } @@ -484,8 +495,9 @@ static void jz4780_dma_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&jzchan->vchan.lock, flags); } -static int jz4780_dma_terminate_all(struct jz4780_dma_chan *jzchan) +static int jz4780_dma_terminate_all(struct dma_chan *chan) { + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); unsigned long flags; LIST_HEAD(head); @@ -507,9 +519,11 @@ static int jz4780_dma_terminate_all(struct jz4780_dma_chan *jzchan) return 0; } -static int jz4780_dma_slave_config(struct jz4780_dma_chan *jzchan, - const struct dma_slave_config *config) +static int jz4780_dma_config(struct dma_chan *chan, + struct dma_slave_config *config) { + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + if ((config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES) || (config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)) return -EINVAL; @@ -567,8 +581,8 @@ static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan, txstate->residue = 0; if (vdesc && jzchan->desc && vdesc == &jzchan->desc->vdesc - && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) - status = DMA_ERROR; + && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) + status = DMA_ERROR; spin_unlock_irqrestore(&jzchan->vchan.lock, flags); return status; @@ -671,7 +685,10 @@ static bool jz4780_dma_filter_fn(struct dma_chan *chan, void *param) { struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); - struct jz4780_dma_data *data = param; + struct jz4780_dma_filter_data *data = param; + + if (jzdma->dma_device.dev->of_node != data->of_node) + return false; if (data->channel > -1) { if (data->channel != jzchan->id) @@ -690,11 +707,12 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec, { struct jz4780_dma_dev *jzdma = ofdma->of_dma_data; dma_cap_mask_t mask = jzdma->dma_device.cap_mask; - struct jz4780_dma_data data; + struct jz4780_dma_filter_data data; if (dma_spec->args_count != 2) return NULL; + data.of_node = ofdma->of_node; data.transfer_type = dma_spec->args[0]; data.channel = dma_spec->args[1]; @@ -713,9 +731,14 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec, data.channel); return NULL; } - } - return dma_request_channel(mask, jz4780_dma_filter_fn, &data); + jzdma->chan[data.channel].transfer_type = data.transfer_type; + + return dma_get_slave_channel( + &jzdma->chan[data.channel].vchan.chan); + } else { + return dma_request_channel(mask, jz4780_dma_filter_fn, &data); + } } static int jz4780_dma_probe(struct platform_device *pdev) @@ -743,23 +766,26 @@ static int jz4780_dma_probe(struct platform_device *pdev) if (IS_ERR(jzdma->base)) return PTR_ERR(jzdma->base); - jzdma->irq = platform_get_irq(pdev, 0); - if (jzdma->irq < 0) { + ret = platform_get_irq(pdev, 0); + if (ret < 0) { dev_err(dev, "failed to get IRQ: %d\n", ret); - return jzdma->irq; + return ret; } - ret = devm_request_irq(dev, jzdma->irq, jz4780_dma_irq_handler, 0, - dev_name(dev), jzdma); + jzdma->irq = ret; + + ret = request_irq(jzdma->irq, jz4780_dma_irq_handler, 0, dev_name(dev), + jzdma); if (ret) { dev_err(dev, "failed to request IRQ %u!\n", jzdma->irq); - return -EINVAL; + return ret; } jzdma->clk = devm_clk_get(dev, NULL); if (IS_ERR(jzdma->clk)) { dev_err(dev, "failed to get clock\n"); - return PTR_ERR(jzdma->clk); + ret = PTR_ERR(jzdma->clk); + goto err_free_irq; } clk_prepare_enable(jzdma->clk); @@ -775,13 +801,13 @@ static int jz4780_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_CYCLIC, dd->cap_mask); dd->dev = dev; - dd->copy_align = 2; /* 2^2 = 4 byte alignment */ + dd->copy_align = DMAENGINE_ALIGN_4_BYTES; dd->device_alloc_chan_resources = jz4780_dma_alloc_chan_resources; dd->device_free_chan_resources = jz4780_dma_free_chan_resources; dd->device_prep_slave_sg = jz4780_dma_prep_slave_sg; dd->device_prep_dma_cyclic = jz4780_dma_prep_dma_cyclic; dd->device_prep_dma_memcpy = jz4780_dma_prep_dma_memcpy; - dd->device_config = jz4780_dma_slave_config; + dd->device_config = jz4780_dma_config; dd->device_terminate_all = jz4780_dma_terminate_all; dd->device_tx_status = jz4780_dma_tx_status; dd->device_issue_pending = jz4780_dma_issue_pending; @@ -790,7 +816,6 @@ static int jz4780_dma_probe(struct platform_device *pdev) dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; - /* * Enable DMA controller, mark all channels as not programmable. * Also set the FMSC bit - it increases MSC performance, so it makes @@ -832,15 +857,24 @@ err_unregister_dev: err_disable_clk: clk_disable_unprepare(jzdma->clk); + +err_free_irq: + free_irq(jzdma->irq, jzdma); return ret; } static int jz4780_dma_remove(struct platform_device *pdev) { struct jz4780_dma_dev *jzdma = platform_get_drvdata(pdev); + int i; of_dma_controller_free(pdev->dev.of_node); - devm_free_irq(&pdev->dev, jzdma->irq, jzdma); + + free_irq(jzdma->irq, jzdma); + + for (i = 0; i < JZ_DMA_NR_CHANNELS; i++) + tasklet_kill(&jzdma->chan[i].vchan.task); + dma_async_device_unregister(&jzdma->dma_device); return 0; } diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 88853af69489..3e5d4f193005 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1000,7 +1000,7 @@ static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma, * code using dma memcpy must make sure alignment of * length is at dma->copy_align boundary. */ - dma->copy_align = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma->copy_align = DMAENGINE_ALIGN_4_BYTES; INIT_LIST_HEAD(&dma->channels); } diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c index f42f71e37e73..7669c7dd1e34 100644 --- a/drivers/dma/hsu/hsu.c +++ b/drivers/dma/hsu/hsu.c @@ -99,21 +99,13 @@ static void hsu_dma_chan_start(struct hsu_dma_chan *hsuc) static void hsu_dma_stop_channel(struct hsu_dma_chan *hsuc) { - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); hsu_chan_disable(hsuc); hsu_chan_writel(hsuc, HSU_CH_DCR, 0); - spin_unlock_irqrestore(&hsuc->lock, flags); } static void hsu_dma_start_channel(struct hsu_dma_chan *hsuc) { - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); hsu_dma_chan_start(hsuc); - spin_unlock_irqrestore(&hsuc->lock, flags); } static void hsu_dma_start_transfer(struct hsu_dma_chan *hsuc) @@ -139,9 +131,9 @@ static u32 hsu_dma_chan_get_sr(struct hsu_dma_chan *hsuc) unsigned long flags; u32 sr; - spin_lock_irqsave(&hsuc->lock, flags); + spin_lock_irqsave(&hsuc->vchan.lock, flags); sr = hsu_chan_readl(hsuc, HSU_CH_SR); - spin_unlock_irqrestore(&hsuc->lock, flags); + spin_unlock_irqrestore(&hsuc->vchan.lock, flags); return sr; } @@ -273,14 +265,11 @@ static size_t hsu_dma_active_desc_size(struct hsu_dma_chan *hsuc) struct hsu_dma_desc *desc = hsuc->desc; size_t bytes = hsu_dma_desc_size(desc); int i; - unsigned long flags; - spin_lock_irqsave(&hsuc->lock, flags); i = desc->active % HSU_DMA_CHAN_NR_DESC; do { bytes += hsu_chan_readl(hsuc, HSU_CH_DxTSR(i)); } while (--i >= 0); - spin_unlock_irqrestore(&hsuc->lock, flags); return bytes; } @@ -327,24 +316,6 @@ static int hsu_dma_slave_config(struct dma_chan *chan, return 0; } -static void hsu_dma_chan_deactivate(struct hsu_dma_chan *hsuc) -{ - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); - hsu_chan_disable(hsuc); - spin_unlock_irqrestore(&hsuc->lock, flags); -} - -static void hsu_dma_chan_activate(struct hsu_dma_chan *hsuc) -{ - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); - hsu_chan_enable(hsuc); - spin_unlock_irqrestore(&hsuc->lock, flags); -} - static int hsu_dma_pause(struct dma_chan *chan) { struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan); @@ -352,7 +323,7 @@ static int hsu_dma_pause(struct dma_chan *chan) spin_lock_irqsave(&hsuc->vchan.lock, flags); if (hsuc->desc && hsuc->desc->status == DMA_IN_PROGRESS) { - hsu_dma_chan_deactivate(hsuc); + hsu_chan_disable(hsuc); hsuc->desc->status = DMA_PAUSED; } spin_unlock_irqrestore(&hsuc->vchan.lock, flags); @@ -368,7 +339,7 @@ static int hsu_dma_resume(struct dma_chan *chan) spin_lock_irqsave(&hsuc->vchan.lock, flags); if (hsuc->desc && hsuc->desc->status == DMA_PAUSED) { hsuc->desc->status = DMA_IN_PROGRESS; - hsu_dma_chan_activate(hsuc); + hsu_chan_enable(hsuc); } spin_unlock_irqrestore(&hsuc->vchan.lock, flags); @@ -441,8 +412,6 @@ int hsu_dma_probe(struct hsu_dma_chip *chip) hsuc->direction = (i & 0x1) ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV; hsuc->reg = addr + i * HSU_DMA_CHAN_LENGTH; - - spin_lock_init(&hsuc->lock); } dma_cap_set(DMA_SLAVE, hsu->dma.cap_mask); diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h index 0275233cf550..eeb9fff66967 100644 --- a/drivers/dma/hsu/hsu.h +++ b/drivers/dma/hsu/hsu.h @@ -78,7 +78,6 @@ struct hsu_dma_chan { struct virt_dma_chan vchan; void __iomem *reg; - spinlock_t lock; /* hardware configuration */ enum dma_transfer_direction direction; diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 865501fcc67d..48d85f8b95fe 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -1083,8 +1083,12 @@ static int __init imxdma_probe(struct platform_device *pdev) if (IS_ERR(imxdma->dma_ahb)) return PTR_ERR(imxdma->dma_ahb); - clk_prepare_enable(imxdma->dma_ipg); - clk_prepare_enable(imxdma->dma_ahb); + ret = clk_prepare_enable(imxdma->dma_ipg); + if (ret) + return ret; + ret = clk_prepare_enable(imxdma->dma_ahb); + if (ret) + goto disable_dma_ipg_clk; /* reset DMA module */ imx_dmav1_writel(imxdma, DCR_DRST, DMA_DCR); @@ -1094,20 +1098,20 @@ static int __init imxdma_probe(struct platform_device *pdev) dma_irq_handler, 0, "DMA", imxdma); if (ret) { dev_warn(imxdma->dev, "Can't register IRQ for DMA\n"); - goto err; + goto disable_dma_ahb_clk; } irq_err = platform_get_irq(pdev, 1); if (irq_err < 0) { ret = irq_err; - goto err; + goto disable_dma_ahb_clk; } ret = devm_request_irq(&pdev->dev, irq_err, imxdma_err_handler, 0, "DMA", imxdma); if (ret) { dev_warn(imxdma->dev, "Can't register ERRIRQ for DMA\n"); - goto err; + goto disable_dma_ahb_clk; } } @@ -1144,7 +1148,7 @@ static int __init imxdma_probe(struct platform_device *pdev) dev_warn(imxdma->dev, "Can't register IRQ %d " "for DMA channel %d\n", irq + i, i); - goto err; + goto disable_dma_ahb_clk; } init_timer(&imxdmac->watchdog); imxdmac->watchdog.function = &imxdma_watchdog; @@ -1183,14 +1187,14 @@ static int __init imxdma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, imxdma); - imxdma->dma_device.copy_align = 2; /* 2^2 = 4 bytes alignment */ + imxdma->dma_device.copy_align = DMAENGINE_ALIGN_4_BYTES; imxdma->dma_device.dev->dma_parms = &imxdma->dma_parms; dma_set_max_seg_size(imxdma->dma_device.dev, 0xffffff); ret = dma_async_device_register(&imxdma->dma_device); if (ret) { dev_err(&pdev->dev, "unable to register\n"); - goto err; + goto disable_dma_ahb_clk; } if (pdev->dev.of_node) { @@ -1206,9 +1210,10 @@ static int __init imxdma_probe(struct platform_device *pdev) err_of_dma_controller: dma_async_device_unregister(&imxdma->dma_device); -err: - clk_disable_unprepare(imxdma->dma_ipg); +disable_dma_ahb_clk: clk_disable_unprepare(imxdma->dma_ahb); +disable_dma_ipg_clk: + clk_disable_unprepare(imxdma->dma_ipg); return ret; } diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 77b6aab04f47..9d375bc7590a 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -35,12 +35,16 @@ #include <linux/platform_device.h> #include <linux/dmaengine.h> #include <linux/of.h> +#include <linux/of_address.h> #include <linux/of_device.h> #include <linux/of_dma.h> #include <asm/irq.h> #include <linux/platform_data/dma-imx-sdma.h> #include <linux/platform_data/dma-imx.h> +#include <linux/regmap.h> +#include <linux/mfd/syscon.h> +#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h> #include "dmaengine.h" @@ -124,6 +128,56 @@ #define CHANGE_ENDIANNESS 0x80 /* + * p_2_p watermark_level description + * Bits Name Description + * 0-7 Lower WML Lower watermark level + * 8 PS 1: Pad Swallowing + * 0: No Pad Swallowing + * 9 PA 1: Pad Adding + * 0: No Pad Adding + * 10 SPDIF If this bit is set both source + * and destination are on SPBA + * 11 Source Bit(SP) 1: Source on SPBA + * 0: Source on AIPS + * 12 Destination Bit(DP) 1: Destination on SPBA + * 0: Destination on AIPS + * 13-15 --------- MUST BE 0 + * 16-23 Higher WML HWML + * 24-27 N Total number of samples after + * which Pad adding/Swallowing + * must be done. It must be odd. + * 28 Lower WML Event(LWE) SDMA events reg to check for + * LWML event mask + * 0: LWE in EVENTS register + * 1: LWE in EVENTS2 register + * 29 Higher WML Event(HWE) SDMA events reg to check for + * HWML event mask + * 0: HWE in EVENTS register + * 1: HWE in EVENTS2 register + * 30 --------- MUST BE 0 + * 31 CONT 1: Amount of samples to be + * transferred is unknown and + * script will keep on + * transferring samples as long as + * both events are detected and + * script must be manually stopped + * by the application + * 0: The amount of samples to be + * transferred is equal to the + * count field of mode word + */ +#define SDMA_WATERMARK_LEVEL_LWML 0xFF +#define SDMA_WATERMARK_LEVEL_PS BIT(8) +#define SDMA_WATERMARK_LEVEL_PA BIT(9) +#define SDMA_WATERMARK_LEVEL_SPDIF BIT(10) +#define SDMA_WATERMARK_LEVEL_SP BIT(11) +#define SDMA_WATERMARK_LEVEL_DP BIT(12) +#define SDMA_WATERMARK_LEVEL_HWML (0xFF << 16) +#define SDMA_WATERMARK_LEVEL_LWE BIT(28) +#define SDMA_WATERMARK_LEVEL_HWE BIT(29) +#define SDMA_WATERMARK_LEVEL_CONT BIT(31) + +/* * Mode/Count of data node descriptors - IPCv2 */ struct sdma_mode_count { @@ -259,8 +313,9 @@ struct sdma_channel { struct sdma_buffer_descriptor *bd; dma_addr_t bd_phys; unsigned int pc_from_device, pc_to_device; + unsigned int device_to_device; unsigned long flags; - dma_addr_t per_address; + dma_addr_t per_address, per_address2; unsigned long event_mask[2]; unsigned long watermark_level; u32 shp_addr, per_addr; @@ -328,6 +383,8 @@ struct sdma_engine { u32 script_number; struct sdma_script_start_addrs *script_addrs; const struct sdma_driver_data *drvdata; + u32 spba_start_addr; + u32 spba_end_addr; }; static struct sdma_driver_data sdma_imx31 = { @@ -705,6 +762,7 @@ static void sdma_get_pc(struct sdma_channel *sdmac, sdmac->pc_from_device = 0; sdmac->pc_to_device = 0; + sdmac->device_to_device = 0; switch (peripheral_type) { case IMX_DMATYPE_MEMORY: @@ -780,6 +838,7 @@ static void sdma_get_pc(struct sdma_channel *sdmac, sdmac->pc_from_device = per_2_emi; sdmac->pc_to_device = emi_2_per; + sdmac->device_to_device = per_2_per; } static int sdma_load_context(struct sdma_channel *sdmac) @@ -792,11 +851,12 @@ static int sdma_load_context(struct sdma_channel *sdmac) int ret; unsigned long flags; - if (sdmac->direction == DMA_DEV_TO_MEM) { + if (sdmac->direction == DMA_DEV_TO_MEM) load_address = sdmac->pc_from_device; - } else { + else if (sdmac->direction == DMA_DEV_TO_DEV) + load_address = sdmac->device_to_device; + else load_address = sdmac->pc_to_device; - } if (load_address < 0) return load_address; @@ -851,6 +911,46 @@ static int sdma_disable_channel(struct dma_chan *chan) return 0; } +static void sdma_set_watermarklevel_for_p2p(struct sdma_channel *sdmac) +{ + struct sdma_engine *sdma = sdmac->sdma; + + int lwml = sdmac->watermark_level & SDMA_WATERMARK_LEVEL_LWML; + int hwml = (sdmac->watermark_level & SDMA_WATERMARK_LEVEL_HWML) >> 16; + + set_bit(sdmac->event_id0 % 32, &sdmac->event_mask[1]); + set_bit(sdmac->event_id1 % 32, &sdmac->event_mask[0]); + + if (sdmac->event_id0 > 31) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_LWE; + + if (sdmac->event_id1 > 31) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_HWE; + + /* + * If LWML(src_maxburst) > HWML(dst_maxburst), we need + * swap LWML and HWML of INFO(A.3.2.5.1), also need swap + * r0(event_mask[1]) and r1(event_mask[0]). + */ + if (lwml > hwml) { + sdmac->watermark_level &= ~(SDMA_WATERMARK_LEVEL_LWML | + SDMA_WATERMARK_LEVEL_HWML); + sdmac->watermark_level |= hwml; + sdmac->watermark_level |= lwml << 16; + swap(sdmac->event_mask[0], sdmac->event_mask[1]); + } + + if (sdmac->per_address2 >= sdma->spba_start_addr && + sdmac->per_address2 <= sdma->spba_end_addr) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_SP; + + if (sdmac->per_address >= sdma->spba_start_addr && + sdmac->per_address <= sdma->spba_end_addr) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_DP; + + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_CONT; +} + static int sdma_config_channel(struct dma_chan *chan) { struct sdma_channel *sdmac = to_sdma_chan(chan); @@ -869,6 +969,12 @@ static int sdma_config_channel(struct dma_chan *chan) sdma_event_enable(sdmac, sdmac->event_id0); } + if (sdmac->event_id1) { + if (sdmac->event_id1 >= sdmac->sdma->drvdata->num_events) + return -EINVAL; + sdma_event_enable(sdmac, sdmac->event_id1); + } + switch (sdmac->peripheral_type) { case IMX_DMATYPE_DSP: sdma_config_ownership(sdmac, false, true, true); @@ -887,19 +993,17 @@ static int sdma_config_channel(struct dma_chan *chan) (sdmac->peripheral_type != IMX_DMATYPE_DSP)) { /* Handle multiple event channels differently */ if (sdmac->event_id1) { - sdmac->event_mask[1] = BIT(sdmac->event_id1 % 32); - if (sdmac->event_id1 > 31) - __set_bit(31, &sdmac->watermark_level); - sdmac->event_mask[0] = BIT(sdmac->event_id0 % 32); - if (sdmac->event_id0 > 31) - __set_bit(30, &sdmac->watermark_level); - } else { + if (sdmac->peripheral_type == IMX_DMATYPE_ASRC_SP || + sdmac->peripheral_type == IMX_DMATYPE_ASRC) + sdma_set_watermarklevel_for_p2p(sdmac); + } else __set_bit(sdmac->event_id0, sdmac->event_mask); - } + /* Watermark Level */ sdmac->watermark_level |= sdmac->watermark_level; /* Address */ sdmac->shp_addr = sdmac->per_address; + sdmac->per_addr = sdmac->per_address2; } else { sdmac->watermark_level = 0; /* FIXME: M3_BASE_ADDRESS */ } @@ -987,17 +1091,22 @@ static int sdma_alloc_chan_resources(struct dma_chan *chan) sdmac->peripheral_type = data->peripheral_type; sdmac->event_id0 = data->dma_request; + sdmac->event_id1 = data->dma_request2; - clk_enable(sdmac->sdma->clk_ipg); - clk_enable(sdmac->sdma->clk_ahb); + ret = clk_enable(sdmac->sdma->clk_ipg); + if (ret) + return ret; + ret = clk_enable(sdmac->sdma->clk_ahb); + if (ret) + goto disable_clk_ipg; ret = sdma_request_channel(sdmac); if (ret) - return ret; + goto disable_clk_ahb; ret = sdma_set_channel_priority(sdmac, prio); if (ret) - return ret; + goto disable_clk_ahb; dma_async_tx_descriptor_init(&sdmac->desc, chan); sdmac->desc.tx_submit = sdma_tx_submit; @@ -1005,6 +1114,12 @@ static int sdma_alloc_chan_resources(struct dma_chan *chan) sdmac->desc.flags = DMA_CTRL_ACK; return 0; + +disable_clk_ahb: + clk_disable(sdmac->sdma->clk_ahb); +disable_clk_ipg: + clk_disable(sdmac->sdma->clk_ipg); + return ret; } static void sdma_free_chan_resources(struct dma_chan *chan) @@ -1221,6 +1336,14 @@ static int sdma_config(struct dma_chan *chan, sdmac->watermark_level = dmaengine_cfg->src_maxburst * dmaengine_cfg->src_addr_width; sdmac->word_size = dmaengine_cfg->src_addr_width; + } else if (dmaengine_cfg->direction == DMA_DEV_TO_DEV) { + sdmac->per_address2 = dmaengine_cfg->src_addr; + sdmac->per_address = dmaengine_cfg->dst_addr; + sdmac->watermark_level = dmaengine_cfg->src_maxburst & + SDMA_WATERMARK_LEVEL_LWML; + sdmac->watermark_level |= (dmaengine_cfg->dst_maxburst << 16) & + SDMA_WATERMARK_LEVEL_HWML; + sdmac->word_size = dmaengine_cfg->dst_addr_width; } else { sdmac->per_address = dmaengine_cfg->dst_addr; sdmac->watermark_level = dmaengine_cfg->dst_maxburst * @@ -1337,6 +1460,72 @@ err_firmware: release_firmware(fw); } +#define EVENT_REMAP_CELLS 3 + +static int __init sdma_event_remap(struct sdma_engine *sdma) +{ + struct device_node *np = sdma->dev->of_node; + struct device_node *gpr_np = of_parse_phandle(np, "gpr", 0); + struct property *event_remap; + struct regmap *gpr; + char propname[] = "fsl,sdma-event-remap"; + u32 reg, val, shift, num_map, i; + int ret = 0; + + if (IS_ERR(np) || IS_ERR(gpr_np)) + goto out; + + event_remap = of_find_property(np, propname, NULL); + num_map = event_remap ? (event_remap->length / sizeof(u32)) : 0; + if (!num_map) { + dev_warn(sdma->dev, "no event needs to be remapped\n"); + goto out; + } else if (num_map % EVENT_REMAP_CELLS) { + dev_err(sdma->dev, "the property %s must modulo %d\n", + propname, EVENT_REMAP_CELLS); + ret = -EINVAL; + goto out; + } + + gpr = syscon_node_to_regmap(gpr_np); + if (IS_ERR(gpr)) { + dev_err(sdma->dev, "failed to get gpr regmap\n"); + ret = PTR_ERR(gpr); + goto out; + } + + for (i = 0; i < num_map; i += EVENT_REMAP_CELLS) { + ret = of_property_read_u32_index(np, propname, i, ®); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i); + goto out; + } + + ret = of_property_read_u32_index(np, propname, i + 1, &shift); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i + 1); + goto out; + } + + ret = of_property_read_u32_index(np, propname, i + 2, &val); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i + 2); + goto out; + } + + regmap_update_bits(gpr, reg, BIT(shift), val << shift); + } + +out: + if (!IS_ERR(gpr_np)) + of_node_put(gpr_np); + + return ret; +} + static int sdma_get_firmware(struct sdma_engine *sdma, const char *fw_name) { @@ -1354,8 +1543,12 @@ static int sdma_init(struct sdma_engine *sdma) int i, ret; dma_addr_t ccb_phys; - clk_enable(sdma->clk_ipg); - clk_enable(sdma->clk_ahb); + ret = clk_enable(sdma->clk_ipg); + if (ret) + return ret; + ret = clk_enable(sdma->clk_ahb); + if (ret) + goto disable_clk_ipg; /* Be sure SDMA has not started yet */ writel_relaxed(0, sdma->regs + SDMA_H_C0PTR); @@ -1411,8 +1604,9 @@ static int sdma_init(struct sdma_engine *sdma) return 0; err_dma_alloc: - clk_disable(sdma->clk_ipg); clk_disable(sdma->clk_ahb); +disable_clk_ipg: + clk_disable(sdma->clk_ipg); dev_err(sdma->dev, "initialisation failed with %d\n", ret); return ret; } @@ -1444,6 +1638,14 @@ static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec, data.dma_request = dma_spec->args[0]; data.peripheral_type = dma_spec->args[1]; data.priority = dma_spec->args[2]; + /* + * init dma_request2 to zero, which is not used by the dts. + * For P2P, dma_request2 is init from dma_request_channel(), + * chan->private will point to the imx_dma_data, and in + * device_alloc_chan_resources(), imx_dma_data.dma_request2 will + * be set to sdmac->event_id1. + */ + data.dma_request2 = 0; return dma_request_channel(mask, sdma_filter_fn, &data); } @@ -1453,10 +1655,12 @@ static int sdma_probe(struct platform_device *pdev) const struct of_device_id *of_id = of_match_device(sdma_dt_ids, &pdev->dev); struct device_node *np = pdev->dev.of_node; + struct device_node *spba_bus; const char *fw_name; int ret; int irq; struct resource *iores; + struct resource spba_res; struct sdma_platform_data *pdata = dev_get_platdata(&pdev->dev); int i; struct sdma_engine *sdma; @@ -1551,6 +1755,10 @@ static int sdma_probe(struct platform_device *pdev) if (ret) goto err_init; + ret = sdma_event_remap(sdma); + if (ret) + goto err_init; + if (sdma->drvdata->script_addrs) sdma_add_scripts(sdma, sdma->drvdata->script_addrs); if (pdata && pdata->script_addrs) @@ -1608,6 +1816,14 @@ static int sdma_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to register controller\n"); goto err_register; } + + spba_bus = of_find_compatible_node(NULL, NULL, "fsl,spba-bus"); + ret = of_address_to_resource(spba_bus, 0, &spba_res); + if (!ret) { + sdma->spba_start_addr = spba_res.start; + sdma->spba_end_addr = spba_res.end; + } + of_node_put(spba_bus); } dev_info(sdma->dev, "initialized\n"); diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile index 0ff7270af25b..cf5fedbe2b75 100644 --- a/drivers/dma/ioat/Makefile +++ b/drivers/dma/ioat/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o -ioatdma-y := pci.o dma.o dma_v2.o dma_v3.o dca.o +ioatdma-y := init.o dma.o prep.o dca.o sysfs.o diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c index ea1e107ae884..2cb7c308d5c7 100644 --- a/drivers/dma/ioat/dca.c +++ b/drivers/dma/ioat/dca.c @@ -31,7 +31,6 @@ #include "dma.h" #include "registers.h" -#include "dma_v2.h" /* * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 @@ -71,14 +70,6 @@ static inline int dca2_tag_map_valid(u8 *tag_map) #define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x)) #define IOAT_TAG_MAP_LEN 8 -static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; -static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; -static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), }; -static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 }; - /* pack PCI B/D/F into a u16 */ static inline u16 dcaid_from_pcidev(struct pci_dev *pci) { @@ -126,96 +117,6 @@ struct ioat_dca_priv { struct ioat_dca_slot req_slots[0]; }; -/* 5000 series chipset DCA Port Requester ID Table Entry Format - * [15:8] PCI-Express Bus Number - * [7:3] PCI-Express Device Number - * [2:0] PCI-Express Function Number - * - * 5000 series chipset DCA control register format - * [7:1] Reserved (0) - * [0] Ignore Function Number - */ - -static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 id; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - id = dcaid_from_pcidev(pdev); - - if (ioatdca->requester_count == ioatdca->max_requesters) - return -ENODEV; - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == NULL) { - /* found an empty slot */ - ioatdca->requester_count++; - ioatdca->req_slots[i].pdev = pdev; - ioatdca->req_slots[i].rid = id; - writew(id, ioatdca->dca_base + (i * 4)); - /* make sure the ignore function bit is off */ - writeb(0, ioatdca->dca_base + (i * 4) + 2); - return i; - } - } - /* Error, ioatdma->requester_count is out of whack */ - return -EFAULT; -} - -static int ioat_dca_remove_requester(struct dca_provider *dca, - struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == pdev) { - writew(0, ioatdca->dca_base + (i * 4)); - ioatdca->req_slots[i].pdev = NULL; - ioatdca->req_slots[i].rid = 0; - ioatdca->requester_count--; - return i; - } - } - return -ENODEV; -} - -static u8 ioat_dca_get_tag(struct dca_provider *dca, - struct device *dev, - int cpu) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - int i, apic_id, bit, value; - u8 entry, tag; - - tag = 0; - apic_id = cpu_physical_id(cpu); - - for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { - entry = ioatdca->tag_map[i]; - if (entry & DCA_TAG_MAP_VALID) { - bit = entry & ~DCA_TAG_MAP_VALID; - value = (apic_id & (1 << bit)) ? 1 : 0; - } else { - value = entry ? 1 : 0; - } - tag |= (value << i); - } - return tag; -} - static int ioat_dca_dev_managed(struct dca_provider *dca, struct device *dev) { @@ -231,260 +132,7 @@ static int ioat_dca_dev_managed(struct dca_provider *dca, return 0; } -static struct dca_ops ioat_dca_ops = { - .add_requester = ioat_dca_add_requester, - .remove_requester = ioat_dca_remove_requester, - .get_tag = ioat_dca_get_tag, - .dev_managed = ioat_dca_dev_managed, -}; - - -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) -{ - struct dca_provider *dca; - struct ioat_dca_priv *ioatdca; - u8 *tag_map = NULL; - int i; - int err; - u8 version; - u8 max_requesters; - - if (!system_has_dca_enabled(pdev)) - return NULL; - - /* I/OAT v1 systems must have a known tag_map to support DCA */ - switch (pdev->vendor) { - case PCI_VENDOR_ID_INTEL: - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT: - tag_map = ioat_tag_map_BNB; - break; - case PCI_DEVICE_ID_INTEL_IOAT_CNB: - tag_map = ioat_tag_map_CNB; - break; - case PCI_DEVICE_ID_INTEL_IOAT_SCNB: - tag_map = ioat_tag_map_SCNB; - break; - } - break; - case PCI_VENDOR_ID_UNISYS: - switch (pdev->device) { - case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR: - tag_map = ioat_tag_map_UNISYS; - break; - } - break; - } - if (tag_map == NULL) - return NULL; - - version = readb(iobase + IOAT_VER_OFFSET); - if (version == IOAT_VER_3_0) - max_requesters = IOAT3_DCA_MAX_REQ; - else - max_requesters = IOAT_DCA_MAX_REQ; - - dca = alloc_dca_provider(&ioat_dca_ops, - sizeof(*ioatdca) + - (sizeof(struct ioat_dca_slot) * max_requesters)); - if (!dca) - return NULL; - - ioatdca = dca_priv(dca); - ioatdca->max_requesters = max_requesters; - ioatdca->dca_base = iobase + 0x54; - - /* copy over the APIC ID to DCA tag mapping */ - for (i = 0; i < IOAT_TAG_MAP_LEN; i++) - ioatdca->tag_map[i] = tag_map[i]; - - err = register_dca_provider(dca, &pdev->dev); - if (err) { - free_dca_provider(dca); - return NULL; - } - - return dca; -} - - -static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 id; - u16 global_req_table; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - id = dcaid_from_pcidev(pdev); - - if (ioatdca->requester_count == ioatdca->max_requesters) - return -ENODEV; - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == NULL) { - /* found an empty slot */ - ioatdca->requester_count++; - ioatdca->req_slots[i].pdev = pdev; - ioatdca->req_slots[i].rid = id; - global_req_table = - readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); - writel(id | IOAT_DCA_GREQID_VALID, - ioatdca->iobase + global_req_table + (i * 4)); - return i; - } - } - /* Error, ioatdma->requester_count is out of whack */ - return -EFAULT; -} - -static int ioat2_dca_remove_requester(struct dca_provider *dca, - struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 global_req_table; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == pdev) { - global_req_table = - readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); - writel(0, ioatdca->iobase + global_req_table + (i * 4)); - ioatdca->req_slots[i].pdev = NULL; - ioatdca->req_slots[i].rid = 0; - ioatdca->requester_count--; - return i; - } - } - return -ENODEV; -} - -static u8 ioat2_dca_get_tag(struct dca_provider *dca, - struct device *dev, - int cpu) -{ - u8 tag; - - tag = ioat_dca_get_tag(dca, dev, cpu); - tag = (~tag) & 0x1F; - return tag; -} - -static struct dca_ops ioat2_dca_ops = { - .add_requester = ioat2_dca_add_requester, - .remove_requester = ioat2_dca_remove_requester, - .get_tag = ioat2_dca_get_tag, - .dev_managed = ioat_dca_dev_managed, -}; - -static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) -{ - int slots = 0; - u32 req; - u16 global_req_table; - - global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET); - if (global_req_table == 0) - return 0; - do { - req = readl(iobase + global_req_table + (slots * sizeof(u32))); - slots++; - } while ((req & IOAT_DCA_GREQID_LASTID) == 0); - - return slots; -} - -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) -{ - struct dca_provider *dca; - struct ioat_dca_priv *ioatdca; - int slots; - int i; - int err; - u32 tag_map; - u16 dca_offset; - u16 csi_fsb_control; - u16 pcie_control; - u8 bit; - - if (!system_has_dca_enabled(pdev)) - return NULL; - - dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); - if (dca_offset == 0) - return NULL; - - slots = ioat2_dca_count_dca_slots(iobase, dca_offset); - if (slots == 0) - return NULL; - - dca = alloc_dca_provider(&ioat2_dca_ops, - sizeof(*ioatdca) - + (sizeof(struct ioat_dca_slot) * slots)); - if (!dca) - return NULL; - - ioatdca = dca_priv(dca); - ioatdca->iobase = iobase; - ioatdca->dca_base = iobase + dca_offset; - ioatdca->max_requesters = slots; - - /* some bios might not know to turn these on */ - csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); - if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) { - csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH; - writew(csi_fsb_control, - ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); - } - pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); - if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) { - pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR; - writew(pcie_control, - ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); - } - - - /* TODO version, compatibility and configuration checks */ - - /* copy out the APIC to DCA tag map */ - tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET); - for (i = 0; i < 5; i++) { - bit = (tag_map >> (4 * i)) & 0x0f; - if (bit < 8) - ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID; - else - ioatdca->tag_map[i] = 0; - } - - if (!dca2_tag_map_valid(ioatdca->tag_map)) { - WARN_TAINT_ONCE(1, TAINT_FIRMWARE_WORKAROUND, - "%s %s: APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n", - dev_driver_string(&pdev->dev), - dev_name(&pdev->dev)); - free_dca_provider(dca); - return NULL; - } - - err = register_dca_provider(dca, &pdev->dev); - if (err) { - free_dca_provider(dca); - return NULL; - } - - return dca; -} - -static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) +static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev) { struct ioat_dca_priv *ioatdca = dca_priv(dca); struct pci_dev *pdev; @@ -518,7 +166,7 @@ static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) return -EFAULT; } -static int ioat3_dca_remove_requester(struct dca_provider *dca, +static int ioat_dca_remove_requester(struct dca_provider *dca, struct device *dev) { struct ioat_dca_priv *ioatdca = dca_priv(dca); @@ -545,7 +193,7 @@ static int ioat3_dca_remove_requester(struct dca_provider *dca, return -ENODEV; } -static u8 ioat3_dca_get_tag(struct dca_provider *dca, +static u8 ioat_dca_get_tag(struct dca_provider *dca, struct device *dev, int cpu) { @@ -576,14 +224,14 @@ static u8 ioat3_dca_get_tag(struct dca_provider *dca, return tag; } -static struct dca_ops ioat3_dca_ops = { - .add_requester = ioat3_dca_add_requester, - .remove_requester = ioat3_dca_remove_requester, - .get_tag = ioat3_dca_get_tag, +static struct dca_ops ioat_dca_ops = { + .add_requester = ioat_dca_add_requester, + .remove_requester = ioat_dca_remove_requester, + .get_tag = ioat_dca_get_tag, .dev_managed = ioat_dca_dev_managed, }; -static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) +static int ioat_dca_count_dca_slots(void *iobase, u16 dca_offset) { int slots = 0; u32 req; @@ -618,7 +266,7 @@ static inline int dca3_tag_map_invalid(u8 *tag_map) (tag_map[4] == DCA_TAG_MAP_VALID)); } -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) +struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) { struct dca_provider *dca; struct ioat_dca_priv *ioatdca; @@ -645,11 +293,11 @@ struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) if (dca_offset == 0) return NULL; - slots = ioat3_dca_count_dca_slots(iobase, dca_offset); + slots = ioat_dca_count_dca_slots(iobase, dca_offset); if (slots == 0) return NULL; - dca = alloc_dca_provider(&ioat3_dca_ops, + dca = alloc_dca_provider(&ioat_dca_ops, sizeof(*ioatdca) + (sizeof(struct ioat_dca_slot) * slots)); if (!dca) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index ee0aa9f4ccfa..50d0112b602a 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -1,6 +1,6 @@ /* * Intel I/OAT DMA Linux driver - * Copyright(c) 2004 - 2009 Intel Corporation. + * Copyright(c) 2004 - 2015 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -31,31 +31,23 @@ #include <linux/dma-mapping.h> #include <linux/workqueue.h> #include <linux/prefetch.h> -#include <linux/i7300_idle.h> #include "dma.h" #include "registers.h" #include "hw.h" #include "../dmaengine.h" -int ioat_pending_level = 4; -module_param(ioat_pending_level, int, 0644); -MODULE_PARM_DESC(ioat_pending_level, - "high-water mark for pushing ioat descriptors (default: 4)"); - -/* internal functions */ -static void ioat1_cleanup(struct ioat_dma_chan *ioat); -static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat); +static void ioat_eh(struct ioatdma_chan *ioat_chan); /** * ioat_dma_do_interrupt - handler used for single vector interrupt mode * @irq: interrupt id * @data: interrupt data */ -static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) +irqreturn_t ioat_dma_do_interrupt(int irq, void *data) { struct ioatdma_device *instance = data; - struct ioat_chan_common *chan; + struct ioatdma_chan *ioat_chan; unsigned long attnstatus; int bit; u8 intrctrl; @@ -72,9 +64,9 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) { - chan = ioat_chan_by_index(instance, bit); - if (test_bit(IOAT_RUN, &chan->state)) - tasklet_schedule(&chan->cleanup_task); + ioat_chan = ioat_chan_by_index(instance, bit); + if (test_bit(IOAT_RUN, &ioat_chan->state)) + tasklet_schedule(&ioat_chan->cleanup_task); } writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); @@ -86,1161 +78,910 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) * @irq: interrupt id * @data: interrupt data */ -static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) +irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) { - struct ioat_chan_common *chan = data; + struct ioatdma_chan *ioat_chan = data; - if (test_bit(IOAT_RUN, &chan->state)) - tasklet_schedule(&chan->cleanup_task); + if (test_bit(IOAT_RUN, &ioat_chan->state)) + tasklet_schedule(&ioat_chan->cleanup_task); return IRQ_HANDLED; } -/* common channel initialization */ -void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *chan, int idx) +void ioat_stop(struct ioatdma_chan *ioat_chan) { - struct dma_device *dma = &device->common; - struct dma_chan *c = &chan->common; - unsigned long data = (unsigned long) c; - - chan->device = device; - chan->reg_base = device->reg_base + (0x80 * (idx + 1)); - spin_lock_init(&chan->cleanup_lock); - chan->common.device = dma; - dma_cookie_init(&chan->common); - list_add_tail(&chan->common.device_node, &dma->channels); - device->idx[idx] = chan; - init_timer(&chan->timer); - chan->timer.function = device->timer_fn; - chan->timer.data = data; - tasklet_init(&chan->cleanup_task, device->cleanup_fn, data); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct pci_dev *pdev = ioat_dma->pdev; + int chan_id = chan_num(ioat_chan); + struct msix_entry *msix; + + /* 1/ stop irq from firing tasklets + * 2/ stop the tasklet from re-arming irqs + */ + clear_bit(IOAT_RUN, &ioat_chan->state); + + /* flush inflight interrupts */ + switch (ioat_dma->irq_mode) { + case IOAT_MSIX: + msix = &ioat_dma->msix_entries[chan_id]; + synchronize_irq(msix->vector); + break; + case IOAT_MSI: + case IOAT_INTX: + synchronize_irq(pdev->irq); + break; + default: + break; + } + + /* flush inflight timers */ + del_timer_sync(&ioat_chan->timer); + + /* flush inflight tasklet runs */ + tasklet_kill(&ioat_chan->cleanup_task); + + /* final cleanup now that everything is quiesced and can't re-arm */ + ioat_cleanup_event((unsigned long)&ioat_chan->dma_chan); } -/** - * ioat1_dma_enumerate_channels - find and initialize the device's channels - * @device: the device to be enumerated - */ -static int ioat1_enumerate_channels(struct ioatdma_device *device) +static void __ioat_issue_pending(struct ioatdma_chan *ioat_chan) { - u8 xfercap_scale; - u32 xfercap; - int i; - struct ioat_dma_chan *ioat; - struct device *dev = &device->pdev->dev; - struct dma_device *dma = &device->common; - - INIT_LIST_HEAD(&dma->channels); - dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); - dma->chancnt &= 0x1f; /* bits [4:0] valid */ - if (dma->chancnt > ARRAY_SIZE(device->idx)) { - dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", - dma->chancnt, ARRAY_SIZE(device->idx)); - dma->chancnt = ARRAY_SIZE(device->idx); - } - xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); - xfercap_scale &= 0x1f; /* bits [4:0] valid */ - xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); - dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap); - -#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) - dma->chancnt--; -#endif - for (i = 0; i < dma->chancnt; i++) { - ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); - if (!ioat) - break; + ioat_chan->dmacount += ioat_ring_pending(ioat_chan); + ioat_chan->issued = ioat_chan->head; + writew(ioat_chan->dmacount, + ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, + ioat_chan->issued, ioat_chan->dmacount); +} + +void ioat_issue_pending(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); - ioat_init_channel(device, &ioat->base, i); - ioat->xfercap = xfercap; - spin_lock_init(&ioat->desc_lock); - INIT_LIST_HEAD(&ioat->free_desc); - INIT_LIST_HEAD(&ioat->used_desc); + if (ioat_ring_pending(ioat_chan)) { + spin_lock_bh(&ioat_chan->prep_lock); + __ioat_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); } - dma->chancnt = i; - return i; } /** - * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended - * descriptors to hw - * @chan: DMA channel handle + * ioat_update_pending - log pending descriptors + * @ioat: ioat+ channel + * + * Check if the number of unsubmitted descriptors has exceeded the + * watermark. Called with prep_lock held */ -static inline void -__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat) +static void ioat_update_pending(struct ioatdma_chan *ioat_chan) { - void __iomem *reg_base = ioat->base.reg_base; - - dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n", - __func__, ioat->pending); - ioat->pending = 0; - writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET); + if (ioat_ring_pending(ioat_chan) > ioat_pending_level) + __ioat_issue_pending(ioat_chan); } -static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) +static void __ioat_start_null_desc(struct ioatdma_chan *ioat_chan) { - struct ioat_dma_chan *ioat = to_ioat_chan(chan); + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; - if (ioat->pending > 0) { - spin_lock_bh(&ioat->desc_lock); - __ioat1_dma_memcpy_issue_pending(ioat); - spin_unlock_bh(&ioat->desc_lock); + if (ioat_ring_space(ioat_chan) < 1) { + dev_err(to_dev(ioat_chan), + "Unable to start null desc - ring full\n"); + return; } + + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued); + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head); + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.compl_write = 1; + /* set size to non-zero value (channel returns error when size is 0) */ + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + async_tx_ack(&desc->txd); + ioat_set_chainaddr(ioat_chan, desc->txd.phys); + dump_desc_dbg(ioat_chan, desc); + /* make sure descriptors are written before we submit */ + wmb(); + ioat_chan->head += 1; + __ioat_issue_pending(ioat_chan); } -/** - * ioat1_reset_channel - restart a channel - * @ioat: IOAT DMA channel handle - */ -static void ioat1_reset_channel(struct ioat_dma_chan *ioat) +void ioat_start_null_desc(struct ioatdma_chan *ioat_chan) { - struct ioat_chan_common *chan = &ioat->base; - void __iomem *reg_base = chan->reg_base; - u32 chansts, chanerr; - - dev_warn(to_dev(chan), "reset\n"); - chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); - chansts = *chan->completion & IOAT_CHANSTS_STATUS; - if (chanerr) { - dev_err(to_dev(chan), - "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", - chan_num(chan), chansts, chanerr); - writel(chanerr, reg_base + IOAT_CHANERR_OFFSET); + spin_lock_bh(&ioat_chan->prep_lock); + __ioat_start_null_desc(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); +} + +static void __ioat_restart_chan(struct ioatdma_chan *ioat_chan) +{ + /* set the tail to be re-issued */ + ioat_chan->issued = ioat_chan->tail; + ioat_chan->dmacount = 0; + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, + ioat_chan->issued, ioat_chan->dmacount); + + if (ioat_ring_pending(ioat_chan)) { + struct ioat_ring_ent *desc; + + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail); + ioat_set_chainaddr(ioat_chan, desc->txd.phys); + __ioat_issue_pending(ioat_chan); + } else + __ioat_start_null_desc(ioat_chan); +} + +static int ioat_quiesce(struct ioatdma_chan *ioat_chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + u32 status; + + status = ioat_chansts(ioat_chan); + if (is_ioat_active(status) || is_ioat_idle(status)) + ioat_suspend(ioat_chan); + while (is_ioat_active(status) || is_ioat_idle(status)) { + if (tmo && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + status = ioat_chansts(ioat_chan); + cpu_relax(); } - /* - * whack it upside the head with a reset - * and wait for things to settle out. - * force the pending count to a really big negative - * to make sure no one forces an issue_pending - * while we're waiting. - */ + return err; +} - ioat->pending = INT_MIN; - writeb(IOAT_CHANCMD_RESET, - reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - set_bit(IOAT_RESET_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + RESET_DELAY); +static int ioat_reset_sync(struct ioatdma_chan *ioat_chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + + ioat_reset(ioat_chan); + while (ioat_reset_pending(ioat_chan)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + cpu_relax(); + } + + return err; } -static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) +static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx) { struct dma_chan *c = tx->chan; - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); - struct ioat_chan_common *chan = &ioat->base; - struct ioat_desc_sw *first; - struct ioat_desc_sw *chain_tail; + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); dma_cookie_t cookie; - spin_lock_bh(&ioat->desc_lock); - /* cookie incr and addition to used_list must be atomic */ cookie = dma_cookie_assign(tx); - dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); + dev_dbg(to_dev(ioat_chan), "%s: cookie: %d\n", __func__, cookie); + + if (!test_and_set_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state)) + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); - /* write address into NextDescriptor field of last desc in chain */ - first = to_ioat_desc(desc->tx_list.next); - chain_tail = to_ioat_desc(ioat->used_desc.prev); - /* make descriptor updates globally visible before chaining */ + /* make descriptor updates visible before advancing ioat->head, + * this is purposefully not smp_wmb() since we are also + * publishing the descriptor updates to a dma device + */ wmb(); - chain_tail->hw->next = first->txd.phys; - list_splice_tail_init(&desc->tx_list, &ioat->used_desc); - dump_desc_dbg(ioat, chain_tail); - dump_desc_dbg(ioat, first); - if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + ioat_chan->head += ioat_chan->produce; - ioat->active += desc->hw->tx_cnt; - ioat->pending += desc->hw->tx_cnt; - if (ioat->pending >= ioat_pending_level) - __ioat1_dma_memcpy_issue_pending(ioat); - spin_unlock_bh(&ioat->desc_lock); + ioat_update_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); return cookie; } -/** - * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair - * @ioat: the channel supplying the memory pool for the descriptors - * @flags: allocation flags - */ -static struct ioat_desc_sw * -ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) +static struct ioat_ring_ent * +ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) { - struct ioat_dma_descriptor *desc; - struct ioat_desc_sw *desc_sw; - struct ioatdma_device *ioatdma_device; + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + struct ioatdma_device *ioat_dma; dma_addr_t phys; - ioatdma_device = ioat->base.device; - desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); - if (unlikely(!desc)) + ioat_dma = to_ioatdma_device(chan->device); + hw = pci_pool_alloc(ioat_dma->dma_pool, flags, &phys); + if (!hw) return NULL; + memset(hw, 0, sizeof(*hw)); - desc_sw = kzalloc(sizeof(*desc_sw), flags); - if (unlikely(!desc_sw)) { - pci_pool_free(ioatdma_device->dma_pool, desc, phys); + desc = kmem_cache_zalloc(ioat_cache, flags); + if (!desc) { + pci_pool_free(ioat_dma->dma_pool, hw, phys); return NULL; } - memset(desc, 0, sizeof(*desc)); + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = ioat_tx_submit_unlock; + desc->hw = hw; + desc->txd.phys = phys; + return desc; +} - INIT_LIST_HEAD(&desc_sw->tx_list); - dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common); - desc_sw->txd.tx_submit = ioat1_tx_submit; - desc_sw->hw = desc; - desc_sw->txd.phys = phys; - set_desc_id(desc_sw, -1); +void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) +{ + struct ioatdma_device *ioat_dma; - return desc_sw; + ioat_dma = to_ioatdma_device(chan->device); + pci_pool_free(ioat_dma->dma_pool, desc->hw, desc->txd.phys); + kmem_cache_free(ioat_cache, desc); } -static int ioat_initial_desc_count = 256; -module_param(ioat_initial_desc_count, int, 0644); -MODULE_PARM_DESC(ioat_initial_desc_count, - "ioat1: initial descriptors per channel (default: 256)"); -/** - * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors - * @chan: the channel to be filled out - */ -static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) +struct ioat_ring_ent ** +ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags) { - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioat_desc_sw *desc; - u32 chanerr; + struct ioat_ring_ent **ring; + int descs = 1 << order; int i; - LIST_HEAD(tmp_list); - - /* have we already been set up? */ - if (!list_empty(&ioat->free_desc)) - return ioat->desccount; - /* Setup register to interrupt and write completion status on error */ - writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); + if (order > ioat_get_max_alloc_order()) + return NULL; - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - if (chanerr) { - dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + /* allocate the array to hold the software ring */ + ring = kcalloc(descs, sizeof(*ring), flags); + if (!ring) + return NULL; + for (i = 0; i < descs; i++) { + ring[i] = ioat_alloc_ring_ent(c, flags); + if (!ring[i]) { + while (i--) + ioat_free_ring_ent(ring[i], c); + kfree(ring); + return NULL; + } + set_desc_id(ring[i], i); } - /* Allocate descriptors */ - for (i = 0; i < ioat_initial_desc_count; i++) { - desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL); - if (!desc) { - dev_err(to_dev(chan), "Only %d initial descriptors\n", i); - break; - } - set_desc_id(desc, i); - list_add_tail(&desc->node, &tmp_list); + /* link descs */ + for (i = 0; i < descs-1; i++) { + struct ioat_ring_ent *next = ring[i+1]; + struct ioat_dma_descriptor *hw = ring[i]->hw; + + hw->next = next->txd.phys; } - spin_lock_bh(&ioat->desc_lock); - ioat->desccount = i; - list_splice(&tmp_list, &ioat->free_desc); - spin_unlock_bh(&ioat->desc_lock); - - /* allocate a completion writeback area */ - /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ - chan->completion = pci_pool_alloc(chan->device->completion_pool, - GFP_KERNEL, &chan->completion_dma); - memset(chan->completion, 0, sizeof(*chan->completion)); - writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - writel(((u64) chan->completion_dma) >> 32, - chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - - set_bit(IOAT_RUN, &chan->state); - ioat1_dma_start_null_desc(ioat); /* give chain to dma device */ - dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", - __func__, ioat->desccount); - return ioat->desccount; + ring[i]->hw->next = ring[0]->txd.phys; + + return ring; } -void ioat_stop(struct ioat_chan_common *chan) +static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order) { - struct ioatdma_device *device = chan->device; - struct pci_dev *pdev = device->pdev; - int chan_id = chan_num(chan); - struct msix_entry *msix; + /* reshape differs from normal ring allocation in that we want + * to allocate a new software ring while only + * extending/truncating the hardware ring + */ + struct dma_chan *c = &ioat_chan->dma_chan; + const u32 curr_size = ioat_ring_size(ioat_chan); + const u16 active = ioat_ring_active(ioat_chan); + const u32 new_size = 1 << order; + struct ioat_ring_ent **ring; + u32 i; + + if (order > ioat_get_max_alloc_order()) + return false; - /* 1/ stop irq from firing tasklets - * 2/ stop the tasklet from re-arming irqs + /* double check that we have at least 1 free descriptor */ + if (active == curr_size) + return false; + + /* when shrinking, verify that we can hold the current active + * set in the new ring */ - clear_bit(IOAT_RUN, &chan->state); + if (active >= new_size) + return false; - /* flush inflight interrupts */ - switch (device->irq_mode) { - case IOAT_MSIX: - msix = &device->msix_entries[chan_id]; - synchronize_irq(msix->vector); - break; - case IOAT_MSI: - case IOAT_INTX: - synchronize_irq(pdev->irq); - break; - default: - break; - } + /* allocate the array to hold the software ring */ + ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT); + if (!ring) + return false; - /* flush inflight timers */ - del_timer_sync(&chan->timer); + /* allocate/trim descriptors as needed */ + if (new_size > curr_size) { + /* copy current descriptors to the new ring */ + for (i = 0; i < curr_size; i++) { + u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1); + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); - /* flush inflight tasklet runs */ - tasklet_kill(&chan->cleanup_task); + ring[new_idx] = ioat_chan->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } - /* final cleanup now that everything is quiesced and can't re-arm */ - device->cleanup_fn((unsigned long) &chan->common); -} + /* add new descriptors to the ring */ + for (i = curr_size; i < new_size; i++) { + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); -/** - * ioat1_dma_free_chan_resources - release all the descriptors - * @chan: the channel to be cleaned - */ -static void ioat1_dma_free_chan_resources(struct dma_chan *c) -{ - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *ioatdma_device = chan->device; - struct ioat_desc_sw *desc, *_desc; - int in_use_descs = 0; - - /* Before freeing channel resources first check - * if they have been previously allocated for this channel. - */ - if (ioat->desccount == 0) - return; + ring[new_idx] = ioat_alloc_ring_ent(c, GFP_NOWAIT); + if (!ring[new_idx]) { + while (i--) { + u16 new_idx = (ioat_chan->tail+i) & + (new_size-1); + + ioat_free_ring_ent(ring[new_idx], c); + } + kfree(ring); + return false; + } + set_desc_id(ring[new_idx], new_idx); + } - ioat_stop(chan); + /* hw link new descriptors */ + for (i = curr_size-1; i < new_size; i++) { + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); + struct ioat_ring_ent *next = + ring[(new_idx+1) & (new_size-1)]; + struct ioat_dma_descriptor *hw = ring[new_idx]->hw; - /* Delay 100ms after reset to allow internal DMA logic to quiesce - * before removing DMA descriptor resources. - */ - writeb(IOAT_CHANCMD_RESET, - chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - mdelay(100); - - spin_lock_bh(&ioat->desc_lock); - list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { - dev_dbg(to_dev(chan), "%s: freeing %d from used list\n", - __func__, desc_id(desc)); - dump_desc_dbg(ioat, desc); - in_use_descs++; - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->txd.phys); - kfree(desc); - } - list_for_each_entry_safe(desc, _desc, - &ioat->free_desc, node) { - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->txd.phys); - kfree(desc); + hw->next = next->txd.phys; + } + } else { + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *next; + + /* copy current descriptors to the new ring, dropping the + * removed descriptors + */ + for (i = 0; i < new_size; i++) { + u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1); + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); + + ring[new_idx] = ioat_chan->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } + + /* free deleted descriptors */ + for (i = new_size; i < curr_size; i++) { + struct ioat_ring_ent *ent; + + ent = ioat_get_ring_ent(ioat_chan, ioat_chan->tail+i); + ioat_free_ring_ent(ent, c); + } + + /* fix up hardware ring */ + hw = ring[(ioat_chan->tail+new_size-1) & (new_size-1)]->hw; + next = ring[(ioat_chan->tail+new_size) & (new_size-1)]; + hw->next = next->txd.phys; } - spin_unlock_bh(&ioat->desc_lock); - pci_pool_free(ioatdma_device->completion_pool, - chan->completion, - chan->completion_dma); + dev_dbg(to_dev(ioat_chan), "%s: allocated %d descriptors\n", + __func__, new_size); - /* one is ok since we left it on there on purpose */ - if (in_use_descs > 1) - dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", - in_use_descs - 1); + kfree(ioat_chan->ring); + ioat_chan->ring = ring; + ioat_chan->alloc_order = order; - chan->last_completion = 0; - chan->completion_dma = 0; - ioat->pending = 0; - ioat->desccount = 0; + return true; } /** - * ioat1_dma_get_next_descriptor - return the next available descriptor - * @ioat: IOAT DMA channel handle - * - * Gets the next descriptor from the chain, and must be called with the - * channel's desc_lock held. Allocates more descriptors if the channel - * has run out. + * ioat_check_space_lock - verify space and grab ring producer lock + * @ioat: ioat,3 channel (ring) to operate on + * @num_descs: allocation length */ -static struct ioat_desc_sw * -ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat) +int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs) { - struct ioat_desc_sw *new; + bool retry; - if (!list_empty(&ioat->free_desc)) { - new = to_ioat_desc(ioat->free_desc.next); - list_del(&new->node); - } else { - /* try to get another desc */ - new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC); - if (!new) { - dev_err(to_dev(&ioat->base), "alloc failed\n"); - return NULL; - } + retry: + spin_lock_bh(&ioat_chan->prep_lock); + /* never allow the last descriptor to be consumed, we need at + * least one free at all times to allow for on-the-fly ring + * resizing. + */ + if (likely(ioat_ring_space(ioat_chan) > num_descs)) { + dev_dbg(to_dev(ioat_chan), "%s: num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat_chan->head, + ioat_chan->tail, ioat_chan->issued); + ioat_chan->produce = num_descs; + return 0; /* with ioat->prep_lock held */ + } + retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + + /* is another cpu already trying to expand the ring? */ + if (retry) + goto retry; + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->prep_lock); + retry = reshape_ring(ioat_chan, ioat_chan->alloc_order + 1); + clear_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + + /* if we were able to expand the ring retry the allocation */ + if (retry) + goto retry; + + dev_dbg_ratelimited(to_dev(ioat_chan), + "%s: ring full! num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat_chan->head, + ioat_chan->tail, ioat_chan->issued); + + /* progress reclaim in the allocation failure case we may be + * called under bh_disabled so we need to trigger the timer + * event directly + */ + if (time_is_before_jiffies(ioat_chan->timer.expires) + && timer_pending(&ioat_chan->timer)) { + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + ioat_timer_event((unsigned long)ioat_chan); } - dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n", - __func__, desc_id(new)); - prefetch(new->hw); - return new; + + return -ENOMEM; } -static struct dma_async_tx_descriptor * -ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags) +static bool desc_has_ext(struct ioat_ring_ent *desc) { - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_desc_sw *desc; - size_t copy; - LIST_HEAD(chain); - dma_addr_t src = dma_src; - dma_addr_t dest = dma_dest; - size_t total_len = len; - struct ioat_dma_descriptor *hw = NULL; - int tx_cnt = 0; - - spin_lock_bh(&ioat->desc_lock); - desc = ioat1_dma_get_next_descriptor(ioat); - do { - if (!desc) - break; - - tx_cnt++; - copy = min_t(size_t, len, ioat->xfercap); + struct ioat_dma_descriptor *hw = desc->hw; - hw = desc->hw; - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dest; + if (hw->ctl_f.op == IOAT_OP_XOR || + hw->ctl_f.op == IOAT_OP_XOR_VAL) { + struct ioat_xor_descriptor *xor = desc->xor; - list_add_tail(&desc->node, &chain); + if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) + return true; + } else if (hw->ctl_f.op == IOAT_OP_PQ || + hw->ctl_f.op == IOAT_OP_PQ_VAL) { + struct ioat_pq_descriptor *pq = desc->pq; - len -= copy; - dest += copy; - src += copy; - if (len) { - struct ioat_desc_sw *next; - - async_tx_ack(&desc->txd); - next = ioat1_dma_get_next_descriptor(ioat); - hw->next = next ? next->txd.phys : 0; - dump_desc_dbg(ioat, desc); - desc = next; - } else - hw->next = 0; - } while (len); - - if (!desc) { - struct ioat_chan_common *chan = &ioat->base; - - dev_err(to_dev(chan), - "chan%d - get_next_desc failed\n", chan_num(chan)); - list_splice(&chain, &ioat->free_desc); - spin_unlock_bh(&ioat->desc_lock); - return NULL; + if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) + return true; } - spin_unlock_bh(&ioat->desc_lock); - desc->txd.flags = flags; - desc->len = total_len; - list_splice(&chain, &desc->tx_list); - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->tx_cnt = tx_cnt; - dump_desc_dbg(ioat, desc); - - return &desc->txd; + return false; } -static void ioat1_cleanup_event(unsigned long data) +static void +ioat_free_sed(struct ioatdma_device *ioat_dma, struct ioat_sed_ent *sed) { - struct ioat_dma_chan *ioat = to_ioat_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - - ioat1_cleanup(ioat); - if (!test_bit(IOAT_RUN, &chan->state)) + if (!sed) return; - writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); + + dma_pool_free(ioat_dma->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); + kmem_cache_free(ioat_sed_cache, sed); } -dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan) +static u64 ioat_get_current_completion(struct ioatdma_chan *ioat_chan) { - dma_addr_t phys_complete; + u64 phys_complete; u64 completion; - completion = *chan->completion; + completion = *ioat_chan->completion; phys_complete = ioat_chansts_to_addr(completion); - dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, + dev_dbg(to_dev(ioat_chan), "%s: phys_complete: %#llx\n", __func__, (unsigned long long) phys_complete); - if (is_ioat_halted(completion)) { - u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - dev_err(to_dev(chan), "Channel halted, chanerr = %x\n", - chanerr); - - /* TODO do something to salvage the situation */ - } - return phys_complete; } -bool ioat_cleanup_preamble(struct ioat_chan_common *chan, - dma_addr_t *phys_complete) +static bool ioat_cleanup_preamble(struct ioatdma_chan *ioat_chan, + u64 *phys_complete) { - *phys_complete = ioat_get_current_completion(chan); - if (*phys_complete == chan->last_completion) + *phys_complete = ioat_get_current_completion(ioat_chan); + if (*phys_complete == ioat_chan->last_completion) return false; - clear_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + clear_bit(IOAT_COMPLETION_ACK, &ioat_chan->state); + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); return true; } -static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete) +static void +desc_get_errstat(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc) { - struct ioat_chan_common *chan = &ioat->base; - struct list_head *_desc, *n; - struct dma_async_tx_descriptor *tx; + struct ioat_dma_descriptor *hw = desc->hw; - dev_dbg(to_dev(chan), "%s: phys_complete: %llx\n", - __func__, (unsigned long long) phys_complete); - list_for_each_safe(_desc, n, &ioat->used_desc) { - struct ioat_desc_sw *desc; + switch (hw->ctl_f.op) { + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: + { + struct ioat_pq_descriptor *pq = desc->pq; - prefetch(n); - desc = list_entry(_desc, typeof(*desc), node); - tx = &desc->txd; - /* - * Incoming DMA requests may use multiple descriptors, - * due to exceeding xfercap, perhaps. If so, only the - * last one will have a cookie, and require unmapping. - */ - dump_desc_dbg(ioat, desc); - if (tx->cookie) { - dma_cookie_complete(tx); - dma_descriptor_unmap(tx); - ioat->active -= desc->hw->tx_cnt; - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } + /* check if there's error written */ + if (!pq->dwbes_f.wbes) + return; - if (tx->phys != phys_complete) { - /* - * a completed entry, but not the last, so clean - * up if the client is done with the descriptor - */ - if (async_tx_test_ack(tx)) - list_move_tail(&desc->node, &ioat->free_desc); - } else { - /* - * last used desc. Do not remove, so we can - * append from it. - */ - - /* if nothing else is pending, cancel the - * completion timeout - */ - if (n == &ioat->used_desc) { - dev_dbg(to_dev(chan), - "%s cancel completion timeout\n", - __func__); - clear_bit(IOAT_COMPLETION_PENDING, &chan->state); - } + /* need to set a chanerr var for checking to clear later */ - /* TODO check status bits? */ - break; - } - } + if (pq->dwbes_f.p_val_err) + *desc->result |= SUM_CHECK_P_RESULT; + + if (pq->dwbes_f.q_val_err) + *desc->result |= SUM_CHECK_Q_RESULT; - chan->last_completion = phys_complete; + return; + } + default: + return; + } } /** - * ioat1_cleanup - cleanup up finished descriptors - * @chan: ioat channel to be cleaned up - * - * To prevent lock contention we defer cleanup when the locks are - * contended with a terminal timeout that forces cleanup and catches - * completion notification errors. + * __cleanup - reclaim used descriptors + * @ioat: channel (ring) to clean */ -static void ioat1_cleanup(struct ioat_dma_chan *ioat) +static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) { - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - - prefetch(chan->completion); - - if (!spin_trylock_bh(&chan->cleanup_lock)) - return; + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + bool seen_current = false; + int idx = ioat_chan->tail, i; + u16 active; - if (!ioat_cleanup_preamble(chan, &phys_complete)) { - spin_unlock_bh(&chan->cleanup_lock); - return; - } + dev_dbg(to_dev(ioat_chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued); - if (!spin_trylock_bh(&ioat->desc_lock)) { - spin_unlock_bh(&chan->cleanup_lock); + /* + * At restart of the channel, the completion address and the + * channel status will be 0 due to starting a new chain. Since + * it's new chain and the first descriptor "fails", there is + * nothing to clean up. We do not want to reap the entire submitted + * chain due to this 0 address value and then BUG. + */ + if (!phys_complete) return; - } - __cleanup(ioat, phys_complete); + active = ioat_ring_active(ioat_chan); + for (i = 0; i < active && !seen_current; i++) { + struct dma_async_tx_descriptor *tx; - spin_unlock_bh(&ioat->desc_lock); - spin_unlock_bh(&chan->cleanup_lock); -} - -static void ioat1_timer_event(unsigned long data) -{ - struct ioat_dma_chan *ioat = to_ioat_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; + smp_read_barrier_depends(); + prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1)); + desc = ioat_get_ring_ent(ioat_chan, idx + i); + dump_desc_dbg(ioat_chan, desc); - dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state); + /* set err stat if we are using dwbes */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + desc_get_errstat(ioat_chan, desc); - spin_lock_bh(&chan->cleanup_lock); - if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) { - struct ioat_desc_sw *desc; - - spin_lock_bh(&ioat->desc_lock); + tx = &desc->txd; + if (tx->cookie) { + dma_cookie_complete(tx); + dma_descriptor_unmap(tx); + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } - /* restart active descriptors */ - desc = to_ioat_desc(ioat->used_desc.prev); - ioat_set_chainaddr(ioat, desc->txd.phys); - ioat_start(chan); + if (tx->phys == phys_complete) + seen_current = true; - ioat->pending = 0; - set_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - spin_unlock_bh(&ioat->desc_lock); - } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { - dma_addr_t phys_complete; + /* skip extended descriptors */ + if (desc_has_ext(desc)) { + BUG_ON(i + 1 >= active); + i++; + } - spin_lock_bh(&ioat->desc_lock); - /* if we haven't made progress and we have already - * acknowledged a pending completion once, then be more - * forceful with a restart - */ - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) - ioat1_reset_channel(ioat); - else { - u64 status = ioat_chansts(chan); - - /* manually update the last completion address */ - if (ioat_chansts_to_addr(status) != 0) - *chan->completion = status; - - set_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + /* cleanup super extended descriptors */ + if (desc->sed) { + ioat_free_sed(ioat_dma, desc->sed); + desc->sed = NULL; } - spin_unlock_bh(&ioat->desc_lock); } - spin_unlock_bh(&chan->cleanup_lock); -} - -enum dma_status -ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, - struct dma_tx_state *txstate) -{ - struct ioat_chan_common *chan = to_chan_common(c); - struct ioatdma_device *device = chan->device; - enum dma_status ret; - ret = dma_cookie_status(c, cookie, txstate); - if (ret == DMA_COMPLETE) - return ret; + /* finish all descriptor reads before incrementing tail */ + smp_mb(); + ioat_chan->tail = idx + i; + /* no active descs have written a completion? */ + BUG_ON(active && !seen_current); + ioat_chan->last_completion = phys_complete; - device->cleanup_fn((unsigned long) c); + if (active - i == 0) { + dev_dbg(to_dev(ioat_chan), "%s: cancel completion timeout\n", + __func__); + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + } - return dma_cookie_status(c, cookie, txstate); + /* 5 microsecond delay per pending descriptor */ + writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK), + ioat_chan->ioat_dma->reg_base + IOAT_INTRDELAY_OFFSET); } -static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat) +static void ioat_cleanup(struct ioatdma_chan *ioat_chan) { - struct ioat_chan_common *chan = &ioat->base; - struct ioat_desc_sw *desc; - struct ioat_dma_descriptor *hw; + u64 phys_complete; - spin_lock_bh(&ioat->desc_lock); + spin_lock_bh(&ioat_chan->cleanup_lock); - desc = ioat1_dma_get_next_descriptor(ioat); + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); - if (!desc) { - dev_err(to_dev(chan), - "Unable to start null desc - get next desc failed\n"); - spin_unlock_bh(&ioat->desc_lock); - return; - } + if (is_ioat_halted(*ioat_chan->completion)) { + u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - hw = desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = 1; - hw->ctl_f.compl_write = 1; - /* set size to non-zero value (channel returns error when size is 0) */ - hw->size = NULL_DESC_BUFFER_SIZE; - hw->src_addr = 0; - hw->dst_addr = 0; - async_tx_ack(&desc->txd); - hw->next = 0; - list_add_tail(&desc->node, &ioat->used_desc); - dump_desc_dbg(ioat, desc); + if (chanerr & IOAT_CHANERR_HANDLE_MASK) { + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + ioat_eh(ioat_chan); + } + } - ioat_set_chainaddr(ioat, desc->txd.phys); - ioat_start(chan); - spin_unlock_bh(&ioat->desc_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); } -/* - * Perform a IOAT transaction to verify the HW works. - */ -#define IOAT_TEST_SIZE 2000 +void ioat_cleanup_event(unsigned long data) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan((void *)data); + + ioat_cleanup(ioat_chan); + if (!test_bit(IOAT_RUN, &ioat_chan->state)) + return; + writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); +} -static void ioat_dma_test_callback(void *dma_async_param) +static void ioat_restart_channel(struct ioatdma_chan *ioat_chan) { - struct completion *cmp = dma_async_param; + u64 phys_complete; + + ioat_quiesce(ioat_chan, 0); + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); - complete(cmp); + __ioat_restart_chan(ioat_chan); } -/** - * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. - * @device: device to be tested - */ -int ioat_dma_self_test(struct ioatdma_device *device) +static void ioat_eh(struct ioatdma_chan *ioat_chan) { - int i; - u8 *src; - u8 *dest; - struct dma_device *dma = &device->common; - struct device *dev = &device->pdev->dev; - struct dma_chan *dma_chan; + struct pci_dev *pdev = to_pdev(ioat_chan); + struct ioat_dma_descriptor *hw; struct dma_async_tx_descriptor *tx; - dma_addr_t dma_dest, dma_src; - dma_cookie_t cookie; - int err = 0; - struct completion cmp; - unsigned long tmo; - unsigned long flags; - - src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); - if (!src) - return -ENOMEM; - dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); - if (!dest) { - kfree(src); - return -ENOMEM; - } + u64 phys_complete; + struct ioat_ring_ent *desc; + u32 err_handled = 0; + u32 chanerr_int; + u32 chanerr; - /* Fill in src buffer */ - for (i = 0; i < IOAT_TEST_SIZE; i++) - src[i] = (u8)i; - - /* Start copy, using first DMA channel */ - dma_chan = container_of(dma->channels.next, struct dma_chan, - device_node); - if (dma->device_alloc_chan_resources(dma_chan) < 1) { - dev_err(dev, "selftest cannot allocate chan resource\n"); - err = -ENODEV; - goto out; - } + /* cleanup so tail points to descriptor that caused the error */ + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); - dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_src)) { - dev_err(dev, "mapping src buffer failed\n"); - goto free_resources; - } - dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(dev, dma_dest)) { - dev_err(dev, "mapping dest buffer failed\n"); - goto unmap_src; - } - flags = DMA_PREP_INTERRUPT; - tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, - IOAT_TEST_SIZE, flags); - if (!tx) { - dev_err(dev, "Self-test prep failed, disabling\n"); - err = -ENODEV; - goto unmap_dma; - } + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test setup failed, disabling\n"); - err = -ENODEV; - goto unmap_dma; - } - dma->device_issue_pending(dma_chan); + dev_dbg(to_dev(ioat_chan), "%s: error = %x:%x\n", + __func__, chanerr, chanerr_int); - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail); + hw = desc->hw; + dump_desc_dbg(ioat_chan, desc); - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) - != DMA_COMPLETE) { - dev_err(dev, "Self-test copy timed out, disabling\n"); - err = -ENODEV; - goto unmap_dma; - } - if (memcmp(src, dest, IOAT_TEST_SIZE)) { - dev_err(dev, "Self-test copy failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; + switch (hw->ctl_f.op) { + case IOAT_OP_XOR_VAL: + if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { + *desc->result |= SUM_CHECK_P_RESULT; + err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; + } + break; + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: + if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { + *desc->result |= SUM_CHECK_P_RESULT; + err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; + } + if (chanerr & IOAT_CHANERR_XOR_Q_ERR) { + *desc->result |= SUM_CHECK_Q_RESULT; + err_handled |= IOAT_CHANERR_XOR_Q_ERR; + } + break; } -unmap_dma: - dma_unmap_single(dev, dma_dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); -unmap_src: - dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE); -free_resources: - dma->device_free_chan_resources(dma_chan); -out: - kfree(src); - kfree(dest); - return err; -} - -static char ioat_interrupt_style[32] = "msix"; -module_param_string(ioat_interrupt_style, ioat_interrupt_style, - sizeof(ioat_interrupt_style), 0644); -MODULE_PARM_DESC(ioat_interrupt_style, - "set ioat interrupt style: msix (default), msi, intx"); - -/** - * ioat_dma_setup_interrupts - setup interrupt handler - * @device: ioat device - */ -int ioat_dma_setup_interrupts(struct ioatdma_device *device) -{ - struct ioat_chan_common *chan; - struct pci_dev *pdev = device->pdev; - struct device *dev = &pdev->dev; - struct msix_entry *msix; - int i, j, msixcnt; - int err = -EINVAL; - u8 intrctrl = 0; - - if (!strcmp(ioat_interrupt_style, "msix")) - goto msix; - if (!strcmp(ioat_interrupt_style, "msi")) - goto msi; - if (!strcmp(ioat_interrupt_style, "intx")) - goto intx; - dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); - goto err_no_irq; - -msix: - /* The number of MSI-X vectors should equal the number of channels */ - msixcnt = device->common.chancnt; - for (i = 0; i < msixcnt; i++) - device->msix_entries[i].entry = i; - - err = pci_enable_msix_exact(pdev, device->msix_entries, msixcnt); - if (err) - goto msi; - - for (i = 0; i < msixcnt; i++) { - msix = &device->msix_entries[i]; - chan = ioat_chan_by_index(device, i); - err = devm_request_irq(dev, msix->vector, - ioat_dma_do_interrupt_msix, 0, - "ioat-msix", chan); - if (err) { - for (j = 0; j < i; j++) { - msix = &device->msix_entries[j]; - chan = ioat_chan_by_index(device, j); - devm_free_irq(dev, msix->vector, chan); + /* fault on unhandled error or spurious halt */ + if (chanerr ^ err_handled || chanerr == 0) { + dev_err(to_dev(ioat_chan), "%s: fatal error (%x:%x)\n", + __func__, chanerr, err_handled); + BUG(); + } else { /* cleanup the faulty descriptor */ + tx = &desc->txd; + if (tx->cookie) { + dma_cookie_complete(tx); + dma_descriptor_unmap(tx); + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; } - goto msi; } } - intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; - device->irq_mode = IOAT_MSIX; - goto done; -msi: - err = pci_enable_msi(pdev); - if (err) - goto intx; + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int); - err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, - "ioat-msi", device); - if (err) { - pci_disable_msi(pdev); - goto intx; - } - device->irq_mode = IOAT_MSI; - goto done; - -intx: - err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, - IRQF_SHARED, "ioat-intx", device); - if (err) - goto err_no_irq; - - device->irq_mode = IOAT_INTX; -done: - if (device->intr_quirk) - device->intr_quirk(device); - intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; - writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); - return 0; - -err_no_irq: - /* Disable all interrupt generation */ - writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); - device->irq_mode = IOAT_NOIRQ; - dev_err(dev, "no usable interrupts\n"); - return err; -} -EXPORT_SYMBOL(ioat_dma_setup_interrupts); + /* mark faulting descriptor as complete */ + *ioat_chan->completion = desc->txd.phys; -static void ioat_disable_interrupts(struct ioatdma_device *device) -{ - /* Disable all interrupt generation */ - writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); + spin_lock_bh(&ioat_chan->prep_lock); + ioat_restart_channel(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); } -int ioat_probe(struct ioatdma_device *device) +static void check_active(struct ioatdma_chan *ioat_chan) { - int err = -ENODEV; - struct dma_device *dma = &device->common; - struct pci_dev *pdev = device->pdev; - struct device *dev = &pdev->dev; - - /* DMA coherent memory pool for DMA descriptor allocations */ - device->dma_pool = pci_pool_create("dma_desc_pool", pdev, - sizeof(struct ioat_dma_descriptor), - 64, 0); - if (!device->dma_pool) { - err = -ENOMEM; - goto err_dma_pool; - } - - device->completion_pool = pci_pool_create("completion_pool", pdev, - sizeof(u64), SMP_CACHE_BYTES, - SMP_CACHE_BYTES); - - if (!device->completion_pool) { - err = -ENOMEM; - goto err_completion_pool; + if (ioat_ring_active(ioat_chan)) { + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + return; } - device->enumerate_channels(device); - - dma_cap_set(DMA_MEMCPY, dma->cap_mask); - dma->dev = &pdev->dev; + if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state)) + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + else if (ioat_chan->alloc_order > ioat_get_alloc_order()) { + /* if the ring is idle, empty, and oversized try to step + * down the size + */ + reshape_ring(ioat_chan, ioat_chan->alloc_order - 1); - if (!dma->chancnt) { - dev_err(dev, "channel enumeration error\n"); - goto err_setup_interrupts; + /* keep shrinking until we get back to our minimum + * default size + */ + if (ioat_chan->alloc_order > ioat_get_alloc_order()) + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); } - err = ioat_dma_setup_interrupts(device); - if (err) - goto err_setup_interrupts; +} - err = device->self_test(device); - if (err) - goto err_self_test; +void ioat_timer_event(unsigned long data) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan((void *)data); + dma_addr_t phys_complete; + u64 status; - return 0; + status = ioat_chansts(ioat_chan); -err_self_test: - ioat_disable_interrupts(device); -err_setup_interrupts: - pci_pool_destroy(device->completion_pool); -err_completion_pool: - pci_pool_destroy(device->dma_pool); -err_dma_pool: - return err; -} + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; -int ioat_register(struct ioatdma_device *device) -{ - int err = dma_async_device_register(&device->common); + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(ioat_chan), "%s: Channel halted (%x)\n", + __func__, chanerr); + if (test_bit(IOAT_RUN, &ioat_chan->state)) + BUG_ON(is_ioat_bug(chanerr)); + else /* we never got off the ground */ + return; + } - if (err) { - ioat_disable_interrupts(device); - pci_pool_destroy(device->completion_pool); - pci_pool_destroy(device->dma_pool); + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + spin_lock_bh(&ioat_chan->cleanup_lock); + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &ioat_chan->state)) { + spin_lock_bh(&ioat_chan->prep_lock); + ioat_restart_channel(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + return; + } else { + set_bit(IOAT_COMPLETION_ACK, &ioat_chan->state); + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); } - return err; -} -/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */ -static void ioat1_intr_quirk(struct ioatdma_device *device) -{ - struct pci_dev *pdev = device->pdev; - u32 dmactrl; - - pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl); - if (pdev->msi_enabled) - dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; - else - dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN; - pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); + if (ioat_ring_active(ioat_chan)) + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + else { + spin_lock_bh(&ioat_chan->prep_lock); + check_active(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); + } + spin_unlock_bh(&ioat_chan->cleanup_lock); } -static ssize_t ring_size_show(struct dma_chan *c, char *page) +enum dma_status +ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate) { - struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + enum dma_status ret; - return sprintf(page, "%d\n", ioat->desccount); -} -static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + ret = dma_cookie_status(c, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; -static ssize_t ring_active_show(struct dma_chan *c, char *page) -{ - struct ioat_dma_chan *ioat = to_ioat_chan(c); + ioat_cleanup(ioat_chan); - return sprintf(page, "%d\n", ioat->active); + return dma_cookie_status(c, cookie, txstate); } -static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); -static ssize_t cap_show(struct dma_chan *c, char *page) +static int ioat_irq_reinit(struct ioatdma_device *ioat_dma) { - struct dma_device *dma = c->device; + struct pci_dev *pdev = ioat_dma->pdev; + int irq = pdev->irq, i; - return sprintf(page, "copy%s%s%s%s%s\n", - dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", - dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", - dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", - dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", - dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : ""); + if (!is_bwd_ioat(pdev)) + return 0; -} -struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); - -static ssize_t version_show(struct dma_chan *c, char *page) -{ - struct dma_device *dma = c->device; - struct ioatdma_device *device = to_ioatdma_device(dma); + switch (ioat_dma->irq_mode) { + case IOAT_MSIX: + for (i = 0; i < ioat_dma->dma_dev.chancnt; i++) { + struct msix_entry *msix = &ioat_dma->msix_entries[i]; + struct ioatdma_chan *ioat_chan; - return sprintf(page, "%d.%d\n", - device->version >> 4, device->version & 0xf); -} -struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); - -static struct attribute *ioat1_attrs[] = { - &ring_size_attr.attr, - &ring_active_attr.attr, - &ioat_cap_attr.attr, - &ioat_version_attr.attr, - NULL, -}; - -static ssize_t -ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) -{ - struct ioat_sysfs_entry *entry; - struct ioat_chan_common *chan; + ioat_chan = ioat_chan_by_index(ioat_dma, i); + devm_free_irq(&pdev->dev, msix->vector, ioat_chan); + } - entry = container_of(attr, struct ioat_sysfs_entry, attr); - chan = container_of(kobj, struct ioat_chan_common, kobj); + pci_disable_msix(pdev); + break; + case IOAT_MSI: + pci_disable_msi(pdev); + /* fall through */ + case IOAT_INTX: + devm_free_irq(&pdev->dev, irq, ioat_dma); + break; + default: + return 0; + } + ioat_dma->irq_mode = IOAT_NOIRQ; - if (!entry->show) - return -EIO; - return entry->show(&chan->common, page); + return ioat_dma_setup_interrupts(ioat_dma); } -const struct sysfs_ops ioat_sysfs_ops = { - .show = ioat_attr_show, -}; - -static struct kobj_type ioat1_ktype = { - .sysfs_ops = &ioat_sysfs_ops, - .default_attrs = ioat1_attrs, -}; - -void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type) +int ioat_reset_hw(struct ioatdma_chan *ioat_chan) { - struct dma_device *dma = &device->common; - struct dma_chan *c; + /* throw away whatever the channel was doing and get it + * initialized, with ioat3 specific workarounds + */ + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct pci_dev *pdev = ioat_dma->pdev; + u32 chanerr; + u16 dev_id; + int err; + + ioat_quiesce(ioat_chan, msecs_to_jiffies(100)); - list_for_each_entry(c, &dma->channels, device_node) { - struct ioat_chan_common *chan = to_chan_common(c); - struct kobject *parent = &c->dev->device.kobj; - int err; + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata"); + if (ioat_dma->version < IOAT_VER_3_3) { + /* clear any pending errors */ + err = pci_read_config_dword(pdev, + IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); if (err) { - dev_warn(to_dev(chan), - "sysfs init error (%d), continuing...\n", err); - kobject_put(&chan->kobj); - set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state); + dev_err(&pdev->dev, + "channel error register unreachable\n"); + return err; } - } -} + pci_write_config_dword(pdev, + IOAT_PCI_CHANERR_INT_OFFSET, chanerr); -void ioat_kobject_del(struct ioatdma_device *device) -{ - struct dma_device *dma = &device->common; - struct dma_chan *c; - - list_for_each_entry(c, &dma->channels, device_node) { - struct ioat_chan_common *chan = to_chan_common(c); - - if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) { - kobject_del(&chan->kobj); - kobject_put(&chan->kobj); + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { + pci_write_config_dword(pdev, + IOAT_PCI_DMAUNCERRSTS_OFFSET, + 0x10); } } -} -int ioat1_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - int err; + err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200)); + if (!err) + err = ioat_irq_reinit(ioat_dma); - device->intr_quirk = ioat1_intr_quirk; - device->enumerate_channels = ioat1_enumerate_channels; - device->self_test = ioat_dma_self_test; - device->timer_fn = ioat1_timer_event; - device->cleanup_fn = ioat1_cleanup_event; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; - dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; - dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources; - dma->device_free_chan_resources = ioat1_dma_free_chan_resources; - dma->device_tx_status = ioat_dma_tx_status; - - err = ioat_probe(device); - if (err) - return err; - err = ioat_register(device); if (err) - return err; - ioat_kobject_add(device, &ioat1_ktype); - - if (dca) - device->dca = ioat_dca_init(pdev, device->reg_base); + dev_err(&pdev->dev, "Failed to reset: %d\n", err); return err; } - -void ioat_dma_remove(struct ioatdma_device *device) -{ - struct dma_device *dma = &device->common; - - ioat_disable_interrupts(device); - - ioat_kobject_del(device); - - dma_async_device_unregister(dma); - - pci_pool_destroy(device->dma_pool); - pci_pool_destroy(device->completion_pool); - - INIT_LIST_HEAD(&dma->channels); -} diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 30f5c7eede16..1bc084986646 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -18,26 +18,32 @@ #define IOATDMA_H #include <linux/dmaengine.h> -#include "hw.h" -#include "registers.h" #include <linux/init.h> #include <linux/dmapool.h> #include <linux/cache.h> #include <linux/pci_ids.h> -#include <net/tcp.h> +#include <linux/circ_buf.h> +#include <linux/interrupt.h> +#include "registers.h" +#include "hw.h" #define IOAT_DMA_VERSION "4.00" -#define IOAT_LOW_COMPLETION_MASK 0xffffffc0 #define IOAT_DMA_DCA_ANY_CPU ~0 -#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) -#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) -#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd) -#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev) -#define to_pdev(ioat_chan) ((ioat_chan)->device->pdev) +#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, dma_dev) +#define to_dev(ioat_chan) (&(ioat_chan)->ioat_dma->pdev->dev) +#define to_pdev(ioat_chan) ((ioat_chan)->ioat_dma->pdev) + +#define chan_num(ch) ((int)((ch)->reg_base - (ch)->ioat_dma->reg_base) / 0x80) -#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) +/* ioat hardware assumes at least two sources for raid operations */ +#define src_cnt_to_sw(x) ((x) + 2) +#define src_cnt_to_hw(x) ((x) - 2) +#define ndest_to_sw(x) ((x) + 1) +#define ndest_to_hw(x) ((x) - 1) +#define src16_cnt_to_sw(x) ((x) + 9) +#define src16_cnt_to_hw(x) ((x) - 9) /* * workaround for IOAT ver.3.0 null descriptor issue @@ -57,19 +63,15 @@ enum ioat_irq_mode { * @pdev: PCI-Express device * @reg_base: MMIO register space base address * @dma_pool: for allocating DMA descriptors - * @common: embedded struct dma_device + * @completion_pool: DMA buffers for completion ops + * @sed_hw_pool: DMA super descriptor pools + * @dma_dev: embedded struct dma_device * @version: version of ioatdma device * @msix_entries: irq handlers * @idx: per channel data * @dca: direct cache access context - * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) - * @enumerate_channels: hw version specific channel enumeration - * @reset_hw: hw version specific channel (re)initialization - * @cleanup_fn: select between the v2 and v3 cleanup routines - * @timer_fn: select between the v2 and v3 timer watchdog routines - * @self_test: hardware version specific self test for each supported op type - * - * Note: the v3 cleanup routine supports raid operations + * @irq_mode: interrupt mode (INTX, MSI, MSIX) + * @cap: read DMA capabilities register */ struct ioatdma_device { struct pci_dev *pdev; @@ -78,28 +80,21 @@ struct ioatdma_device { struct pci_pool *completion_pool; #define MAX_SED_POOLS 5 struct dma_pool *sed_hw_pool[MAX_SED_POOLS]; - struct dma_device common; + struct dma_device dma_dev; u8 version; struct msix_entry msix_entries[4]; - struct ioat_chan_common *idx[4]; + struct ioatdma_chan *idx[4]; struct dca_provider *dca; enum ioat_irq_mode irq_mode; u32 cap; - void (*intr_quirk)(struct ioatdma_device *device); - int (*enumerate_channels)(struct ioatdma_device *device); - int (*reset_hw)(struct ioat_chan_common *chan); - void (*cleanup_fn)(unsigned long data); - void (*timer_fn)(unsigned long data); - int (*self_test)(struct ioatdma_device *device); }; -struct ioat_chan_common { - struct dma_chan common; +struct ioatdma_chan { + struct dma_chan dma_chan; void __iomem *reg_base; dma_addr_t last_completion; spinlock_t cleanup_lock; unsigned long state; - #define IOAT_COMPLETION_PENDING 0 #define IOAT_COMPLETION_ACK 1 #define IOAT_RESET_PENDING 2 #define IOAT_KOBJ_INIT_FAIL 3 @@ -110,11 +105,32 @@ struct ioat_chan_common { #define COMPLETION_TIMEOUT msecs_to_jiffies(100) #define IDLE_TIMEOUT msecs_to_jiffies(2000) #define RESET_DELAY msecs_to_jiffies(100) - struct ioatdma_device *device; + struct ioatdma_device *ioat_dma; dma_addr_t completion_dma; u64 *completion; struct tasklet_struct cleanup_task; struct kobject kobj; + +/* ioat v2 / v3 channel attributes + * @xfercap_log; log2 of channel max transfer length (for fast division) + * @head: allocated index + * @issued: hardware notification point + * @tail: cleanup index + * @dmacount: identical to 'head' except for occasionally resetting to zero + * @alloc_order: log2 of the number of allocated descriptors + * @produce: number of descriptors to produce at submit time + * @ring: software ring buffer implementation of hardware ring + * @prep_lock: serializes descriptor preparation (producers) + */ + size_t xfercap_log; + u16 head; + u16 issued; + u16 tail; + u16 dmacount; + u16 alloc_order; + u16 produce; + struct ioat_ring_ent **ring; + spinlock_t prep_lock; }; struct ioat_sysfs_entry { @@ -123,28 +139,11 @@ struct ioat_sysfs_entry { }; /** - * struct ioat_dma_chan - internal representation of a DMA channel - */ -struct ioat_dma_chan { - struct ioat_chan_common base; - - size_t xfercap; /* XFERCAP register value expanded out */ - - spinlock_t desc_lock; - struct list_head free_desc; - struct list_head used_desc; - - int pending; - u16 desccount; - u16 active; -}; - -/** * struct ioat_sed_ent - wrapper around super extended hardware descriptor * @hw: hardware SED - * @sed_dma: dma address for the SED - * @list: list member + * @dma: dma address for the SED * @parent: point to the dma descriptor that's the parent + * @hw_pool: descriptor pool index */ struct ioat_sed_ent { struct ioat_sed_raw_descriptor *hw; @@ -153,39 +152,57 @@ struct ioat_sed_ent { unsigned int hw_pool; }; -static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) -{ - return container_of(c, struct ioat_chan_common, common); -} - -static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c) -{ - struct ioat_chan_common *chan = to_chan_common(c); - - return container_of(chan, struct ioat_dma_chan, base); -} - -/* wrapper around hardware descriptor format + additional software fields */ - /** - * struct ioat_desc_sw - wrapper around hardware descriptor + * struct ioat_ring_ent - wrapper around hardware descriptor * @hw: hardware DMA descriptor (for memcpy) - * @node: this descriptor will either be on the free list, - * or attached to a transaction list (tx_list) + * @xor: hardware xor descriptor + * @xor_ex: hardware xor extension descriptor + * @pq: hardware pq descriptor + * @pq_ex: hardware pq extension descriptor + * @pqu: hardware pq update descriptor + * @raw: hardware raw (un-typed) descriptor * @txd: the generic software descriptor for all engines + * @len: total transaction length for unmap + * @result: asynchronous result of validate operations * @id: identifier for debug + * @sed: pointer to super extended descriptor sw desc */ -struct ioat_desc_sw { - struct ioat_dma_descriptor *hw; - struct list_head node; + +struct ioat_ring_ent { + union { + struct ioat_dma_descriptor *hw; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex; + struct ioat_pq_update_descriptor *pqu; + struct ioat_raw_descriptor *raw; + }; size_t len; - struct list_head tx_list; struct dma_async_tx_descriptor txd; + enum sum_check_flags *result; #ifdef DEBUG int id; #endif + struct ioat_sed_ent *sed; }; +extern const struct sysfs_ops ioat_sysfs_ops; +extern struct ioat_sysfs_entry ioat_version_attr; +extern struct ioat_sysfs_entry ioat_cap_attr; +extern int ioat_pending_level; +extern int ioat_ring_alloc_order; +extern struct kobj_type ioat_ktype; +extern struct kmem_cache *ioat_cache; +extern int ioat_ring_max_alloc_order; +extern struct kmem_cache *ioat_sed_cache; + +static inline struct ioatdma_chan *to_ioat_chan(struct dma_chan *c) +{ + return container_of(c, struct ioatdma_chan, dma_chan); +} + +/* wrapper around hardware descriptor format + additional software fields */ #ifdef DEBUG #define set_desc_id(desc, i) ((desc)->id = (i)) #define desc_id(desc) ((desc)->id) @@ -195,10 +212,10 @@ struct ioat_desc_sw { #endif static inline void -__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, +__dump_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_dma_descriptor *hw, struct dma_async_tx_descriptor *tx, int id) { - struct device *dev = to_dev(chan); + struct device *dev = to_dev(ioat_chan); dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x" " ctl: %#10.8x (op: %#x int_en: %d compl: %d)\n", id, @@ -208,25 +225,25 @@ __dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, } #define dump_desc_dbg(c, d) \ - ({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; }) + ({ if (d) __dump_desc_dbg(c, d->hw, &d->txd, desc_id(d)); 0; }) -static inline struct ioat_chan_common * -ioat_chan_by_index(struct ioatdma_device *device, int index) +static inline struct ioatdma_chan * +ioat_chan_by_index(struct ioatdma_device *ioat_dma, int index) { - return device->idx[index]; + return ioat_dma->idx[index]; } -static inline u64 ioat_chansts_32(struct ioat_chan_common *chan) +static inline u64 ioat_chansts_32(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; u64 status; u32 status_lo; /* We need to read the low address first as this causes the * chipset to latch the upper bits for the subsequent read */ - status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver)); - status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver)); + status_lo = readl(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver)); + status = readl(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver)); status <<= 32; status |= status_lo; @@ -235,16 +252,16 @@ static inline u64 ioat_chansts_32(struct ioat_chan_common *chan) #if BITS_PER_LONG == 64 -static inline u64 ioat_chansts(struct ioat_chan_common *chan) +static inline u64 ioat_chansts(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; u64 status; /* With IOAT v3.3 the status register is 64bit. */ if (ver >= IOAT_VER_3_3) - status = readq(chan->reg_base + IOAT_CHANSTS_OFFSET(ver)); + status = readq(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET(ver)); else - status = ioat_chansts_32(chan); + status = ioat_chansts_32(ioat_chan); return status; } @@ -253,56 +270,41 @@ static inline u64 ioat_chansts(struct ioat_chan_common *chan) #define ioat_chansts ioat_chansts_32 #endif -static inline void ioat_start(struct ioat_chan_common *chan) -{ - u8 ver = chan->device->version; - - writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); -} - static inline u64 ioat_chansts_to_addr(u64 status) { return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; } -static inline u32 ioat_chanerr(struct ioat_chan_common *chan) +static inline u32 ioat_chanerr(struct ioatdma_chan *ioat_chan) { - return readl(chan->reg_base + IOAT_CHANERR_OFFSET); + return readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); } -static inline void ioat_suspend(struct ioat_chan_common *chan) +static inline void ioat_suspend(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; - writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + writeb(IOAT_CHANCMD_SUSPEND, + ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); } -static inline void ioat_reset(struct ioat_chan_common *chan) +static inline void ioat_reset(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; - writeb(IOAT_CHANCMD_RESET, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + writeb(IOAT_CHANCMD_RESET, + ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); } -static inline bool ioat_reset_pending(struct ioat_chan_common *chan) +static inline bool ioat_reset_pending(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; u8 cmd; - cmd = readb(chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + cmd = readb(ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET; } -static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr) -{ - struct ioat_chan_common *chan = &ioat->base; - - writel(addr & 0x00000000FFFFFFFF, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(addr >> 32, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); -} - static inline bool is_ioat_active(unsigned long status) { return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE); @@ -329,24 +331,111 @@ static inline bool is_ioat_bug(unsigned long err) return !!err; } -int ioat_probe(struct ioatdma_device *device); -int ioat_register(struct ioatdma_device *device); -int ioat1_dma_probe(struct ioatdma_device *dev, int dca); -int ioat_dma_self_test(struct ioatdma_device *device); -void ioat_dma_remove(struct ioatdma_device *device); +#define IOAT_MAX_ORDER 16 +#define ioat_get_alloc_order() \ + (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) +#define ioat_get_max_alloc_order() \ + (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) + +static inline u32 ioat_ring_size(struct ioatdma_chan *ioat_chan) +{ + return 1 << ioat_chan->alloc_order; +} + +/* count of descriptors in flight with the engine */ +static inline u16 ioat_ring_active(struct ioatdma_chan *ioat_chan) +{ + return CIRC_CNT(ioat_chan->head, ioat_chan->tail, + ioat_ring_size(ioat_chan)); +} + +/* count of descriptors pending submission to hardware */ +static inline u16 ioat_ring_pending(struct ioatdma_chan *ioat_chan) +{ + return CIRC_CNT(ioat_chan->head, ioat_chan->issued, + ioat_ring_size(ioat_chan)); +} + +static inline u32 ioat_ring_space(struct ioatdma_chan *ioat_chan) +{ + return ioat_ring_size(ioat_chan) - ioat_ring_active(ioat_chan); +} + +static inline u16 +ioat_xferlen_to_descs(struct ioatdma_chan *ioat_chan, size_t len) +{ + u16 num_descs = len >> ioat_chan->xfercap_log; + + num_descs += !!(len & ((1 << ioat_chan->xfercap_log) - 1)); + return num_descs; +} + +static inline struct ioat_ring_ent * +ioat_get_ring_ent(struct ioatdma_chan *ioat_chan, u16 idx) +{ + return ioat_chan->ring[idx & (ioat_ring_size(ioat_chan) - 1)]; +} + +static inline void +ioat_set_chainaddr(struct ioatdma_chan *ioat_chan, u64 addr) +{ + writel(addr & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(addr >> 32, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); +} + +/* IOAT Prep functions */ +struct dma_async_tx_descriptor * +ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags); + +/* IOAT Operation functions */ +irqreturn_t ioat_dma_do_interrupt(int irq, void *data); +irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data); +struct ioat_ring_ent ** +ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags); +void ioat_start_null_desc(struct ioatdma_chan *ioat_chan); +void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan); +int ioat_reset_hw(struct ioatdma_chan *ioat_chan); +enum dma_status +ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate); +void ioat_cleanup_event(unsigned long data); +void ioat_timer_event(unsigned long data); +int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs); +void ioat_issue_pending(struct dma_chan *chan); +void ioat_timer_event(unsigned long data); + +/* IOAT Init functions */ +bool is_bwd_ioat(struct pci_dev *pdev); struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); -dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan); -void ioat_init_channel(struct ioatdma_device *device, - struct ioat_chan_common *chan, int idx); -enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, - struct dma_tx_state *txstate); -bool ioat_cleanup_preamble(struct ioat_chan_common *chan, - dma_addr_t *phys_complete); -void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); -void ioat_kobject_del(struct ioatdma_device *device); -int ioat_dma_setup_interrupts(struct ioatdma_device *device); -void ioat_stop(struct ioat_chan_common *chan); -extern const struct sysfs_ops ioat_sysfs_ops; -extern struct ioat_sysfs_entry ioat_version_attr; -extern struct ioat_sysfs_entry ioat_cap_attr; +void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type); +void ioat_kobject_del(struct ioatdma_device *ioat_dma); +int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma); +void ioat_stop(struct ioatdma_chan *ioat_chan); #endif /* IOATDMA_H */ diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c deleted file mode 100644 index 69c7dfcad023..000000000000 --- a/drivers/dma/ioat/dma_v2.c +++ /dev/null @@ -1,916 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2004 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -/* - * This driver supports an Intel I/OAT DMA engine (versions >= 2), which - * does asynchronous data movement and checksumming operations. - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/dmaengine.h> -#include <linux/delay.h> -#include <linux/dma-mapping.h> -#include <linux/workqueue.h> -#include <linux/prefetch.h> -#include <linux/i7300_idle.h> -#include "dma.h" -#include "dma_v2.h" -#include "registers.h" -#include "hw.h" - -#include "../dmaengine.h" - -int ioat_ring_alloc_order = 8; -module_param(ioat_ring_alloc_order, int, 0644); -MODULE_PARM_DESC(ioat_ring_alloc_order, - "ioat2+: allocate 2^n descriptors per channel" - " (default: 8 max: 16)"); -static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; -module_param(ioat_ring_max_alloc_order, int, 0644); -MODULE_PARM_DESC(ioat_ring_max_alloc_order, - "ioat2+: upper limit for ring size (default: 16)"); - -void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - ioat->dmacount += ioat2_ring_pending(ioat); - ioat->issued = ioat->head; - writew(ioat->dmacount, chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); - dev_dbg(to_dev(chan), - "%s: head: %#x tail: %#x issued: %#x count: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); -} - -void ioat2_issue_pending(struct dma_chan *c) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - - if (ioat2_ring_pending(ioat)) { - spin_lock_bh(&ioat->prep_lock); - __ioat2_issue_pending(ioat); - spin_unlock_bh(&ioat->prep_lock); - } -} - -/** - * ioat2_update_pending - log pending descriptors - * @ioat: ioat2+ channel - * - * Check if the number of unsubmitted descriptors has exceeded the - * watermark. Called with prep_lock held - */ -static void ioat2_update_pending(struct ioat2_dma_chan *ioat) -{ - if (ioat2_ring_pending(ioat) > ioat_pending_level) - __ioat2_issue_pending(ioat); -} - -static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) -{ - struct ioat_ring_ent *desc; - struct ioat_dma_descriptor *hw; - - if (ioat2_ring_space(ioat) < 1) { - dev_err(to_dev(&ioat->base), - "Unable to start null desc - ring full\n"); - return; - } - - dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued); - desc = ioat2_get_ring_ent(ioat, ioat->head); - - hw = desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = 1; - hw->ctl_f.compl_write = 1; - /* set size to non-zero value (channel returns error when size is 0) */ - hw->size = NULL_DESC_BUFFER_SIZE; - hw->src_addr = 0; - hw->dst_addr = 0; - async_tx_ack(&desc->txd); - ioat2_set_chainaddr(ioat, desc->txd.phys); - dump_desc_dbg(ioat, desc); - wmb(); - ioat->head += 1; - __ioat2_issue_pending(ioat); -} - -static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat) -{ - spin_lock_bh(&ioat->prep_lock); - __ioat2_start_null_desc(ioat); - spin_unlock_bh(&ioat->prep_lock); -} - -static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) -{ - struct ioat_chan_common *chan = &ioat->base; - struct dma_async_tx_descriptor *tx; - struct ioat_ring_ent *desc; - bool seen_current = false; - u16 active; - int idx = ioat->tail, i; - - dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued); - - active = ioat2_ring_active(ioat); - for (i = 0; i < active && !seen_current; i++) { - smp_read_barrier_depends(); - prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); - desc = ioat2_get_ring_ent(ioat, idx + i); - tx = &desc->txd; - dump_desc_dbg(ioat, desc); - if (tx->cookie) { - dma_descriptor_unmap(tx); - dma_cookie_complete(tx); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } - - if (tx->phys == phys_complete) - seen_current = true; - } - smp_mb(); /* finish all descriptor reads before incrementing tail */ - ioat->tail = idx + i; - BUG_ON(active && !seen_current); /* no active descs have written a completion? */ - - chan->last_completion = phys_complete; - if (active - i == 0) { - dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", - __func__); - clear_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } -} - -/** - * ioat2_cleanup - clean finished descriptors (advance tail pointer) - * @chan: ioat channel to be cleaned up - */ -static void ioat2_cleanup(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - - spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - spin_unlock_bh(&chan->cleanup_lock); -} - -void ioat2_cleanup_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - - ioat2_cleanup(ioat); - if (!test_bit(IOAT_RUN, &chan->state)) - return; - writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); -} - -void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - /* set the tail to be re-issued */ - ioat->issued = ioat->tail; - ioat->dmacount = 0; - set_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - - dev_dbg(to_dev(chan), - "%s: head: %#x tail: %#x issued: %#x count: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); - - if (ioat2_ring_pending(ioat)) { - struct ioat_ring_ent *desc; - - desc = ioat2_get_ring_ent(ioat, ioat->tail); - ioat2_set_chainaddr(ioat, desc->txd.phys); - __ioat2_issue_pending(ioat); - } else - __ioat2_start_null_desc(ioat); -} - -int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo) -{ - unsigned long end = jiffies + tmo; - int err = 0; - u32 status; - - status = ioat_chansts(chan); - if (is_ioat_active(status) || is_ioat_idle(status)) - ioat_suspend(chan); - while (is_ioat_active(status) || is_ioat_idle(status)) { - if (tmo && time_after(jiffies, end)) { - err = -ETIMEDOUT; - break; - } - status = ioat_chansts(chan); - cpu_relax(); - } - - return err; -} - -int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo) -{ - unsigned long end = jiffies + tmo; - int err = 0; - - ioat_reset(chan); - while (ioat_reset_pending(chan)) { - if (end && time_after(jiffies, end)) { - err = -ETIMEDOUT; - break; - } - cpu_relax(); - } - - return err; -} - -static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - - ioat2_quiesce(chan, 0); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - __ioat2_restart_chan(ioat); -} - -static void check_active(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - if (ioat2_ring_active(ioat)) { - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - return; - } - - if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &chan->state)) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - else if (ioat->alloc_order > ioat_get_alloc_order()) { - /* if the ring is idle, empty, and oversized try to step - * down the size - */ - reshape_ring(ioat, ioat->alloc_order - 1); - - /* keep shrinking until we get back to our minimum - * default size - */ - if (ioat->alloc_order > ioat_get_alloc_order()) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } - -} - -void ioat2_timer_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - u64 status; - - status = ioat_chansts(chan); - - /* when halted due to errors check for channel - * programming errors before advancing the completion state - */ - if (is_ioat_halted(status)) { - u32 chanerr; - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - dev_err(to_dev(chan), "%s: Channel halted (%x)\n", - __func__, chanerr); - if (test_bit(IOAT_RUN, &chan->state)) - BUG_ON(is_ioat_bug(chanerr)); - else /* we never got off the ground */ - return; - } - - /* if we haven't made progress and we have already - * acknowledged a pending completion once, then be more - * forceful with a restart - */ - spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { - spin_lock_bh(&ioat->prep_lock); - ioat2_restart_channel(ioat); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - return; - } else { - set_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - } - - - if (ioat2_ring_active(ioat)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - else { - spin_lock_bh(&ioat->prep_lock); - check_active(ioat); - spin_unlock_bh(&ioat->prep_lock); - } - spin_unlock_bh(&chan->cleanup_lock); -} - -static int ioat2_reset_hw(struct ioat_chan_common *chan) -{ - /* throw away whatever the channel was doing and get it initialized */ - u32 chanerr; - - ioat2_quiesce(chan, msecs_to_jiffies(100)); - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - - return ioat2_reset_sync(chan, msecs_to_jiffies(200)); -} - -/** - * ioat2_enumerate_channels - find and initialize the device's channels - * @device: the device to be enumerated - */ -int ioat2_enumerate_channels(struct ioatdma_device *device) -{ - struct ioat2_dma_chan *ioat; - struct device *dev = &device->pdev->dev; - struct dma_device *dma = &device->common; - u8 xfercap_log; - int i; - - INIT_LIST_HEAD(&dma->channels); - dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); - dma->chancnt &= 0x1f; /* bits [4:0] valid */ - if (dma->chancnt > ARRAY_SIZE(device->idx)) { - dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", - dma->chancnt, ARRAY_SIZE(device->idx)); - dma->chancnt = ARRAY_SIZE(device->idx); - } - xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET); - xfercap_log &= 0x1f; /* bits [4:0] valid */ - if (xfercap_log == 0) - return 0; - dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); - - /* FIXME which i/oat version is i7300? */ -#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) - dma->chancnt--; -#endif - for (i = 0; i < dma->chancnt; i++) { - ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); - if (!ioat) - break; - - ioat_init_channel(device, &ioat->base, i); - ioat->xfercap_log = xfercap_log; - spin_lock_init(&ioat->prep_lock); - if (device->reset_hw(&ioat->base)) { - i = 0; - break; - } - } - dma->chancnt = i; - return i; -} - -static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) -{ - struct dma_chan *c = tx->chan; - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - dma_cookie_t cookie; - - cookie = dma_cookie_assign(tx); - dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); - - if (!test_and_set_bit(IOAT_CHAN_ACTIVE, &chan->state)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - - /* make descriptor updates visible before advancing ioat->head, - * this is purposefully not smp_wmb() since we are also - * publishing the descriptor updates to a dma device - */ - wmb(); - - ioat->head += ioat->produce; - - ioat2_update_pending(ioat); - spin_unlock_bh(&ioat->prep_lock); - - return cookie; -} - -static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) -{ - struct ioat_dma_descriptor *hw; - struct ioat_ring_ent *desc; - struct ioatdma_device *dma; - dma_addr_t phys; - - dma = to_ioatdma_device(chan->device); - hw = pci_pool_alloc(dma->dma_pool, flags, &phys); - if (!hw) - return NULL; - memset(hw, 0, sizeof(*hw)); - - desc = kmem_cache_zalloc(ioat2_cache, flags); - if (!desc) { - pci_pool_free(dma->dma_pool, hw, phys); - return NULL; - } - - dma_async_tx_descriptor_init(&desc->txd, chan); - desc->txd.tx_submit = ioat2_tx_submit_unlock; - desc->hw = hw; - desc->txd.phys = phys; - return desc; -} - -static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) -{ - struct ioatdma_device *dma; - - dma = to_ioatdma_device(chan->device); - pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys); - kmem_cache_free(ioat2_cache, desc); -} - -static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags) -{ - struct ioat_ring_ent **ring; - int descs = 1 << order; - int i; - - if (order > ioat_get_max_alloc_order()) - return NULL; - - /* allocate the array to hold the software ring */ - ring = kcalloc(descs, sizeof(*ring), flags); - if (!ring) - return NULL; - for (i = 0; i < descs; i++) { - ring[i] = ioat2_alloc_ring_ent(c, flags); - if (!ring[i]) { - while (i--) - ioat2_free_ring_ent(ring[i], c); - kfree(ring); - return NULL; - } - set_desc_id(ring[i], i); - } - - /* link descs */ - for (i = 0; i < descs-1; i++) { - struct ioat_ring_ent *next = ring[i+1]; - struct ioat_dma_descriptor *hw = ring[i]->hw; - - hw->next = next->txd.phys; - } - ring[i]->hw->next = ring[0]->txd.phys; - - return ring; -} - -void ioat2_free_chan_resources(struct dma_chan *c); - -/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring - * @chan: channel to be initialized - */ -int ioat2_alloc_chan_resources(struct dma_chan *c) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioat_ring_ent **ring; - u64 status; - int order; - int i = 0; - - /* have we already been set up? */ - if (ioat->ring) - return 1 << ioat->alloc_order; - - /* Setup register to interrupt and write completion status on error */ - writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); - - /* allocate a completion writeback area */ - /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ - chan->completion = pci_pool_alloc(chan->device->completion_pool, - GFP_KERNEL, &chan->completion_dma); - if (!chan->completion) - return -ENOMEM; - - memset(chan->completion, 0, sizeof(*chan->completion)); - writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - writel(((u64) chan->completion_dma) >> 32, - chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - - order = ioat_get_alloc_order(); - ring = ioat2_alloc_ring(c, order, GFP_KERNEL); - if (!ring) - return -ENOMEM; - - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->prep_lock); - ioat->ring = ring; - ioat->head = 0; - ioat->issued = 0; - ioat->tail = 0; - ioat->alloc_order = order; - set_bit(IOAT_RUN, &chan->state); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - - ioat2_start_null_desc(ioat); - - /* check that we got off the ground */ - do { - udelay(1); - status = ioat_chansts(chan); - } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status)); - - if (is_ioat_active(status) || is_ioat_idle(status)) { - return 1 << ioat->alloc_order; - } else { - u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - - dev_WARN(to_dev(chan), - "failed to start channel chanerr: %#x\n", chanerr); - ioat2_free_chan_resources(c); - return -EFAULT; - } -} - -bool reshape_ring(struct ioat2_dma_chan *ioat, int order) -{ - /* reshape differs from normal ring allocation in that we want - * to allocate a new software ring while only - * extending/truncating the hardware ring - */ - struct ioat_chan_common *chan = &ioat->base; - struct dma_chan *c = &chan->common; - const u32 curr_size = ioat2_ring_size(ioat); - const u16 active = ioat2_ring_active(ioat); - const u32 new_size = 1 << order; - struct ioat_ring_ent **ring; - u16 i; - - if (order > ioat_get_max_alloc_order()) - return false; - - /* double check that we have at least 1 free descriptor */ - if (active == curr_size) - return false; - - /* when shrinking, verify that we can hold the current active - * set in the new ring - */ - if (active >= new_size) - return false; - - /* allocate the array to hold the software ring */ - ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT); - if (!ring) - return false; - - /* allocate/trim descriptors as needed */ - if (new_size > curr_size) { - /* copy current descriptors to the new ring */ - for (i = 0; i < curr_size; i++) { - u16 curr_idx = (ioat->tail+i) & (curr_size-1); - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ring[new_idx] = ioat->ring[curr_idx]; - set_desc_id(ring[new_idx], new_idx); - } - - /* add new descriptors to the ring */ - for (i = curr_size; i < new_size; i++) { - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT); - if (!ring[new_idx]) { - while (i--) { - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ioat2_free_ring_ent(ring[new_idx], c); - } - kfree(ring); - return false; - } - set_desc_id(ring[new_idx], new_idx); - } - - /* hw link new descriptors */ - for (i = curr_size-1; i < new_size; i++) { - u16 new_idx = (ioat->tail+i) & (new_size-1); - struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)]; - struct ioat_dma_descriptor *hw = ring[new_idx]->hw; - - hw->next = next->txd.phys; - } - } else { - struct ioat_dma_descriptor *hw; - struct ioat_ring_ent *next; - - /* copy current descriptors to the new ring, dropping the - * removed descriptors - */ - for (i = 0; i < new_size; i++) { - u16 curr_idx = (ioat->tail+i) & (curr_size-1); - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ring[new_idx] = ioat->ring[curr_idx]; - set_desc_id(ring[new_idx], new_idx); - } - - /* free deleted descriptors */ - for (i = new_size; i < curr_size; i++) { - struct ioat_ring_ent *ent; - - ent = ioat2_get_ring_ent(ioat, ioat->tail+i); - ioat2_free_ring_ent(ent, c); - } - - /* fix up hardware ring */ - hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw; - next = ring[(ioat->tail+new_size) & (new_size-1)]; - hw->next = next->txd.phys; - } - - dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", - __func__, new_size); - - kfree(ioat->ring); - ioat->ring = ring; - ioat->alloc_order = order; - - return true; -} - -/** - * ioat2_check_space_lock - verify space and grab ring producer lock - * @ioat: ioat2,3 channel (ring) to operate on - * @num_descs: allocation length - */ -int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs) -{ - struct ioat_chan_common *chan = &ioat->base; - bool retry; - - retry: - spin_lock_bh(&ioat->prep_lock); - /* never allow the last descriptor to be consumed, we need at - * least one free at all times to allow for on-the-fly ring - * resizing. - */ - if (likely(ioat2_ring_space(ioat) > num_descs)) { - dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n", - __func__, num_descs, ioat->head, ioat->tail, ioat->issued); - ioat->produce = num_descs; - return 0; /* with ioat->prep_lock held */ - } - retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &chan->state); - spin_unlock_bh(&ioat->prep_lock); - - /* is another cpu already trying to expand the ring? */ - if (retry) - goto retry; - - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->prep_lock); - retry = reshape_ring(ioat, ioat->alloc_order + 1); - clear_bit(IOAT_RESHAPE_PENDING, &chan->state); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - - /* if we were able to expand the ring retry the allocation */ - if (retry) - goto retry; - - if (printk_ratelimit()) - dev_dbg(to_dev(chan), "%s: ring full! num_descs: %d (%x:%x:%x)\n", - __func__, num_descs, ioat->head, ioat->tail, ioat->issued); - - /* progress reclaim in the allocation failure case we may be - * called under bh_disabled so we need to trigger the timer - * event directly - */ - if (time_is_before_jiffies(chan->timer.expires) - && timer_pending(&chan->timer)) { - struct ioatdma_device *device = chan->device; - - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - device->timer_fn((unsigned long) &chan->common); - } - - return -ENOMEM; -} - -struct dma_async_tx_descriptor * -ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_dma_descriptor *hw; - struct ioat_ring_ent *desc; - dma_addr_t dst = dma_dest; - dma_addr_t src = dma_src; - size_t total_len = len; - int num_descs, idx, i; - - num_descs = ioat2_xferlen_to_descs(ioat, len); - if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0) - idx = ioat->head; - else - return NULL; - i = 0; - do { - size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log); - - desc = ioat2_get_ring_ent(ioat, idx + i); - hw = desc->hw; - - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dst; - - len -= copy; - dst += copy; - src += copy; - dump_desc_dbg(ioat, desc); - } while (++i < num_descs); - - desc->txd.flags = flags; - desc->len = total_len; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - hw->ctl_f.compl_write = 1; - dump_desc_dbg(ioat, desc); - /* we leave the channel locked to ensure in order submission */ - - return &desc->txd; -} - -/** - * ioat2_free_chan_resources - release all the descriptors - * @chan: the channel to be cleaned - */ -void ioat2_free_chan_resources(struct dma_chan *c) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *desc; - const u16 total_descs = 1 << ioat->alloc_order; - int descs; - int i; - - /* Before freeing channel resources first check - * if they have been previously allocated for this channel. - */ - if (!ioat->ring) - return; - - ioat_stop(chan); - device->reset_hw(chan); - - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->prep_lock); - descs = ioat2_ring_space(ioat); - dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs); - for (i = 0; i < descs; i++) { - desc = ioat2_get_ring_ent(ioat, ioat->head + i); - ioat2_free_ring_ent(desc, c); - } - - if (descs < total_descs) - dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", - total_descs - descs); - - for (i = 0; i < total_descs - descs; i++) { - desc = ioat2_get_ring_ent(ioat, ioat->tail + i); - dump_desc_dbg(ioat, desc); - ioat2_free_ring_ent(desc, c); - } - - kfree(ioat->ring); - ioat->ring = NULL; - ioat->alloc_order = 0; - pci_pool_free(device->completion_pool, chan->completion, - chan->completion_dma); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - - chan->last_completion = 0; - chan->completion_dma = 0; - ioat->dmacount = 0; -} - -static ssize_t ring_size_show(struct dma_chan *c, char *page) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - - return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1); -} -static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); - -static ssize_t ring_active_show(struct dma_chan *c, char *page) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - - /* ...taken outside the lock, no need to be precise */ - return sprintf(page, "%d\n", ioat2_ring_active(ioat)); -} -static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); - -static struct attribute *ioat2_attrs[] = { - &ring_size_attr.attr, - &ring_active_attr.attr, - &ioat_cap_attr.attr, - &ioat_version_attr.attr, - NULL, -}; - -struct kobj_type ioat2_ktype = { - .sysfs_ops = &ioat_sysfs_ops, - .default_attrs = ioat2_attrs, -}; - -int ioat2_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - int err; - - device->enumerate_channels = ioat2_enumerate_channels; - device->reset_hw = ioat2_reset_hw; - device->cleanup_fn = ioat2_cleanup_event; - device->timer_fn = ioat2_timer_event; - device->self_test = ioat_dma_self_test; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; - dma->device_issue_pending = ioat2_issue_pending; - dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; - dma->device_free_chan_resources = ioat2_free_chan_resources; - dma->device_tx_status = ioat_dma_tx_status; - - err = ioat_probe(device); - if (err) - return err; - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } - - err = ioat_register(device); - if (err) - return err; - - ioat_kobject_add(device, &ioat2_ktype); - - if (dca) - device->dca = ioat2_dca_init(pdev, device->reg_base); - - return err; -} diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h deleted file mode 100644 index bf24ebe874b0..000000000000 --- a/drivers/dma/ioat/dma_v2.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ -#ifndef IOATDMA_V2_H -#define IOATDMA_V2_H - -#include <linux/dmaengine.h> -#include <linux/circ_buf.h> -#include "dma.h" -#include "hw.h" - - -extern int ioat_pending_level; -extern int ioat_ring_alloc_order; - -/* - * workaround for IOAT ver.3.0 null descriptor issue - * (channel returns error when size is 0) - */ -#define NULL_DESC_BUFFER_SIZE 1 - -#define IOAT_MAX_ORDER 16 -#define ioat_get_alloc_order() \ - (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) -#define ioat_get_max_alloc_order() \ - (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) - -/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes - * @base: common ioat channel parameters - * @xfercap_log; log2 of channel max transfer length (for fast division) - * @head: allocated index - * @issued: hardware notification point - * @tail: cleanup index - * @dmacount: identical to 'head' except for occasionally resetting to zero - * @alloc_order: log2 of the number of allocated descriptors - * @produce: number of descriptors to produce at submit time - * @ring: software ring buffer implementation of hardware ring - * @prep_lock: serializes descriptor preparation (producers) - */ -struct ioat2_dma_chan { - struct ioat_chan_common base; - size_t xfercap_log; - u16 head; - u16 issued; - u16 tail; - u16 dmacount; - u16 alloc_order; - u16 produce; - struct ioat_ring_ent **ring; - spinlock_t prep_lock; -}; - -static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c) -{ - struct ioat_chan_common *chan = to_chan_common(c); - - return container_of(chan, struct ioat2_dma_chan, base); -} - -static inline u32 ioat2_ring_size(struct ioat2_dma_chan *ioat) -{ - return 1 << ioat->alloc_order; -} - -/* count of descriptors in flight with the engine */ -static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat) -{ - return CIRC_CNT(ioat->head, ioat->tail, ioat2_ring_size(ioat)); -} - -/* count of descriptors pending submission to hardware */ -static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat) -{ - return CIRC_CNT(ioat->head, ioat->issued, ioat2_ring_size(ioat)); -} - -static inline u32 ioat2_ring_space(struct ioat2_dma_chan *ioat) -{ - return ioat2_ring_size(ioat) - ioat2_ring_active(ioat); -} - -static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len) -{ - u16 num_descs = len >> ioat->xfercap_log; - - num_descs += !!(len & ((1 << ioat->xfercap_log) - 1)); - return num_descs; -} - -/** - * struct ioat_ring_ent - wrapper around hardware descriptor - * @hw: hardware DMA descriptor (for memcpy) - * @fill: hardware fill descriptor - * @xor: hardware xor descriptor - * @xor_ex: hardware xor extension descriptor - * @pq: hardware pq descriptor - * @pq_ex: hardware pq extension descriptor - * @pqu: hardware pq update descriptor - * @raw: hardware raw (un-typed) descriptor - * @txd: the generic software descriptor for all engines - * @len: total transaction length for unmap - * @result: asynchronous result of validate operations - * @id: identifier for debug - */ - -struct ioat_ring_ent { - union { - struct ioat_dma_descriptor *hw; - struct ioat_xor_descriptor *xor; - struct ioat_xor_ext_descriptor *xor_ex; - struct ioat_pq_descriptor *pq; - struct ioat_pq_ext_descriptor *pq_ex; - struct ioat_pq_update_descriptor *pqu; - struct ioat_raw_descriptor *raw; - }; - size_t len; - struct dma_async_tx_descriptor txd; - enum sum_check_flags *result; - #ifdef DEBUG - int id; - #endif - struct ioat_sed_ent *sed; -}; - -static inline struct ioat_ring_ent * -ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx) -{ - return ioat->ring[idx & (ioat2_ring_size(ioat) - 1)]; -} - -static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr) -{ - struct ioat_chan_common *chan = &ioat->base; - - writel(addr & 0x00000000FFFFFFFF, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(addr >> 32, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); -} - -int ioat2_dma_probe(struct ioatdma_device *dev, int dca); -int ioat3_dma_probe(struct ioatdma_device *dev, int dca); -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); -int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs); -int ioat2_enumerate_channels(struct ioatdma_device *device); -struct dma_async_tx_descriptor * -ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags); -void ioat2_issue_pending(struct dma_chan *chan); -int ioat2_alloc_chan_resources(struct dma_chan *c); -void ioat2_free_chan_resources(struct dma_chan *c); -void __ioat2_restart_chan(struct ioat2_dma_chan *ioat); -bool reshape_ring(struct ioat2_dma_chan *ioat, int order); -void __ioat2_issue_pending(struct ioat2_dma_chan *ioat); -void ioat2_cleanup_event(unsigned long data); -void ioat2_timer_event(unsigned long data); -int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo); -int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo); -extern struct kobj_type ioat2_ktype; -extern struct kmem_cache *ioat2_cache; -#endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c deleted file mode 100644 index 64790a45ef5d..000000000000 --- a/drivers/dma/ioat/dma_v3.c +++ /dev/null @@ -1,1717 +0,0 @@ -/* - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * BSD LICENSE - * - * Copyright(c) 2004-2009 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Support routines for v3+ hardware - */ -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/gfp.h> -#include <linux/dmaengine.h> -#include <linux/dma-mapping.h> -#include <linux/prefetch.h> -#include "../dmaengine.h" -#include "registers.h" -#include "hw.h" -#include "dma.h" -#include "dma_v2.h" - -extern struct kmem_cache *ioat3_sed_cache; - -/* ioat hardware assumes at least two sources for raid operations */ -#define src_cnt_to_sw(x) ((x) + 2) -#define src_cnt_to_hw(x) ((x) - 2) -#define ndest_to_sw(x) ((x) + 1) -#define ndest_to_hw(x) ((x) - 1) -#define src16_cnt_to_sw(x) ((x) + 9) -#define src16_cnt_to_hw(x) ((x) - 9) - -/* provide a lookup table for setting the source address in the base or - * extended descriptor of an xor or pq descriptor - */ -static const u8 xor_idx_to_desc = 0xe0; -static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; -static const u8 pq_idx_to_desc = 0xf8; -static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2 }; -static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; -static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7, - 0, 1, 2, 3, 4, 5, 6 }; - -static void ioat3_eh(struct ioat2_dma_chan *ioat); - -static void xor_set_src(struct ioat_raw_descriptor *descs[2], - dma_addr_t addr, u32 offset, int idx) -{ - struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; - - raw->field[xor_idx_to_field[idx]] = addr + offset; -} - -static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) -{ - struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; - - return raw->field[pq_idx_to_field[idx]]; -} - -static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) -{ - struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; - - return raw->field[pq16_idx_to_field[idx]]; -} - -static void pq_set_src(struct ioat_raw_descriptor *descs[2], - dma_addr_t addr, u32 offset, u8 coef, int idx) -{ - struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; - struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; - - raw->field[pq_idx_to_field[idx]] = addr + offset; - pq->coef[idx] = coef; -} - -static bool is_jf_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_JSF0: - case PCI_DEVICE_ID_INTEL_IOAT_JSF1: - case PCI_DEVICE_ID_INTEL_IOAT_JSF2: - case PCI_DEVICE_ID_INTEL_IOAT_JSF3: - case PCI_DEVICE_ID_INTEL_IOAT_JSF4: - case PCI_DEVICE_ID_INTEL_IOAT_JSF5: - case PCI_DEVICE_ID_INTEL_IOAT_JSF6: - case PCI_DEVICE_ID_INTEL_IOAT_JSF7: - case PCI_DEVICE_ID_INTEL_IOAT_JSF8: - case PCI_DEVICE_ID_INTEL_IOAT_JSF9: - return true; - default: - return false; - } -} - -static bool is_snb_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_SNB0: - case PCI_DEVICE_ID_INTEL_IOAT_SNB1: - case PCI_DEVICE_ID_INTEL_IOAT_SNB2: - case PCI_DEVICE_ID_INTEL_IOAT_SNB3: - case PCI_DEVICE_ID_INTEL_IOAT_SNB4: - case PCI_DEVICE_ID_INTEL_IOAT_SNB5: - case PCI_DEVICE_ID_INTEL_IOAT_SNB6: - case PCI_DEVICE_ID_INTEL_IOAT_SNB7: - case PCI_DEVICE_ID_INTEL_IOAT_SNB8: - case PCI_DEVICE_ID_INTEL_IOAT_SNB9: - return true; - default: - return false; - } -} - -static bool is_ivb_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_IVB0: - case PCI_DEVICE_ID_INTEL_IOAT_IVB1: - case PCI_DEVICE_ID_INTEL_IOAT_IVB2: - case PCI_DEVICE_ID_INTEL_IOAT_IVB3: - case PCI_DEVICE_ID_INTEL_IOAT_IVB4: - case PCI_DEVICE_ID_INTEL_IOAT_IVB5: - case PCI_DEVICE_ID_INTEL_IOAT_IVB6: - case PCI_DEVICE_ID_INTEL_IOAT_IVB7: - case PCI_DEVICE_ID_INTEL_IOAT_IVB8: - case PCI_DEVICE_ID_INTEL_IOAT_IVB9: - return true; - default: - return false; - } - -} - -static bool is_hsw_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_HSW0: - case PCI_DEVICE_ID_INTEL_IOAT_HSW1: - case PCI_DEVICE_ID_INTEL_IOAT_HSW2: - case PCI_DEVICE_ID_INTEL_IOAT_HSW3: - case PCI_DEVICE_ID_INTEL_IOAT_HSW4: - case PCI_DEVICE_ID_INTEL_IOAT_HSW5: - case PCI_DEVICE_ID_INTEL_IOAT_HSW6: - case PCI_DEVICE_ID_INTEL_IOAT_HSW7: - case PCI_DEVICE_ID_INTEL_IOAT_HSW8: - case PCI_DEVICE_ID_INTEL_IOAT_HSW9: - return true; - default: - return false; - } - -} - -static bool is_xeon_cb32(struct pci_dev *pdev) -{ - return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || - is_hsw_ioat(pdev); -} - -static bool is_bwd_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_BWD0: - case PCI_DEVICE_ID_INTEL_IOAT_BWD1: - case PCI_DEVICE_ID_INTEL_IOAT_BWD2: - case PCI_DEVICE_ID_INTEL_IOAT_BWD3: - /* even though not Atom, BDX-DE has same DMA silicon */ - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: - return true; - default: - return false; - } -} - -static bool is_bwd_noraid(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_BWD2: - case PCI_DEVICE_ID_INTEL_IOAT_BWD3: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: - return true; - default: - return false; - } - -} - -static void pq16_set_src(struct ioat_raw_descriptor *desc[3], - dma_addr_t addr, u32 offset, u8 coef, unsigned idx) -{ - struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0]; - struct ioat_pq16a_descriptor *pq16 = - (struct ioat_pq16a_descriptor *)desc[1]; - struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; - - raw->field[pq16_idx_to_field[idx]] = addr + offset; - - if (idx < 8) - pq->coef[idx] = coef; - else - pq16->coef[idx - 8] = coef; -} - -static struct ioat_sed_ent * -ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool) -{ - struct ioat_sed_ent *sed; - gfp_t flags = __GFP_ZERO | GFP_ATOMIC; - - sed = kmem_cache_alloc(ioat3_sed_cache, flags); - if (!sed) - return NULL; - - sed->hw_pool = hw_pool; - sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool], - flags, &sed->dma); - if (!sed->hw) { - kmem_cache_free(ioat3_sed_cache, sed); - return NULL; - } - - return sed; -} - -static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed) -{ - if (!sed) - return; - - dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); - kmem_cache_free(ioat3_sed_cache, sed); -} - -static bool desc_has_ext(struct ioat_ring_ent *desc) -{ - struct ioat_dma_descriptor *hw = desc->hw; - - if (hw->ctl_f.op == IOAT_OP_XOR || - hw->ctl_f.op == IOAT_OP_XOR_VAL) { - struct ioat_xor_descriptor *xor = desc->xor; - - if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) - return true; - } else if (hw->ctl_f.op == IOAT_OP_PQ || - hw->ctl_f.op == IOAT_OP_PQ_VAL) { - struct ioat_pq_descriptor *pq = desc->pq; - - if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) - return true; - } - - return false; -} - -static u64 ioat3_get_current_completion(struct ioat_chan_common *chan) -{ - u64 phys_complete; - u64 completion; - - completion = *chan->completion; - phys_complete = ioat_chansts_to_addr(completion); - - dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, - (unsigned long long) phys_complete); - - return phys_complete; -} - -static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan, - u64 *phys_complete) -{ - *phys_complete = ioat3_get_current_completion(chan); - if (*phys_complete == chan->last_completion) - return false; - - clear_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - - return true; -} - -static void -desc_get_errstat(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc) -{ - struct ioat_dma_descriptor *hw = desc->hw; - - switch (hw->ctl_f.op) { - case IOAT_OP_PQ_VAL: - case IOAT_OP_PQ_VAL_16S: - { - struct ioat_pq_descriptor *pq = desc->pq; - - /* check if there's error written */ - if (!pq->dwbes_f.wbes) - return; - - /* need to set a chanerr var for checking to clear later */ - - if (pq->dwbes_f.p_val_err) - *desc->result |= SUM_CHECK_P_RESULT; - - if (pq->dwbes_f.q_val_err) - *desc->result |= SUM_CHECK_Q_RESULT; - - return; - } - default: - return; - } -} - -/** - * __cleanup - reclaim used descriptors - * @ioat: channel (ring) to clean - * - * The difference from the dma_v2.c __cleanup() is that this routine - * handles extended descriptors and dma-unmapping raid operations. - */ -static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) -{ - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *desc; - bool seen_current = false; - int idx = ioat->tail, i; - u16 active; - - dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued); - - /* - * At restart of the channel, the completion address and the - * channel status will be 0 due to starting a new chain. Since - * it's new chain and the first descriptor "fails", there is - * nothing to clean up. We do not want to reap the entire submitted - * chain due to this 0 address value and then BUG. - */ - if (!phys_complete) - return; - - active = ioat2_ring_active(ioat); - for (i = 0; i < active && !seen_current; i++) { - struct dma_async_tx_descriptor *tx; - - smp_read_barrier_depends(); - prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); - desc = ioat2_get_ring_ent(ioat, idx + i); - dump_desc_dbg(ioat, desc); - - /* set err stat if we are using dwbes */ - if (device->cap & IOAT_CAP_DWBES) - desc_get_errstat(ioat, desc); - - tx = &desc->txd; - if (tx->cookie) { - dma_cookie_complete(tx); - dma_descriptor_unmap(tx); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } - - if (tx->phys == phys_complete) - seen_current = true; - - /* skip extended descriptors */ - if (desc_has_ext(desc)) { - BUG_ON(i + 1 >= active); - i++; - } - - /* cleanup super extended descriptors */ - if (desc->sed) { - ioat3_free_sed(device, desc->sed); - desc->sed = NULL; - } - } - smp_mb(); /* finish all descriptor reads before incrementing tail */ - ioat->tail = idx + i; - BUG_ON(active && !seen_current); /* no active descs have written a completion? */ - chan->last_completion = phys_complete; - - if (active - i == 0) { - dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", - __func__); - clear_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } - /* 5 microsecond delay per pending descriptor */ - writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK), - chan->device->reg_base + IOAT_INTRDELAY_OFFSET); -} - -static void ioat3_cleanup(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - u64 phys_complete; - - spin_lock_bh(&chan->cleanup_lock); - - if (ioat3_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - if (is_ioat_halted(*chan->completion)) { - u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - - if (chanerr & IOAT_CHANERR_HANDLE_MASK) { - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - ioat3_eh(ioat); - } - } - - spin_unlock_bh(&chan->cleanup_lock); -} - -static void ioat3_cleanup_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - - ioat3_cleanup(ioat); - if (!test_bit(IOAT_RUN, &chan->state)) - return; - writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); -} - -static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - u64 phys_complete; - - ioat2_quiesce(chan, 0); - if (ioat3_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - __ioat2_restart_chan(ioat); -} - -static void ioat3_eh(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - struct pci_dev *pdev = to_pdev(chan); - struct ioat_dma_descriptor *hw; - struct dma_async_tx_descriptor *tx; - u64 phys_complete; - struct ioat_ring_ent *desc; - u32 err_handled = 0; - u32 chanerr_int; - u32 chanerr; - - /* cleanup so tail points to descriptor that caused the error */ - if (ioat3_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); - - dev_dbg(to_dev(chan), "%s: error = %x:%x\n", - __func__, chanerr, chanerr_int); - - desc = ioat2_get_ring_ent(ioat, ioat->tail); - hw = desc->hw; - dump_desc_dbg(ioat, desc); - - switch (hw->ctl_f.op) { - case IOAT_OP_XOR_VAL: - if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { - *desc->result |= SUM_CHECK_P_RESULT; - err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; - } - break; - case IOAT_OP_PQ_VAL: - case IOAT_OP_PQ_VAL_16S: - if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { - *desc->result |= SUM_CHECK_P_RESULT; - err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; - } - if (chanerr & IOAT_CHANERR_XOR_Q_ERR) { - *desc->result |= SUM_CHECK_Q_RESULT; - err_handled |= IOAT_CHANERR_XOR_Q_ERR; - } - break; - } - - /* fault on unhandled error or spurious halt */ - if (chanerr ^ err_handled || chanerr == 0) { - dev_err(to_dev(chan), "%s: fatal error (%x:%x)\n", - __func__, chanerr, err_handled); - BUG(); - } else { /* cleanup the faulty descriptor */ - tx = &desc->txd; - if (tx->cookie) { - dma_cookie_complete(tx); - dma_descriptor_unmap(tx); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } - } - - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int); - - /* mark faulting descriptor as complete */ - *chan->completion = desc->txd.phys; - - spin_lock_bh(&ioat->prep_lock); - ioat3_restart_channel(ioat); - spin_unlock_bh(&ioat->prep_lock); -} - -static void check_active(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - if (ioat2_ring_active(ioat)) { - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - return; - } - - if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &chan->state)) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - else if (ioat->alloc_order > ioat_get_alloc_order()) { - /* if the ring is idle, empty, and oversized try to step - * down the size - */ - reshape_ring(ioat, ioat->alloc_order - 1); - - /* keep shrinking until we get back to our minimum - * default size - */ - if (ioat->alloc_order > ioat_get_alloc_order()) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } - -} - -static void ioat3_timer_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - u64 status; - - status = ioat_chansts(chan); - - /* when halted due to errors check for channel - * programming errors before advancing the completion state - */ - if (is_ioat_halted(status)) { - u32 chanerr; - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - dev_err(to_dev(chan), "%s: Channel halted (%x)\n", - __func__, chanerr); - if (test_bit(IOAT_RUN, &chan->state)) - BUG_ON(is_ioat_bug(chanerr)); - else /* we never got off the ground */ - return; - } - - /* if we haven't made progress and we have already - * acknowledged a pending completion once, then be more - * forceful with a restart - */ - spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { - spin_lock_bh(&ioat->prep_lock); - ioat3_restart_channel(ioat); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - return; - } else { - set_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - } - - - if (ioat2_ring_active(ioat)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - else { - spin_lock_bh(&ioat->prep_lock); - check_active(ioat); - spin_unlock_bh(&ioat->prep_lock); - } - spin_unlock_bh(&chan->cleanup_lock); -} - -static enum dma_status -ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie, - struct dma_tx_state *txstate) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - enum dma_status ret; - - ret = dma_cookie_status(c, cookie, txstate); - if (ret == DMA_COMPLETE) - return ret; - - ioat3_cleanup(ioat); - - return dma_cookie_status(c, cookie, txstate); -} - -static struct dma_async_tx_descriptor * -__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, - dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, - size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_ring_ent *compl_desc; - struct ioat_ring_ent *desc; - struct ioat_ring_ent *ext; - size_t total_len = len; - struct ioat_xor_descriptor *xor; - struct ioat_xor_ext_descriptor *xor_ex = NULL; - struct ioat_dma_descriptor *hw; - int num_descs, with_ext, idx, i; - u32 offset = 0; - u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; - - BUG_ON(src_cnt < 2); - - num_descs = ioat2_xferlen_to_descs(ioat, len); - /* we need 2x the number of descriptors to cover greater than 5 - * sources - */ - if (src_cnt > 5) { - with_ext = 1; - num_descs *= 2; - } else - with_ext = 0; - - /* completion writes from the raid engine may pass completion - * writes from the legacy engine, so we need one extra null - * (legacy) descriptor to ensure all completion writes arrive in - * order. - */ - if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0) - idx = ioat->head; - else - return NULL; - i = 0; - do { - struct ioat_raw_descriptor *descs[2]; - size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); - int s; - - desc = ioat2_get_ring_ent(ioat, idx + i); - xor = desc->xor; - - /* save a branch by unconditionally retrieving the - * extended descriptor xor_set_src() knows to not write - * to it in the single descriptor case - */ - ext = ioat2_get_ring_ent(ioat, idx + i + 1); - xor_ex = ext->xor_ex; - - descs[0] = (struct ioat_raw_descriptor *) xor; - descs[1] = (struct ioat_raw_descriptor *) xor_ex; - for (s = 0; s < src_cnt; s++) - xor_set_src(descs, src[s], offset, s); - xor->size = xfer_size; - xor->dst_addr = dest + offset; - xor->ctl = 0; - xor->ctl_f.op = op; - xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); - - len -= xfer_size; - offset += xfer_size; - dump_desc_dbg(ioat, desc); - } while ((i += 1 + with_ext) < num_descs); - - /* last xor descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - - /* completion descriptor carries interrupt bit */ - compl_desc = ioat2_get_ring_ent(ioat, idx + i); - compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; - hw = compl_desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - dump_desc_dbg(ioat, compl_desc); - - /* we leave the channel locked to ensure in order submission */ - return &compl_desc->txd; -} - -static struct dma_async_tx_descriptor * -ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, - unsigned int src_cnt, size_t len, unsigned long flags) -{ - return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, - unsigned int src_cnt, size_t len, - enum sum_check_flags *result, unsigned long flags) -{ - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... so clear it here - */ - *result = 0; - - return __ioat3_prep_xor_lock(chan, result, src[0], &src[1], - src_cnt - 1, len, flags); -} - -static void -dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext) -{ - struct device *dev = to_dev(&ioat->base); - struct ioat_pq_descriptor *pq = desc->pq; - struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; - struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; - int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); - int i; - - dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" - " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" - " src_cnt: %d)\n", - desc_id(desc), (unsigned long long) desc->txd.phys, - (unsigned long long) (pq_ex ? pq_ex->next : pq->next), - desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, - pq->ctl_f.compl_write, - pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", - pq->ctl_f.src_cnt); - for (i = 0; i < src_cnt; i++) - dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, - (unsigned long long) pq_get_src(descs, i), pq->coef[i]); - dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); - dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); - dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); -} - -static void dump_pq16_desc_dbg(struct ioat2_dma_chan *ioat, - struct ioat_ring_ent *desc) -{ - struct device *dev = to_dev(&ioat->base); - struct ioat_pq_descriptor *pq = desc->pq; - struct ioat_raw_descriptor *descs[] = { (void *)pq, - (void *)pq, - (void *)pq }; - int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); - int i; - - if (desc->sed) { - descs[1] = (void *)desc->sed->hw; - descs[2] = (void *)desc->sed->hw + 64; - } - - dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" - " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" - " src_cnt: %d)\n", - desc_id(desc), (unsigned long long) desc->txd.phys, - (unsigned long long) pq->next, - desc->txd.flags, pq->size, pq->ctl, - pq->ctl_f.op, pq->ctl_f.int_en, - pq->ctl_f.compl_write, - pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", - pq->ctl_f.src_cnt); - for (i = 0; i < src_cnt; i++) { - dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, - (unsigned long long) pq16_get_src(descs, i), - pq->coef[i]); - } - dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); - dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); -} - -static struct dma_async_tx_descriptor * -__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, - const dma_addr_t *dst, const dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, - size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *compl_desc; - struct ioat_ring_ent *desc; - struct ioat_ring_ent *ext; - size_t total_len = len; - struct ioat_pq_descriptor *pq; - struct ioat_pq_ext_descriptor *pq_ex = NULL; - struct ioat_dma_descriptor *hw; - u32 offset = 0; - u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; - int i, s, idx, with_ext, num_descs; - int cb32 = (device->version < IOAT_VER_3_3) ? 1 : 0; - - dev_dbg(to_dev(chan), "%s\n", __func__); - /* the engine requires at least two sources (we provide - * at least 1 implied source in the DMA_PREP_CONTINUE case) - */ - BUG_ON(src_cnt + dmaf_continue(flags) < 2); - - num_descs = ioat2_xferlen_to_descs(ioat, len); - /* we need 2x the number of descriptors to cover greater than 3 - * sources (we need 1 extra source in the q-only continuation - * case and 3 extra sources in the p+q continuation case. - */ - if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || - (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { - with_ext = 1; - num_descs *= 2; - } else - with_ext = 0; - - /* completion writes from the raid engine may pass completion - * writes from the legacy engine, so we need one extra null - * (legacy) descriptor to ensure all completion writes arrive in - * order. - */ - if (likely(num_descs) && - ioat2_check_space_lock(ioat, num_descs + cb32) == 0) - idx = ioat->head; - else - return NULL; - i = 0; - do { - struct ioat_raw_descriptor *descs[2]; - size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); - - desc = ioat2_get_ring_ent(ioat, idx + i); - pq = desc->pq; - - /* save a branch by unconditionally retrieving the - * extended descriptor pq_set_src() knows to not write - * to it in the single descriptor case - */ - ext = ioat2_get_ring_ent(ioat, idx + i + with_ext); - pq_ex = ext->pq_ex; - - descs[0] = (struct ioat_raw_descriptor *) pq; - descs[1] = (struct ioat_raw_descriptor *) pq_ex; - - for (s = 0; s < src_cnt; s++) - pq_set_src(descs, src[s], offset, scf[s], s); - - /* see the comment for dma_maxpq in include/linux/dmaengine.h */ - if (dmaf_p_disabled_continue(flags)) - pq_set_src(descs, dst[1], offset, 1, s++); - else if (dmaf_continue(flags)) { - pq_set_src(descs, dst[0], offset, 0, s++); - pq_set_src(descs, dst[1], offset, 1, s++); - pq_set_src(descs, dst[1], offset, 0, s++); - } - pq->size = xfer_size; - pq->p_addr = dst[0] + offset; - pq->q_addr = dst[1] + offset; - pq->ctl = 0; - pq->ctl_f.op = op; - /* we turn on descriptor write back error status */ - if (device->cap & IOAT_CAP_DWBES) - pq->ctl_f.wb_en = result ? 1 : 0; - pq->ctl_f.src_cnt = src_cnt_to_hw(s); - pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); - pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); - - len -= xfer_size; - offset += xfer_size; - } while ((i += 1 + with_ext) < num_descs); - - /* last pq descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - dump_pq_desc_dbg(ioat, desc, ext); - - if (!cb32) { - pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - pq->ctl_f.compl_write = 1; - compl_desc = desc; - } else { - /* completion descriptor carries interrupt bit */ - compl_desc = ioat2_get_ring_ent(ioat, idx + i); - compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; - hw = compl_desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - dump_desc_dbg(ioat, compl_desc); - } - - - /* we leave the channel locked to ensure in order submission */ - return &compl_desc->txd; -} - -static struct dma_async_tx_descriptor * -__ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, - const dma_addr_t *dst, const dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, - size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *desc; - size_t total_len = len; - struct ioat_pq_descriptor *pq; - u32 offset = 0; - u8 op; - int i, s, idx, num_descs; - - /* this function is only called with 9-16 sources */ - op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; - - dev_dbg(to_dev(chan), "%s\n", __func__); - - num_descs = ioat2_xferlen_to_descs(ioat, len); - - /* - * 16 source pq is only available on cb3.3 and has no completion - * write hw bug. - */ - if (num_descs && ioat2_check_space_lock(ioat, num_descs) == 0) - idx = ioat->head; - else - return NULL; - - i = 0; - - do { - struct ioat_raw_descriptor *descs[4]; - size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); - - desc = ioat2_get_ring_ent(ioat, idx + i); - pq = desc->pq; - - descs[0] = (struct ioat_raw_descriptor *) pq; - - desc->sed = ioat3_alloc_sed(device, (src_cnt-2) >> 3); - if (!desc->sed) { - dev_err(to_dev(chan), - "%s: no free sed entries\n", __func__); - return NULL; - } - - pq->sed_addr = desc->sed->dma; - desc->sed->parent = desc; - - descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; - descs[2] = (void *)descs[1] + 64; - - for (s = 0; s < src_cnt; s++) - pq16_set_src(descs, src[s], offset, scf[s], s); - - /* see the comment for dma_maxpq in include/linux/dmaengine.h */ - if (dmaf_p_disabled_continue(flags)) - pq16_set_src(descs, dst[1], offset, 1, s++); - else if (dmaf_continue(flags)) { - pq16_set_src(descs, dst[0], offset, 0, s++); - pq16_set_src(descs, dst[1], offset, 1, s++); - pq16_set_src(descs, dst[1], offset, 0, s++); - } - - pq->size = xfer_size; - pq->p_addr = dst[0] + offset; - pq->q_addr = dst[1] + offset; - pq->ctl = 0; - pq->ctl_f.op = op; - pq->ctl_f.src_cnt = src16_cnt_to_hw(s); - /* we turn on descriptor write back error status */ - if (device->cap & IOAT_CAP_DWBES) - pq->ctl_f.wb_en = result ? 1 : 0; - pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); - pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); - - len -= xfer_size; - offset += xfer_size; - } while (++i < num_descs); - - /* last pq descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - - /* with cb3.3 we should be able to do completion w/o a null desc */ - pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - pq->ctl_f.compl_write = 1; - - dump_pq16_desc_dbg(ioat, desc); - - /* we leave the channel locked to ensure in order submission */ - return &desc->txd; -} - -static int src_cnt_flags(unsigned int src_cnt, unsigned long flags) -{ - if (dmaf_p_disabled_continue(flags)) - return src_cnt + 1; - else if (dmaf_continue(flags)) - return src_cnt + 3; - else - return src_cnt; -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, size_t len, - unsigned long flags) -{ - /* specify valid address for disabled result */ - if (flags & DMA_PREP_PQ_DISABLE_P) - dst[0] = dst[1]; - if (flags & DMA_PREP_PQ_DISABLE_Q) - dst[1] = dst[0]; - - /* handle the single source multiply case from the raid6 - * recovery path - */ - if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) { - dma_addr_t single_source[2]; - unsigned char single_source_coef[2]; - - BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); - single_source[0] = src[0]; - single_source[1] = src[0]; - single_source_coef[0] = scf[0]; - single_source_coef[1] = 0; - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, NULL, dst, single_source, - 2, single_source_coef, len, - flags) : - __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, - single_source_coef, len, flags); - - } else { - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt, - scf, len, flags) : - __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, - scf, len, flags); - } -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, size_t len, - enum sum_check_flags *pqres, unsigned long flags) -{ - /* specify valid address for disabled result */ - if (flags & DMA_PREP_PQ_DISABLE_P) - pq[0] = pq[1]; - if (flags & DMA_PREP_PQ_DISABLE_Q) - pq[1] = pq[0]; - - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... so clear it here - */ - *pqres = 0; - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, - flags) : - __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, - flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, - unsigned int src_cnt, size_t len, unsigned long flags) -{ - unsigned char scf[src_cnt]; - dma_addr_t pq[2]; - - memset(scf, 0, src_cnt); - pq[0] = dst; - flags |= DMA_PREP_PQ_DISABLE_Q; - pq[1] = dst; /* specify valid address for disabled result */ - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, - flags) : - __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, - flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, - unsigned int src_cnt, size_t len, - enum sum_check_flags *result, unsigned long flags) -{ - unsigned char scf[src_cnt]; - dma_addr_t pq[2]; - - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... so clear it here - */ - *result = 0; - - memset(scf, 0, src_cnt); - pq[0] = src[0]; - flags |= DMA_PREP_PQ_DISABLE_Q; - pq[1] = pq[0]; /* specify valid address for disabled result */ - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, - scf, len, flags) : - __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, - scf, len, flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_ring_ent *desc; - struct ioat_dma_descriptor *hw; - - if (ioat2_check_space_lock(ioat, 1) == 0) - desc = ioat2_get_ring_ent(ioat, ioat->head); - else - return NULL; - - hw = desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = 1; - hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - hw->src_addr = 0; - hw->dst_addr = 0; - - desc->txd.flags = flags; - desc->len = 1; - - dump_desc_dbg(ioat, desc); - - /* we leave the channel locked to ensure in order submission */ - return &desc->txd; -} - -static void ioat3_dma_test_callback(void *dma_async_param) -{ - struct completion *cmp = dma_async_param; - - complete(cmp); -} - -#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ -static int ioat_xor_val_self_test(struct ioatdma_device *device) -{ - int i, src_idx; - struct page *dest; - struct page *xor_srcs[IOAT_NUM_SRC_TEST]; - struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; - dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; - dma_addr_t dest_dma; - struct dma_async_tx_descriptor *tx; - struct dma_chan *dma_chan; - dma_cookie_t cookie; - u8 cmp_byte = 0; - u32 cmp_word; - u32 xor_val_result; - int err = 0; - struct completion cmp; - unsigned long tmo; - struct device *dev = &device->pdev->dev; - struct dma_device *dma = &device->common; - u8 op = 0; - - dev_dbg(dev, "%s\n", __func__); - - if (!dma_has_cap(DMA_XOR, dma->cap_mask)) - return 0; - - for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { - xor_srcs[src_idx] = alloc_page(GFP_KERNEL); - if (!xor_srcs[src_idx]) { - while (src_idx--) - __free_page(xor_srcs[src_idx]); - return -ENOMEM; - } - } - - dest = alloc_page(GFP_KERNEL); - if (!dest) { - while (src_idx--) - __free_page(xor_srcs[src_idx]); - return -ENOMEM; - } - - /* Fill in src buffers */ - for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { - u8 *ptr = page_address(xor_srcs[src_idx]); - for (i = 0; i < PAGE_SIZE; i++) - ptr[i] = (1 << src_idx); - } - - for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) - cmp_byte ^= (u8) (1 << src_idx); - - cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | - (cmp_byte << 8) | cmp_byte; - - memset(page_address(dest), 0, PAGE_SIZE); - - dma_chan = container_of(dma->channels.next, struct dma_chan, - device_node); - if (dma->device_alloc_chan_resources(dma_chan) < 1) { - err = -ENODEV; - goto out; - } - - /* test xor */ - op = IOAT_OP_XOR; - - dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(dev, dest_dma)) - goto dma_unmap; - - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - dma_srcs[i] = DMA_ERROR_CODE; - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) { - dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_srcs[i])) - goto dma_unmap; - } - tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs, - IOAT_NUM_SRC_TEST, PAGE_SIZE, - DMA_PREP_INTERRUPT); - - if (!tx) { - dev_err(dev, "Self-test xor prep failed\n"); - err = -ENODEV; - goto dma_unmap; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat3_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test xor setup failed\n"); - err = -ENODEV; - goto dma_unmap; - } - dma->device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dev, "Self-test xor timed out\n"); - err = -ENODEV; - goto dma_unmap; - } - - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); - - dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); - for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { - u32 *ptr = page_address(dest); - if (ptr[i] != cmp_word) { - dev_err(dev, "Self-test xor failed compare\n"); - err = -ENODEV; - goto free_resources; - } - } - dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); - - dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); - - /* skip validate if the capability is not present */ - if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) - goto free_resources; - - op = IOAT_OP_XOR_VAL; - - /* validate the sources with the destintation page */ - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - xor_val_srcs[i] = xor_srcs[i]; - xor_val_srcs[i] = dest; - - xor_val_result = 1; - - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_srcs[i] = DMA_ERROR_CODE; - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { - dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_srcs[i])) - goto dma_unmap; - } - tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, - IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, - &xor_val_result, DMA_PREP_INTERRUPT); - if (!tx) { - dev_err(dev, "Self-test zero prep failed\n"); - err = -ENODEV; - goto dma_unmap; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat3_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test zero setup failed\n"); - err = -ENODEV; - goto dma_unmap; - } - dma->device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dev, "Self-test validate timed out\n"); - err = -ENODEV; - goto dma_unmap; - } - - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); - - if (xor_val_result != 0) { - dev_err(dev, "Self-test validate failed compare\n"); - err = -ENODEV; - goto free_resources; - } - - memset(page_address(dest), 0, PAGE_SIZE); - - /* test for non-zero parity sum */ - op = IOAT_OP_XOR_VAL; - - xor_val_result = 0; - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_srcs[i] = DMA_ERROR_CODE; - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { - dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_srcs[i])) - goto dma_unmap; - } - tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, - IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, - &xor_val_result, DMA_PREP_INTERRUPT); - if (!tx) { - dev_err(dev, "Self-test 2nd zero prep failed\n"); - err = -ENODEV; - goto dma_unmap; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat3_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test 2nd zero setup failed\n"); - err = -ENODEV; - goto dma_unmap; - } - dma->device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dev, "Self-test 2nd validate timed out\n"); - err = -ENODEV; - goto dma_unmap; - } - - if (xor_val_result != SUM_CHECK_P_RESULT) { - dev_err(dev, "Self-test validate failed compare\n"); - err = -ENODEV; - goto dma_unmap; - } - - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); - - goto free_resources; -dma_unmap: - if (op == IOAT_OP_XOR) { - if (dest_dma != DMA_ERROR_CODE) - dma_unmap_page(dev, dest_dma, PAGE_SIZE, - DMA_FROM_DEVICE); - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - if (dma_srcs[i] != DMA_ERROR_CODE) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, - DMA_TO_DEVICE); - } else if (op == IOAT_OP_XOR_VAL) { - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - if (dma_srcs[i] != DMA_ERROR_CODE) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, - DMA_TO_DEVICE); - } -free_resources: - dma->device_free_chan_resources(dma_chan); -out: - src_idx = IOAT_NUM_SRC_TEST; - while (src_idx--) - __free_page(xor_srcs[src_idx]); - __free_page(dest); - return err; -} - -static int ioat3_dma_self_test(struct ioatdma_device *device) -{ - int rc = ioat_dma_self_test(device); - - if (rc) - return rc; - - rc = ioat_xor_val_self_test(device); - if (rc) - return rc; - - return 0; -} - -static int ioat3_irq_reinit(struct ioatdma_device *device) -{ - struct pci_dev *pdev = device->pdev; - int irq = pdev->irq, i; - - if (!is_bwd_ioat(pdev)) - return 0; - - switch (device->irq_mode) { - case IOAT_MSIX: - for (i = 0; i < device->common.chancnt; i++) { - struct msix_entry *msix = &device->msix_entries[i]; - struct ioat_chan_common *chan; - - chan = ioat_chan_by_index(device, i); - devm_free_irq(&pdev->dev, msix->vector, chan); - } - - pci_disable_msix(pdev); - break; - case IOAT_MSI: - pci_disable_msi(pdev); - /* fall through */ - case IOAT_INTX: - devm_free_irq(&pdev->dev, irq, device); - break; - default: - return 0; - } - device->irq_mode = IOAT_NOIRQ; - - return ioat_dma_setup_interrupts(device); -} - -static int ioat3_reset_hw(struct ioat_chan_common *chan) -{ - /* throw away whatever the channel was doing and get it - * initialized, with ioat3 specific workarounds - */ - struct ioatdma_device *device = chan->device; - struct pci_dev *pdev = device->pdev; - u32 chanerr; - u16 dev_id; - int err; - - ioat2_quiesce(chan, msecs_to_jiffies(100)); - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - - if (device->version < IOAT_VER_3_3) { - /* clear any pending errors */ - err = pci_read_config_dword(pdev, - IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); - if (err) { - dev_err(&pdev->dev, - "channel error register unreachable\n"); - return err; - } - pci_write_config_dword(pdev, - IOAT_PCI_CHANERR_INT_OFFSET, chanerr); - - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { - pci_write_config_dword(pdev, - IOAT_PCI_DMAUNCERRSTS_OFFSET, - 0x10); - } - } - - err = ioat2_reset_sync(chan, msecs_to_jiffies(200)); - if (!err) - err = ioat3_irq_reinit(device); - - if (err) - dev_err(&pdev->dev, "Failed to reset: %d\n", err); - - return err; -} - -static void ioat3_intr_quirk(struct ioatdma_device *device) -{ - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - u32 errmask; - - dma = &device->common; - - /* - * if we have descriptor write back error status, we mask the - * error interrupts - */ - if (device->cap & IOAT_CAP_DWBES) { - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - errmask = readl(chan->reg_base + - IOAT_CHANERR_MASK_OFFSET); - errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR | - IOAT_CHANERR_XOR_Q_ERR; - writel(errmask, chan->reg_base + - IOAT_CHANERR_MASK_OFFSET); - } - } -} - -int ioat3_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - int dca_en = system_has_dca_enabled(pdev); - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - bool is_raid_device = false; - int err; - - device->enumerate_channels = ioat2_enumerate_channels; - device->reset_hw = ioat3_reset_hw; - device->self_test = ioat3_dma_self_test; - device->intr_quirk = ioat3_intr_quirk; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; - dma->device_issue_pending = ioat2_issue_pending; - dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; - dma->device_free_chan_resources = ioat2_free_chan_resources; - - dma_cap_set(DMA_INTERRUPT, dma->cap_mask); - dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock; - - device->cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); - - if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev)) - device->cap &= ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS); - - /* dca is incompatible with raid operations */ - if (dca_en && (device->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) - device->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); - - if (device->cap & IOAT_CAP_XOR) { - is_raid_device = true; - dma->max_xor = 8; - - dma_cap_set(DMA_XOR, dma->cap_mask); - dma->device_prep_dma_xor = ioat3_prep_xor; - - dma_cap_set(DMA_XOR_VAL, dma->cap_mask); - dma->device_prep_dma_xor_val = ioat3_prep_xor_val; - } - - if (device->cap & IOAT_CAP_PQ) { - is_raid_device = true; - - dma->device_prep_dma_pq = ioat3_prep_pq; - dma->device_prep_dma_pq_val = ioat3_prep_pq_val; - dma_cap_set(DMA_PQ, dma->cap_mask); - dma_cap_set(DMA_PQ_VAL, dma->cap_mask); - - if (device->cap & IOAT_CAP_RAID16SS) { - dma_set_maxpq(dma, 16, 0); - } else { - dma_set_maxpq(dma, 8, 0); - } - - if (!(device->cap & IOAT_CAP_XOR)) { - dma->device_prep_dma_xor = ioat3_prep_pqxor; - dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; - dma_cap_set(DMA_XOR, dma->cap_mask); - dma_cap_set(DMA_XOR_VAL, dma->cap_mask); - - if (device->cap & IOAT_CAP_RAID16SS) { - dma->max_xor = 16; - } else { - dma->max_xor = 8; - } - } - } - - dma->device_tx_status = ioat3_tx_status; - device->cleanup_fn = ioat3_cleanup_event; - device->timer_fn = ioat3_timer_event; - - /* starting with CB3.3 super extended descriptors are supported */ - if (device->cap & IOAT_CAP_RAID16SS) { - char pool_name[14]; - int i; - - for (i = 0; i < MAX_SED_POOLS; i++) { - snprintf(pool_name, 14, "ioat_hw%d_sed", i); - - /* allocate SED DMA pool */ - device->sed_hw_pool[i] = dmam_pool_create(pool_name, - &pdev->dev, - SED_SIZE * (i + 1), 64, 0); - if (!device->sed_hw_pool[i]) - return -ENOMEM; - - } - } - - err = ioat_probe(device); - if (err) - return err; - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } - - err = ioat_register(device); - if (err) - return err; - - ioat_kobject_add(device, &ioat2_ktype); - - if (dca) - device->dca = ioat3_dca_init(pdev, device->reg_base); - - return 0; -} diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index a3e731edce57..ec64aced5655 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -21,11 +21,6 @@ #define IOAT_MMIO_BAR 0 /* CB device ID's */ -#define IOAT_PCI_DID_5000 0x1A38 -#define IOAT_PCI_DID_CNB 0x360B -#define IOAT_PCI_DID_SCNB 0x65FF -#define IOAT_PCI_DID_SNB 0x402F - #define PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20 #define PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21 #define PCI_DEVICE_ID_INTEL_IOAT_IVB2 0x0e22 diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c new file mode 100644 index 000000000000..60a7c3211e0d --- /dev/null +++ b/drivers/dma/ioat/init.c @@ -0,0 +1,1284 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/workqueue.h> +#include <linux/prefetch.h> +#include <linux/dca.h> +#include "dma.h" +#include "registers.h" +#include "hw.h" + +#include "../dmaengine.h" + +MODULE_VERSION(IOAT_DMA_VERSION); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); + +static struct pci_device_id ioat_pci_tbl[] = { + /* I/OAT v3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + + /* I/OAT v3.2 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) }, + + /* I/OAT v3.3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE3) }, + + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); + +static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id); +static void ioat_remove(struct pci_dev *pdev); +static void +ioat_init_channel(struct ioatdma_device *ioat_dma, + struct ioatdma_chan *ioat_chan, int idx); +static void ioat_intr_quirk(struct ioatdma_device *ioat_dma); +static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma); +static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma); + +static int ioat_dca_enabled = 1; +module_param(ioat_dca_enabled, int, 0644); +MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); +int ioat_pending_level = 4; +module_param(ioat_pending_level, int, 0644); +MODULE_PARM_DESC(ioat_pending_level, + "high-water mark for pushing ioat descriptors (default: 4)"); +int ioat_ring_alloc_order = 8; +module_param(ioat_ring_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_alloc_order, + "ioat+: allocate 2^n descriptors per channel (default: 8 max: 16)"); +int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; +module_param(ioat_ring_max_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_max_alloc_order, + "ioat+: upper limit for ring size (default: 16)"); +static char ioat_interrupt_style[32] = "msix"; +module_param_string(ioat_interrupt_style, ioat_interrupt_style, + sizeof(ioat_interrupt_style), 0644); +MODULE_PARM_DESC(ioat_interrupt_style, + "set ioat interrupt style: msix (default), msi, intx"); + +struct kmem_cache *ioat_cache; +struct kmem_cache *ioat_sed_cache; + +static bool is_jf_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_JSF0: + case PCI_DEVICE_ID_INTEL_IOAT_JSF1: + case PCI_DEVICE_ID_INTEL_IOAT_JSF2: + case PCI_DEVICE_ID_INTEL_IOAT_JSF3: + case PCI_DEVICE_ID_INTEL_IOAT_JSF4: + case PCI_DEVICE_ID_INTEL_IOAT_JSF5: + case PCI_DEVICE_ID_INTEL_IOAT_JSF6: + case PCI_DEVICE_ID_INTEL_IOAT_JSF7: + case PCI_DEVICE_ID_INTEL_IOAT_JSF8: + case PCI_DEVICE_ID_INTEL_IOAT_JSF9: + return true; + default: + return false; + } +} + +static bool is_snb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_SNB0: + case PCI_DEVICE_ID_INTEL_IOAT_SNB1: + case PCI_DEVICE_ID_INTEL_IOAT_SNB2: + case PCI_DEVICE_ID_INTEL_IOAT_SNB3: + case PCI_DEVICE_ID_INTEL_IOAT_SNB4: + case PCI_DEVICE_ID_INTEL_IOAT_SNB5: + case PCI_DEVICE_ID_INTEL_IOAT_SNB6: + case PCI_DEVICE_ID_INTEL_IOAT_SNB7: + case PCI_DEVICE_ID_INTEL_IOAT_SNB8: + case PCI_DEVICE_ID_INTEL_IOAT_SNB9: + return true; + default: + return false; + } +} + +static bool is_ivb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_IVB0: + case PCI_DEVICE_ID_INTEL_IOAT_IVB1: + case PCI_DEVICE_ID_INTEL_IOAT_IVB2: + case PCI_DEVICE_ID_INTEL_IOAT_IVB3: + case PCI_DEVICE_ID_INTEL_IOAT_IVB4: + case PCI_DEVICE_ID_INTEL_IOAT_IVB5: + case PCI_DEVICE_ID_INTEL_IOAT_IVB6: + case PCI_DEVICE_ID_INTEL_IOAT_IVB7: + case PCI_DEVICE_ID_INTEL_IOAT_IVB8: + case PCI_DEVICE_ID_INTEL_IOAT_IVB9: + return true; + default: + return false; + } + +} + +static bool is_hsw_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_HSW0: + case PCI_DEVICE_ID_INTEL_IOAT_HSW1: + case PCI_DEVICE_ID_INTEL_IOAT_HSW2: + case PCI_DEVICE_ID_INTEL_IOAT_HSW3: + case PCI_DEVICE_ID_INTEL_IOAT_HSW4: + case PCI_DEVICE_ID_INTEL_IOAT_HSW5: + case PCI_DEVICE_ID_INTEL_IOAT_HSW6: + case PCI_DEVICE_ID_INTEL_IOAT_HSW7: + case PCI_DEVICE_ID_INTEL_IOAT_HSW8: + case PCI_DEVICE_ID_INTEL_IOAT_HSW9: + return true; + default: + return false; + } + +} + +static bool is_xeon_cb32(struct pci_dev *pdev) +{ + return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || + is_hsw_ioat(pdev); +} + +bool is_bwd_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BWD0: + case PCI_DEVICE_ID_INTEL_IOAT_BWD1: + case PCI_DEVICE_ID_INTEL_IOAT_BWD2: + case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + /* even though not Atom, BDX-DE has same DMA silicon */ + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: + return true; + default: + return false; + } +} + +static bool is_bwd_noraid(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BWD2: + case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: + return true; + default: + return false; + } + +} + +/* + * Perform a IOAT transaction to verify the HW works. + */ +#define IOAT_TEST_SIZE 2000 + +static void ioat_dma_test_callback(void *dma_async_param) +{ + struct completion *cmp = dma_async_param; + + complete(cmp); +} + +/** + * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. + * @ioat_dma: dma device to be tested + */ +static int ioat_dma_self_test(struct ioatdma_device *ioat_dma) +{ + int i; + u8 *src; + u8 *dest; + struct dma_device *dma = &ioat_dma->dma_dev; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_chan *dma_chan; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + int err = 0; + struct completion cmp; + unsigned long tmo; + unsigned long flags; + + src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < IOAT_TEST_SIZE; i++) + src[i] = (u8)i; + + /* Start copy, using first DMA channel */ + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + dev_err(dev, "selftest cannot allocate chan resource\n"); + err = -ENODEV; + goto out; + } + + dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_src)) { + dev_err(dev, "mapping src buffer failed\n"); + goto free_resources; + } + dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dma_dest)) { + dev_err(dev, "mapping dest buffer failed\n"); + goto unmap_src; + } + flags = DMA_PREP_INTERRUPT; + tx = ioat_dma->dma_dev.device_prep_dma_memcpy(dma_chan, dma_dest, + dma_src, IOAT_TEST_SIZE, + flags); + if (!tx) { + dev_err(dev, "Self-test prep failed, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test setup failed, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) + != DMA_COMPLETE) { + dev_err(dev, "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + if (memcmp(src, dest, IOAT_TEST_SIZE)) { + dev_err(dev, "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +unmap_dma: + dma_unmap_single(dev, dma_dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); +unmap_src: + dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE); +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +/** + * ioat_dma_setup_interrupts - setup interrupt handler + * @ioat_dma: ioat dma device + */ +int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma) +{ + struct ioatdma_chan *ioat_chan; + struct pci_dev *pdev = ioat_dma->pdev; + struct device *dev = &pdev->dev; + struct msix_entry *msix; + int i, j, msixcnt; + int err = -EINVAL; + u8 intrctrl = 0; + + if (!strcmp(ioat_interrupt_style, "msix")) + goto msix; + if (!strcmp(ioat_interrupt_style, "msi")) + goto msi; + if (!strcmp(ioat_interrupt_style, "intx")) + goto intx; + dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); + goto err_no_irq; + +msix: + /* The number of MSI-X vectors should equal the number of channels */ + msixcnt = ioat_dma->dma_dev.chancnt; + for (i = 0; i < msixcnt; i++) + ioat_dma->msix_entries[i].entry = i; + + err = pci_enable_msix_exact(pdev, ioat_dma->msix_entries, msixcnt); + if (err) + goto msi; + + for (i = 0; i < msixcnt; i++) { + msix = &ioat_dma->msix_entries[i]; + ioat_chan = ioat_chan_by_index(ioat_dma, i); + err = devm_request_irq(dev, msix->vector, + ioat_dma_do_interrupt_msix, 0, + "ioat-msix", ioat_chan); + if (err) { + for (j = 0; j < i; j++) { + msix = &ioat_dma->msix_entries[j]; + ioat_chan = ioat_chan_by_index(ioat_dma, j); + devm_free_irq(dev, msix->vector, ioat_chan); + } + goto msi; + } + } + intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; + ioat_dma->irq_mode = IOAT_MSIX; + goto done; + +msi: + err = pci_enable_msi(pdev); + if (err) + goto intx; + + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, + "ioat-msi", ioat_dma); + if (err) { + pci_disable_msi(pdev); + goto intx; + } + ioat_dma->irq_mode = IOAT_MSI; + goto done; + +intx: + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, + IRQF_SHARED, "ioat-intx", ioat_dma); + if (err) + goto err_no_irq; + + ioat_dma->irq_mode = IOAT_INTX; +done: + if (is_bwd_ioat(pdev)) + ioat_intr_quirk(ioat_dma); + intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; + writeb(intrctrl, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); + return 0; + +err_no_irq: + /* Disable all interrupt generation */ + writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); + ioat_dma->irq_mode = IOAT_NOIRQ; + dev_err(dev, "no usable interrupts\n"); + return err; +} + +static void ioat_disable_interrupts(struct ioatdma_device *ioat_dma) +{ + /* Disable all interrupt generation */ + writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); +} + +static int ioat_probe(struct ioatdma_device *ioat_dma) +{ + int err = -ENODEV; + struct dma_device *dma = &ioat_dma->dma_dev; + struct pci_dev *pdev = ioat_dma->pdev; + struct device *dev = &pdev->dev; + + /* DMA coherent memory pool for DMA descriptor allocations */ + ioat_dma->dma_pool = pci_pool_create("dma_desc_pool", pdev, + sizeof(struct ioat_dma_descriptor), + 64, 0); + if (!ioat_dma->dma_pool) { + err = -ENOMEM; + goto err_dma_pool; + } + + ioat_dma->completion_pool = pci_pool_create("completion_pool", pdev, + sizeof(u64), + SMP_CACHE_BYTES, + SMP_CACHE_BYTES); + + if (!ioat_dma->completion_pool) { + err = -ENOMEM; + goto err_completion_pool; + } + + ioat_enumerate_channels(ioat_dma); + + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + dma->dev = &pdev->dev; + + if (!dma->chancnt) { + dev_err(dev, "channel enumeration error\n"); + goto err_setup_interrupts; + } + + err = ioat_dma_setup_interrupts(ioat_dma); + if (err) + goto err_setup_interrupts; + + err = ioat3_dma_self_test(ioat_dma); + if (err) + goto err_self_test; + + return 0; + +err_self_test: + ioat_disable_interrupts(ioat_dma); +err_setup_interrupts: + pci_pool_destroy(ioat_dma->completion_pool); +err_completion_pool: + pci_pool_destroy(ioat_dma->dma_pool); +err_dma_pool: + return err; +} + +static int ioat_register(struct ioatdma_device *ioat_dma) +{ + int err = dma_async_device_register(&ioat_dma->dma_dev); + + if (err) { + ioat_disable_interrupts(ioat_dma); + pci_pool_destroy(ioat_dma->completion_pool); + pci_pool_destroy(ioat_dma->dma_pool); + } + + return err; +} + +static void ioat_dma_remove(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + + ioat_disable_interrupts(ioat_dma); + + ioat_kobject_del(ioat_dma); + + dma_async_device_unregister(dma); + + pci_pool_destroy(ioat_dma->dma_pool); + pci_pool_destroy(ioat_dma->completion_pool); + + INIT_LIST_HEAD(&dma->channels); +} + +/** + * ioat_enumerate_channels - find and initialize the device's channels + * @ioat_dma: the ioat dma device to be enumerated + */ +static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma) +{ + struct ioatdma_chan *ioat_chan; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_device *dma = &ioat_dma->dma_dev; + u8 xfercap_log; + int i; + + INIT_LIST_HEAD(&dma->channels); + dma->chancnt = readb(ioat_dma->reg_base + IOAT_CHANCNT_OFFSET); + dma->chancnt &= 0x1f; /* bits [4:0] valid */ + if (dma->chancnt > ARRAY_SIZE(ioat_dma->idx)) { + dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", + dma->chancnt, ARRAY_SIZE(ioat_dma->idx)); + dma->chancnt = ARRAY_SIZE(ioat_dma->idx); + } + xfercap_log = readb(ioat_dma->reg_base + IOAT_XFERCAP_OFFSET); + xfercap_log &= 0x1f; /* bits [4:0] valid */ + if (xfercap_log == 0) + return 0; + dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); + + for (i = 0; i < dma->chancnt; i++) { + ioat_chan = devm_kzalloc(dev, sizeof(*ioat_chan), GFP_KERNEL); + if (!ioat_chan) + break; + + ioat_init_channel(ioat_dma, ioat_chan, i); + ioat_chan->xfercap_log = xfercap_log; + spin_lock_init(&ioat_chan->prep_lock); + if (ioat_reset_hw(ioat_chan)) { + i = 0; + break; + } + } + dma->chancnt = i; + return i; +} + +/** + * ioat_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ +static void ioat_free_chan_resources(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + const int total_descs = 1 << ioat_chan->alloc_order; + int descs; + int i; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (!ioat_chan->ring) + return; + + ioat_stop(ioat_chan); + ioat_reset_hw(ioat_chan); + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->prep_lock); + descs = ioat_ring_space(ioat_chan); + dev_dbg(to_dev(ioat_chan), "freeing %d idle descriptors\n", descs); + for (i = 0; i < descs; i++) { + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head + i); + ioat_free_ring_ent(desc, c); + } + + if (descs < total_descs) + dev_err(to_dev(ioat_chan), "Freeing %d in use descriptors!\n", + total_descs - descs); + + for (i = 0; i < total_descs - descs; i++) { + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail + i); + dump_desc_dbg(ioat_chan, desc); + ioat_free_ring_ent(desc, c); + } + + kfree(ioat_chan->ring); + ioat_chan->ring = NULL; + ioat_chan->alloc_order = 0; + pci_pool_free(ioat_dma->completion_pool, ioat_chan->completion, + ioat_chan->completion_dma); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + + ioat_chan->last_completion = 0; + ioat_chan->completion_dma = 0; + ioat_chan->dmacount = 0; +} + +/* ioat_alloc_chan_resources - allocate/initialize ioat descriptor ring + * @chan: channel to be initialized + */ +static int ioat_alloc_chan_resources(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent **ring; + u64 status; + int order; + int i = 0; + u32 chanerr; + + /* have we already been set up? */ + if (ioat_chan->ring) + return 1 << ioat_chan->alloc_order; + + /* Setup register to interrupt and write completion status on error */ + writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + ioat_chan->completion = + pci_pool_alloc(ioat_chan->ioat_dma->completion_pool, + GFP_KERNEL, &ioat_chan->completion_dma); + if (!ioat_chan->completion) + return -ENOMEM; + + memset(ioat_chan->completion, 0, sizeof(*ioat_chan->completion)); + writel(((u64)ioat_chan->completion_dma) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64)ioat_chan->completion_dma) >> 32, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + order = ioat_get_alloc_order(); + ring = ioat_alloc_ring(c, order, GFP_KERNEL); + if (!ring) + return -ENOMEM; + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->prep_lock); + ioat_chan->ring = ring; + ioat_chan->head = 0; + ioat_chan->issued = 0; + ioat_chan->tail = 0; + ioat_chan->alloc_order = order; + set_bit(IOAT_RUN, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + + ioat_start_null_desc(ioat_chan); + + /* check that we got off the ground */ + do { + udelay(1); + status = ioat_chansts(ioat_chan); + } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status)); + + if (is_ioat_active(status) || is_ioat_idle(status)) + return 1 << ioat_chan->alloc_order; + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + + dev_WARN(to_dev(ioat_chan), + "failed to start channel chanerr: %#x\n", chanerr); + ioat_free_chan_resources(c); + return -EFAULT; +} + +/* common channel initialization */ +static void +ioat_init_channel(struct ioatdma_device *ioat_dma, + struct ioatdma_chan *ioat_chan, int idx) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + struct dma_chan *c = &ioat_chan->dma_chan; + unsigned long data = (unsigned long) c; + + ioat_chan->ioat_dma = ioat_dma; + ioat_chan->reg_base = ioat_dma->reg_base + (0x80 * (idx + 1)); + spin_lock_init(&ioat_chan->cleanup_lock); + ioat_chan->dma_chan.device = dma; + dma_cookie_init(&ioat_chan->dma_chan); + list_add_tail(&ioat_chan->dma_chan.device_node, &dma->channels); + ioat_dma->idx[idx] = ioat_chan; + init_timer(&ioat_chan->timer); + ioat_chan->timer.function = ioat_timer_event; + ioat_chan->timer.data = data; + tasklet_init(&ioat_chan->cleanup_task, ioat_cleanup_event, data); +} + +#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ +static int ioat_xor_val_self_test(struct ioatdma_device *ioat_dma) +{ + int i, src_idx; + struct page *dest; + struct page *xor_srcs[IOAT_NUM_SRC_TEST]; + struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dest_dma; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + u32 xor_val_result; + int err = 0; + struct completion cmp; + unsigned long tmo; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_device *dma = &ioat_dma->dma_dev; + u8 op = 0; + + dev_dbg(dev, "%s\n", __func__); + + if (!dma_has_cap(DMA_XOR, dma->cap_mask)) + return 0; + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + + /* Fill in src buffers */ + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + u8 *ptr = page_address(xor_srcs[src_idx]); + + for (i = 0; i < PAGE_SIZE; i++) + ptr[i] = (1 << src_idx); + } + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) + cmp_byte ^= (u8) (1 << src_idx); + + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | + (cmp_byte << 8) | cmp_byte; + + memset(page_address(dest), 0, PAGE_SIZE); + + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + /* test xor */ + op = IOAT_OP_XOR; + + dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dest_dma)) + goto dma_unmap; + + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + dma_srcs[i] = DMA_ERROR_CODE; + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) { + dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_srcs[i])) + goto dma_unmap; + } + tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs, + IOAT_NUM_SRC_TEST, PAGE_SIZE, + DMA_PREP_INTERRUPT); + + if (!tx) { + dev_err(dev, "Self-test xor prep failed\n"); + err = -ENODEV; + goto dma_unmap; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test xor setup failed\n"); + err = -ENODEV; + goto dma_unmap; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { + dev_err(dev, "Self-test xor timed out\n"); + err = -ENODEV; + goto dma_unmap; + } + + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + + dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { + u32 *ptr = page_address(dest); + + if (ptr[i] != cmp_word) { + dev_err(dev, "Self-test xor failed compare\n"); + err = -ENODEV; + goto free_resources; + } + } + dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + + dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + + /* skip validate if the capability is not present */ + if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) + goto free_resources; + + op = IOAT_OP_XOR_VAL; + + /* validate the sources with the destintation page */ + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + xor_val_srcs[i] = xor_srcs[i]; + xor_val_srcs[i] = dest; + + xor_val_result = 1; + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = DMA_ERROR_CODE; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_srcs[i])) + goto dma_unmap; + } + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test zero prep failed\n"); + err = -ENODEV; + goto dma_unmap; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test zero setup failed\n"); + err = -ENODEV; + goto dma_unmap; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { + dev_err(dev, "Self-test validate timed out\n"); + err = -ENODEV; + goto dma_unmap; + } + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + + if (xor_val_result != 0) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto free_resources; + } + + memset(page_address(dest), 0, PAGE_SIZE); + + /* test for non-zero parity sum */ + op = IOAT_OP_XOR_VAL; + + xor_val_result = 0; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = DMA_ERROR_CODE; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_srcs[i])) + goto dma_unmap; + } + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test 2nd zero prep failed\n"); + err = -ENODEV; + goto dma_unmap; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test 2nd zero setup failed\n"); + err = -ENODEV; + goto dma_unmap; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { + dev_err(dev, "Self-test 2nd validate timed out\n"); + err = -ENODEV; + goto dma_unmap; + } + + if (xor_val_result != SUM_CHECK_P_RESULT) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto dma_unmap; + } + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + + goto free_resources; +dma_unmap: + if (op == IOAT_OP_XOR) { + if (dest_dma != DMA_ERROR_CODE) + dma_unmap_page(dev, dest_dma, PAGE_SIZE, + DMA_FROM_DEVICE); + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + if (dma_srcs[i] != DMA_ERROR_CODE) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, + DMA_TO_DEVICE); + } else if (op == IOAT_OP_XOR_VAL) { + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + if (dma_srcs[i] != DMA_ERROR_CODE) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, + DMA_TO_DEVICE); + } +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + src_idx = IOAT_NUM_SRC_TEST; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma) +{ + int rc; + + rc = ioat_dma_self_test(ioat_dma); + if (rc) + return rc; + + rc = ioat_xor_val_self_test(ioat_dma); + + return rc; +} + +static void ioat_intr_quirk(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma; + struct dma_chan *c; + struct ioatdma_chan *ioat_chan; + u32 errmask; + + dma = &ioat_dma->dma_dev; + + /* + * if we have descriptor write back error status, we mask the + * error interrupts + */ + if (ioat_dma->cap & IOAT_CAP_DWBES) { + list_for_each_entry(c, &dma->channels, device_node) { + ioat_chan = to_ioat_chan(c); + errmask = readl(ioat_chan->reg_base + + IOAT_CHANERR_MASK_OFFSET); + errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR | + IOAT_CHANERR_XOR_Q_ERR; + writel(errmask, ioat_chan->reg_base + + IOAT_CHANERR_MASK_OFFSET); + } + } +} + +static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) +{ + struct pci_dev *pdev = ioat_dma->pdev; + int dca_en = system_has_dca_enabled(pdev); + struct dma_device *dma; + struct dma_chan *c; + struct ioatdma_chan *ioat_chan; + bool is_raid_device = false; + int err; + + dma = &ioat_dma->dma_dev; + dma->device_prep_dma_memcpy = ioat_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat_issue_pending; + dma->device_alloc_chan_resources = ioat_alloc_chan_resources; + dma->device_free_chan_resources = ioat_free_chan_resources; + + dma_cap_set(DMA_INTERRUPT, dma->cap_mask); + dma->device_prep_dma_interrupt = ioat_prep_interrupt_lock; + + ioat_dma->cap = readl(ioat_dma->reg_base + IOAT_DMA_CAP_OFFSET); + + if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev)) + ioat_dma->cap &= + ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS); + + /* dca is incompatible with raid operations */ + if (dca_en && (ioat_dma->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) + ioat_dma->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); + + if (ioat_dma->cap & IOAT_CAP_XOR) { + is_raid_device = true; + dma->max_xor = 8; + + dma_cap_set(DMA_XOR, dma->cap_mask); + dma->device_prep_dma_xor = ioat_prep_xor; + + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = ioat_prep_xor_val; + } + + if (ioat_dma->cap & IOAT_CAP_PQ) { + is_raid_device = true; + + dma->device_prep_dma_pq = ioat_prep_pq; + dma->device_prep_dma_pq_val = ioat_prep_pq_val; + dma_cap_set(DMA_PQ, dma->cap_mask); + dma_cap_set(DMA_PQ_VAL, dma->cap_mask); + + if (ioat_dma->cap & IOAT_CAP_RAID16SS) + dma_set_maxpq(dma, 16, 0); + else + dma_set_maxpq(dma, 8, 0); + + if (!(ioat_dma->cap & IOAT_CAP_XOR)) { + dma->device_prep_dma_xor = ioat_prep_pqxor; + dma->device_prep_dma_xor_val = ioat_prep_pqxor_val; + dma_cap_set(DMA_XOR, dma->cap_mask); + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + + if (ioat_dma->cap & IOAT_CAP_RAID16SS) + dma->max_xor = 16; + else + dma->max_xor = 8; + } + } + + dma->device_tx_status = ioat_tx_status; + + /* starting with CB3.3 super extended descriptors are supported */ + if (ioat_dma->cap & IOAT_CAP_RAID16SS) { + char pool_name[14]; + int i; + + for (i = 0; i < MAX_SED_POOLS; i++) { + snprintf(pool_name, 14, "ioat_hw%d_sed", i); + + /* allocate SED DMA pool */ + ioat_dma->sed_hw_pool[i] = dmam_pool_create(pool_name, + &pdev->dev, + SED_SIZE * (i + 1), 64, 0); + if (!ioat_dma->sed_hw_pool[i]) + return -ENOMEM; + + } + } + + if (!(ioat_dma->cap & (IOAT_CAP_XOR | IOAT_CAP_PQ))) + dma_cap_set(DMA_PRIVATE, dma->cap_mask); + + err = ioat_probe(ioat_dma); + if (err) + return err; + + list_for_each_entry(c, &dma->channels, device_node) { + ioat_chan = to_ioat_chan(c); + writel(IOAT_DMA_DCA_ANY_CPU, + ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(ioat_dma); + if (err) + return err; + + ioat_kobject_add(ioat_dma, &ioat_ktype); + + if (dca) + ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base); + + return 0; +} + +#define DRV_NAME "ioatdma" + +static struct pci_driver ioat_pci_driver = { + .name = DRV_NAME, + .id_table = ioat_pci_tbl, + .probe = ioat_pci_probe, + .remove = ioat_remove, +}; + +static struct ioatdma_device * +alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) +{ + struct device *dev = &pdev->dev; + struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); + + if (!d) + return NULL; + d->pdev = pdev; + d->reg_base = iobase; + return d; +} + +static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + void __iomem * const *iomap; + struct device *dev = &pdev->dev; + struct ioatdma_device *device; + int err; + + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); + if (err) + return err; + iomap = pcim_iomap_table(pdev); + if (!iomap) + return -ENOMEM; + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + return err; + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + return err; + + device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); + if (!device) + return -ENOMEM; + pci_set_master(pdev); + pci_set_drvdata(pdev, device); + + device->version = readb(device->reg_base + IOAT_VER_OFFSET); + if (device->version >= IOAT_VER_3_0) + err = ioat3_dma_probe(device, ioat_dca_enabled); + else + return -ENODEV; + + if (err) { + dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); + return -ENODEV; + } + + return 0; +} + +static void ioat_remove(struct pci_dev *pdev) +{ + struct ioatdma_device *device = pci_get_drvdata(pdev); + + if (!device) + return; + + dev_err(&pdev->dev, "Removing dma and dca services\n"); + if (device->dca) { + unregister_dca_provider(device->dca, &pdev->dev); + free_dca_provider(device->dca); + device->dca = NULL; + } + ioat_dma_remove(device); +} + +static int __init ioat_init_module(void) +{ + int err = -ENOMEM; + + pr_info("%s: Intel(R) QuickData Technology Driver %s\n", + DRV_NAME, IOAT_DMA_VERSION); + + ioat_cache = kmem_cache_create("ioat", sizeof(struct ioat_ring_ent), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ioat_cache) + return -ENOMEM; + + ioat_sed_cache = KMEM_CACHE(ioat_sed_ent, 0); + if (!ioat_sed_cache) + goto err_ioat_cache; + + err = pci_register_driver(&ioat_pci_driver); + if (err) + goto err_ioat3_cache; + + return 0; + + err_ioat3_cache: + kmem_cache_destroy(ioat_sed_cache); + + err_ioat_cache: + kmem_cache_destroy(ioat_cache); + + return err; +} +module_init(ioat_init_module); + +static void __exit ioat_exit_module(void) +{ + pci_unregister_driver(&ioat_pci_driver); + kmem_cache_destroy(ioat_cache); +} +module_exit(ioat_exit_module); diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c deleted file mode 100644 index 76f0dc688a19..000000000000 --- a/drivers/dma/ioat/pci.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2007 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -/* - * This driver supports an Intel I/OAT DMA engine, which does asynchronous - * copy operations. - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/dca.h> -#include <linux/slab.h> -#include "dma.h" -#include "dma_v2.h" -#include "registers.h" -#include "hw.h" - -MODULE_VERSION(IOAT_DMA_VERSION); -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Intel Corporation"); - -static struct pci_device_id ioat_pci_tbl[] = { - /* I/OAT v1 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, - { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, - - /* I/OAT v2 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, - - /* I/OAT v3 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, - - /* I/OAT v3.2 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) }, - - /* I/OAT v3.3 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE3) }, - - { 0, } -}; -MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); - -static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id); -static void ioat_remove(struct pci_dev *pdev); - -static int ioat_dca_enabled = 1; -module_param(ioat_dca_enabled, int, 0644); -MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); - -struct kmem_cache *ioat2_cache; -struct kmem_cache *ioat3_sed_cache; - -#define DRV_NAME "ioatdma" - -static struct pci_driver ioat_pci_driver = { - .name = DRV_NAME, - .id_table = ioat_pci_tbl, - .probe = ioat_pci_probe, - .remove = ioat_remove, -}; - -static struct ioatdma_device * -alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) -{ - struct device *dev = &pdev->dev; - struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); - - if (!d) - return NULL; - d->pdev = pdev; - d->reg_base = iobase; - return d; -} - -static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) -{ - void __iomem * const *iomap; - struct device *dev = &pdev->dev; - struct ioatdma_device *device; - int err; - - err = pcim_enable_device(pdev); - if (err) - return err; - - err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); - if (err) - return err; - iomap = pcim_iomap_table(pdev); - if (!iomap) - return -ENOMEM; - - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - return err; - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - return err; - - device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); - if (!device) - return -ENOMEM; - pci_set_master(pdev); - pci_set_drvdata(pdev, device); - - device->version = readb(device->reg_base + IOAT_VER_OFFSET); - if (device->version == IOAT_VER_1_2) - err = ioat1_dma_probe(device, ioat_dca_enabled); - else if (device->version == IOAT_VER_2_0) - err = ioat2_dma_probe(device, ioat_dca_enabled); - else if (device->version >= IOAT_VER_3_0) - err = ioat3_dma_probe(device, ioat_dca_enabled); - else - return -ENODEV; - - if (err) { - dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); - return -ENODEV; - } - - return 0; -} - -static void ioat_remove(struct pci_dev *pdev) -{ - struct ioatdma_device *device = pci_get_drvdata(pdev); - - if (!device) - return; - - dev_err(&pdev->dev, "Removing dma and dca services\n"); - if (device->dca) { - unregister_dca_provider(device->dca, &pdev->dev); - free_dca_provider(device->dca); - device->dca = NULL; - } - ioat_dma_remove(device); -} - -static int __init ioat_init_module(void) -{ - int err = -ENOMEM; - - pr_info("%s: Intel(R) QuickData Technology Driver %s\n", - DRV_NAME, IOAT_DMA_VERSION); - - ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!ioat2_cache) - return -ENOMEM; - - ioat3_sed_cache = KMEM_CACHE(ioat_sed_ent, 0); - if (!ioat3_sed_cache) - goto err_ioat2_cache; - - err = pci_register_driver(&ioat_pci_driver); - if (err) - goto err_ioat3_cache; - - return 0; - - err_ioat3_cache: - kmem_cache_destroy(ioat3_sed_cache); - - err_ioat2_cache: - kmem_cache_destroy(ioat2_cache); - - return err; -} -module_init(ioat_init_module); - -static void __exit ioat_exit_module(void) -{ - pci_unregister_driver(&ioat_pci_driver); - kmem_cache_destroy(ioat2_cache); -} -module_exit(ioat_exit_module); diff --git a/drivers/dma/ioat/prep.c b/drivers/dma/ioat/prep.c new file mode 100644 index 000000000000..e323a4036908 --- /dev/null +++ b/drivers/dma/ioat/prep.c @@ -0,0 +1,707 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/gfp.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/prefetch.h> +#include "../dmaengine.h" +#include "registers.h" +#include "hw.h" +#include "dma.h" + +/* provide a lookup table for setting the source address in the base or + * extended descriptor of an xor or pq descriptor + */ +static const u8 xor_idx_to_desc = 0xe0; +static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; +static const u8 pq_idx_to_desc = 0xf8; +static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2 }; +static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; +static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6 }; + +static void xor_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, int idx) +{ + struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; + + raw->field[xor_idx_to_field[idx]] = addr + offset; +} + +static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) +{ + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + return raw->field[pq_idx_to_field[idx]]; +} + +static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) +{ + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + return raw->field[pq16_idx_to_field[idx]]; +} + +static void pq_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, u8 coef, int idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + raw->field[pq_idx_to_field[idx]] = addr + offset; + pq->coef[idx] = coef; +} + +static void pq16_set_src(struct ioat_raw_descriptor *desc[3], + dma_addr_t addr, u32 offset, u8 coef, unsigned idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0]; + struct ioat_pq16a_descriptor *pq16 = + (struct ioat_pq16a_descriptor *)desc[1]; + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + raw->field[pq16_idx_to_field[idx]] = addr + offset; + + if (idx < 8) + pq->coef[idx] = coef; + else + pq16->coef[idx - 8] = coef; +} + +static struct ioat_sed_ent * +ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool) +{ + struct ioat_sed_ent *sed; + gfp_t flags = __GFP_ZERO | GFP_ATOMIC; + + sed = kmem_cache_alloc(ioat_sed_cache, flags); + if (!sed) + return NULL; + + sed->hw_pool = hw_pool; + sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool], + flags, &sed->dma); + if (!sed->hw) { + kmem_cache_free(ioat_sed_cache, sed); + return NULL; + } + + return sed; +} + +struct dma_async_tx_descriptor * +ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + dma_addr_t dst = dma_dest; + dma_addr_t src = dma_src; + size_t total_len = len; + int num_descs, idx, i; + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + hw = desc->hw; + + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + + len -= copy; + dst += copy; + src += copy; + dump_desc_dbg(ioat_chan, desc); + } while (++i < num_descs); + + desc->txd.flags = flags; + desc->len = total_len; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + dump_desc_dbg(ioat_chan, desc); + /* we leave the channel locked to ensure in order submission */ + + return &desc->txd; +} + + +static struct dma_async_tx_descriptor * +__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, + dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex = NULL; + struct ioat_dma_descriptor *hw; + int num_descs, with_ext, idx, i; + u32 offset = 0; + u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; + + BUG_ON(src_cnt < 2); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + /* we need 2x the number of descriptors to cover greater than 5 + * sources + */ + if (src_cnt > 5) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs+1) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, + len, 1 << ioat_chan->xfercap_log); + int s; + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + xor = desc->xor; + + /* save a branch by unconditionally retrieving the + * extended descriptor xor_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat_get_ring_ent(ioat_chan, idx + i + 1); + xor_ex = ext->xor_ex; + + descs[0] = (struct ioat_raw_descriptor *) xor; + descs[1] = (struct ioat_raw_descriptor *) xor_ex; + for (s = 0; s < src_cnt; s++) + xor_set_src(descs, src[s], offset, s); + xor->size = xfer_size; + xor->dst_addr = dest + offset; + xor->ctl = 0; + xor->ctl_f.op = op; + xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); + + len -= xfer_size; + offset += xfer_size; + dump_desc_dbg(ioat_chan, desc); + } while ((i += 1 + with_ext) < num_descs); + + /* last xor descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* completion descriptor carries interrupt bit */ + compl_desc = ioat_get_ring_ent(ioat_chan, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat_chan, compl_desc); + + /* we leave the channel locked to ensure in order submission */ + return &compl_desc->txd; +} + +struct dma_async_tx_descriptor * +ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + return __ioat_prep_xor_lock(chan, result, src[0], &src[1], + src_cnt - 1, len, flags); +} + +static void +dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc, + struct ioat_ring_ent *ext) +{ + struct device *dev = to_dev(ioat_chan); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; + struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; + int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) (pq_ex ? pq_ex->next : pq->next), + desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, + pq->ctl_f.int_en, pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq_get_src(descs, i), pq->coef[i]); + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); + dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); +} + +static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan, + struct ioat_ring_ent *desc) +{ + struct device *dev = to_dev(ioat_chan); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_raw_descriptor *descs[] = { (void *)pq, + (void *)pq, + (void *)pq }; + int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + if (desc->sed) { + descs[1] = (void *)desc->sed->hw; + descs[2] = (void *)desc->sed->hw + 64; + } + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) pq->next, + desc->txd.flags, pq->size, pq->ctl, + pq->ctl_f.op, pq->ctl_f.int_en, + pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) { + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq16_get_src(descs, i), + pq->coef[i]); + } + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); +} + +static struct dma_async_tx_descriptor * +__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex = NULL; + struct ioat_dma_descriptor *hw; + u32 offset = 0; + u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; + int i, s, idx, with_ext, num_descs; + int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0; + + dev_dbg(to_dev(ioat_chan), "%s\n", __func__); + /* the engine requires at least two sources (we provide + * at least 1 implied source in the DMA_PREP_CONTINUE case) + */ + BUG_ON(src_cnt + dmaf_continue(flags) < 2); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + /* we need 2x the number of descriptors to cover greater than 3 + * sources (we need 1 extra source in the q-only continuation + * case and 3 extra sources in the p+q continuation case. + */ + if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || + (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, len, + 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + pq = desc->pq; + + /* save a branch by unconditionally retrieving the + * extended descriptor pq_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext); + pq_ex = ext->pq_ex; + + descs[0] = (struct ioat_raw_descriptor *) pq; + descs[1] = (struct ioat_raw_descriptor *) pq_ex; + + for (s = 0; s < src_cnt; s++) + pq_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq_set_src(descs, dst[0], offset, 0, s++); + pq_set_src(descs, dst[1], offset, 1, s++); + pq_set_src(descs, dst[1], offset, 0, s++); + } + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + /* we turn on descriptor write back error status */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 1 : 0; + pq->ctl_f.src_cnt = src_cnt_to_hw(s); + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while ((i += 1 + with_ext) < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + dump_pq_desc_dbg(ioat_chan, desc, ext); + + if (!cb32) { + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + compl_desc = desc; + } else { + /* completion descriptor carries interrupt bit */ + compl_desc = ioat_get_ring_ent(ioat_chan, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat_chan, compl_desc); + } + + + /* we leave the channel locked to ensure in order submission */ + return &compl_desc->txd; +} + +static struct dma_async_tx_descriptor * +__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + u32 offset = 0; + u8 op; + int i, s, idx, num_descs; + + /* this function is only called with 9-16 sources */ + op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; + + dev_dbg(to_dev(ioat_chan), "%s\n", __func__); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + + /* + * 16 source pq is only available on cb3.3 and has no completion + * write hw bug. + */ + if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0) + idx = ioat_chan->head; + else + return NULL; + + i = 0; + + do { + struct ioat_raw_descriptor *descs[4]; + size_t xfer_size = min_t(size_t, len, + 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + pq = desc->pq; + + descs[0] = (struct ioat_raw_descriptor *) pq; + + desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3); + if (!desc->sed) { + dev_err(to_dev(ioat_chan), + "%s: no free sed entries\n", __func__); + return NULL; + } + + pq->sed_addr = desc->sed->dma; + desc->sed->parent = desc; + + descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; + descs[2] = (void *)descs[1] + 64; + + for (s = 0; s < src_cnt; s++) + pq16_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq16_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq16_set_src(descs, dst[0], offset, 0, s++); + pq16_set_src(descs, dst[1], offset, 1, s++); + pq16_set_src(descs, dst[1], offset, 0, s++); + } + + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + pq->ctl_f.src_cnt = src16_cnt_to_hw(s); + /* we turn on descriptor write back error status */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 1 : 0; + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while (++i < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* with cb3.3 we should be able to do completion w/o a null desc */ + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + + dump_pq16_desc_dbg(ioat_chan, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static int src_cnt_flags(unsigned int src_cnt, unsigned long flags) +{ + if (dmaf_p_disabled_continue(flags)) + return src_cnt + 1; + else if (dmaf_continue(flags)) + return src_cnt + 3; + else + return src_cnt; +} + +struct dma_async_tx_descriptor * +ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + dst[0] = dst[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + dst[1] = dst[0]; + + /* handle the single source multiply case from the raid6 + * recovery path + */ + if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) { + dma_addr_t single_source[2]; + unsigned char single_source_coef[2]; + + BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); + single_source[0] = src[0]; + single_source[1] = src[0]; + single_source_coef[0] = scf[0]; + single_source_coef[1] = 0; + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, dst, single_source, + 2, single_source_coef, len, + flags) : + __ioat_prep_pq_lock(chan, NULL, dst, single_source, 2, + single_source_coef, len, flags); + + } else { + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags) : + __ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags); + } +} + +struct dma_async_tx_descriptor * +ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags) +{ + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + pq[0] = pq[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + pq[1] = pq[0]; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *pqres = 0; + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags) : + __ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + unsigned char scf[src_cnt]; + dma_addr_t pq[2]; + + memset(scf, 0, src_cnt); + pq[0] = dst; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = dst; /* specify valid address for disabled result */ + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags) : + __ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + unsigned char scf[src_cnt]; + dma_addr_t pq[2]; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + memset(scf, 0, src_cnt); + pq[0] = src[0]; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = pq[0]; /* specify valid address for disabled result */ + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags) : + __ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + + if (ioat_check_space_lock(ioat_chan, 1) == 0) + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head); + else + return NULL; + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + + desc->txd.flags = flags; + desc->len = 1; + + dump_desc_dbg(ioat_chan, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + diff --git a/drivers/dma/ioat/sysfs.c b/drivers/dma/ioat/sysfs.c new file mode 100644 index 000000000000..cb4a857ee21b --- /dev/null +++ b/drivers/dma/ioat/sysfs.c @@ -0,0 +1,135 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/dmaengine.h> +#include <linux/pci.h> +#include "dma.h" +#include "registers.h" +#include "hw.h" + +#include "../dmaengine.h" + +static ssize_t cap_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + + return sprintf(page, "copy%s%s%s%s%s\n", + dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", + dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", + dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", + dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", + dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : ""); + +} +struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); + +static ssize_t version_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + struct ioatdma_device *ioat_dma = to_ioatdma_device(dma); + + return sprintf(page, "%d.%d\n", + ioat_dma->version >> 4, ioat_dma->version & 0xf); +} +struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); + +static ssize_t +ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + struct ioat_sysfs_entry *entry; + struct ioatdma_chan *ioat_chan; + + entry = container_of(attr, struct ioat_sysfs_entry, attr); + ioat_chan = container_of(kobj, struct ioatdma_chan, kobj); + + if (!entry->show) + return -EIO; + return entry->show(&ioat_chan->dma_chan, page); +} + +const struct sysfs_ops ioat_sysfs_ops = { + .show = ioat_attr_show, +}; + +void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct kobject *parent = &c->dev->device.kobj; + int err; + + err = kobject_init_and_add(&ioat_chan->kobj, type, + parent, "quickdata"); + if (err) { + dev_warn(to_dev(ioat_chan), + "sysfs init error (%d), continuing...\n", err); + kobject_put(&ioat_chan->kobj); + set_bit(IOAT_KOBJ_INIT_FAIL, &ioat_chan->state); + } + } +} + +void ioat_kobject_del(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + if (!test_bit(IOAT_KOBJ_INIT_FAIL, &ioat_chan->state)) { + kobject_del(&ioat_chan->kobj); + kobject_put(&ioat_chan->kobj); + } + } +} + +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + return sprintf(page, "%d\n", (1 << ioat_chan->alloc_order) & ~1); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + /* ...taken outside the lock, no need to be precise */ + return sprintf(page, "%d\n", ioat_ring_active(ioat_chan)); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static struct attribute *ioat_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +struct kobj_type ioat_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat_attrs, +}; diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c index 2e284a4438bc..7489d2a5d246 100644 --- a/drivers/dma/ipu/ipu_irq.c +++ b/drivers/dma/ipu/ipu_irq.c @@ -265,10 +265,10 @@ int ipu_irq_unmap(unsigned int source) return ret; } -/* Chained IRQ handler for IPU error interrupt */ -static void ipu_irq_err(unsigned int irq, struct irq_desc *desc) +/* Chained IRQ handler for IPU function and error interrupt */ +static void ipu_irq_handler(unsigned int __irq, struct irq_desc *desc) { - struct ipu *ipu = irq_get_handler_data(irq); + struct ipu *ipu = irq_desc_get_handler_data(desc); u32 status; int i, line; @@ -286,43 +286,7 @@ static void ipu_irq_err(unsigned int irq, struct irq_desc *desc) raw_spin_unlock(&bank_lock); while ((line = ffs(status))) { struct ipu_irq_map *map; - - line--; - status &= ~(1UL << line); - - raw_spin_lock(&bank_lock); - map = src2map(32 * i + line); - if (map) - irq = map->irq; - raw_spin_unlock(&bank_lock); - - if (!map) { - pr_err("IPU: Interrupt on unmapped source %u bank %d\n", - line, i); - continue; - } - generic_handle_irq(irq); - } - } -} - -/* Chained IRQ handler for IPU function interrupt */ -static void ipu_irq_fn(unsigned int irq, struct irq_desc *desc) -{ - struct ipu *ipu = irq_desc_get_handler_data(desc); - u32 status; - int i, line; - - for (i = 0; i < IPU_IRQ_NR_FN_BANKS; i++) { - struct ipu_irq_bank *bank = irq_bank + i; - - raw_spin_lock(&bank_lock); - status = ipu_read_reg(ipu, bank->status); - /* Not clearing all interrupts, see above */ - status &= ipu_read_reg(ipu, bank->control); - raw_spin_unlock(&bank_lock); - while ((line = ffs(status))) { - struct ipu_irq_map *map; + unsigned int irq; line--; status &= ~(1UL << line); @@ -377,16 +341,12 @@ int __init ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev) irq_map[i].irq = irq; irq_map[i].source = -EINVAL; irq_set_handler(irq, handle_level_irq); -#ifdef CONFIG_ARM - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); -#endif + irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE); } - irq_set_handler_data(ipu->irq_fn, ipu); - irq_set_chained_handler(ipu->irq_fn, ipu_irq_fn); + irq_set_chained_handler_and_data(ipu->irq_fn, ipu_irq_handler, ipu); - irq_set_handler_data(ipu->irq_err, ipu); - irq_set_chained_handler(ipu->irq_err, ipu_irq_err); + irq_set_chained_handler_and_data(ipu->irq_err, ipu_irq_handler, ipu); ipu->irq_base = irq_base; @@ -399,16 +359,12 @@ void ipu_irq_detach_irq(struct ipu *ipu, struct platform_device *dev) irq_base = ipu->irq_base; - irq_set_chained_handler(ipu->irq_fn, NULL); - irq_set_handler_data(ipu->irq_fn, NULL); + irq_set_chained_handler_and_data(ipu->irq_fn, NULL, NULL); - irq_set_chained_handler(ipu->irq_err, NULL); - irq_set_handler_data(ipu->irq_err, NULL); + irq_set_chained_handler_and_data(ipu->irq_err, NULL, NULL); for (irq = irq_base; irq < irq_base + CONFIG_MX3_IPU_IRQS; irq++) { -#ifdef CONFIG_ARM - set_irq_flags(irq, 0); -#endif + irq_set_status_flags(irq, IRQ_NOREQUEST); irq_set_chip(irq, NULL); irq_set_chip_data(irq, NULL); } diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c index 647e362f01fd..1ba2fd73852d 100644 --- a/drivers/dma/k3dma.c +++ b/drivers/dma/k3dma.c @@ -24,7 +24,6 @@ #include "virt-dma.h" #define DRIVER_NAME "k3-dma" -#define DMA_ALIGN 3 #define DMA_MAX_SIZE 0x1ffc #define INT_STAT 0x00 @@ -732,7 +731,7 @@ static int k3_dma_probe(struct platform_device *op) d->slave.device_pause = k3_dma_transfer_pause; d->slave.device_resume = k3_dma_transfer_resume; d->slave.device_terminate_all = k3_dma_terminate_all; - d->slave.copy_align = DMA_ALIGN; + d->slave.copy_align = DMAENGINE_ALIGN_8_BYTES; /* init virtual channel */ d->chans = devm_kzalloc(&op->dev, diff --git a/drivers/dma/lpc18xx-dmamux.c b/drivers/dma/lpc18xx-dmamux.c new file mode 100644 index 000000000000..761f32687055 --- /dev/null +++ b/drivers/dma/lpc18xx-dmamux.c @@ -0,0 +1,183 @@ +/* + * DMA Router driver for LPC18xx/43xx DMA MUX + * + * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com> + * + * Based on TI DMA Crossbar driver by: + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi <peter.ujfalusi@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/err.h> +#include <linux/init.h> +#include <linux/mfd/syscon.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> +#include <linux/regmap.h> +#include <linux/spinlock.h> + +/* CREG register offset and macros for mux manipulation */ +#define LPC18XX_CREG_DMAMUX 0x11c +#define LPC18XX_DMAMUX_VAL(v, n) ((v) << (n * 2)) +#define LPC18XX_DMAMUX_MASK(n) (0x3 << (n * 2)) +#define LPC18XX_DMAMUX_MAX_VAL 0x3 + +struct lpc18xx_dmamux { + u32 value; + bool busy; +}; + +struct lpc18xx_dmamux_data { + struct dma_router dmarouter; + struct lpc18xx_dmamux *muxes; + u32 dma_master_requests; + u32 dma_mux_requests; + struct regmap *reg; + spinlock_t lock; +}; + +static void lpc18xx_dmamux_free(struct device *dev, void *route_data) +{ + struct lpc18xx_dmamux_data *dmamux = dev_get_drvdata(dev); + struct lpc18xx_dmamux *mux = route_data; + unsigned long flags; + + spin_lock_irqsave(&dmamux->lock, flags); + mux->busy = false; + spin_unlock_irqrestore(&dmamux->lock, flags); +} + +static void *lpc18xx_dmamux_reserve(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct lpc18xx_dmamux_data *dmamux = platform_get_drvdata(pdev); + unsigned long flags; + unsigned mux; + + if (dma_spec->args_count != 3) { + dev_err(&pdev->dev, "invalid number of dma mux args\n"); + return ERR_PTR(-EINVAL); + } + + mux = dma_spec->args[0]; + if (mux >= dmamux->dma_master_requests) { + dev_err(&pdev->dev, "invalid mux number: %d\n", + dma_spec->args[0]); + return ERR_PTR(-EINVAL); + } + + if (dma_spec->args[1] > LPC18XX_DMAMUX_MAX_VAL) { + dev_err(&pdev->dev, "invalid dma mux value: %d\n", + dma_spec->args[1]); + return ERR_PTR(-EINVAL); + } + + /* The of_node_put() will be done in the core for the node */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "can't get dma master\n"); + return ERR_PTR(-EINVAL); + } + + spin_lock_irqsave(&dmamux->lock, flags); + if (dmamux->muxes[mux].busy) { + spin_unlock_irqrestore(&dmamux->lock, flags); + dev_err(&pdev->dev, "dma request %u busy with %u.%u\n", + mux, mux, dmamux->muxes[mux].value); + of_node_put(dma_spec->np); + return ERR_PTR(-EBUSY); + } + + dmamux->muxes[mux].busy = true; + dmamux->muxes[mux].value = dma_spec->args[1]; + + regmap_update_bits(dmamux->reg, LPC18XX_CREG_DMAMUX, + LPC18XX_DMAMUX_MASK(mux), + LPC18XX_DMAMUX_VAL(dmamux->muxes[mux].value, mux)); + spin_unlock_irqrestore(&dmamux->lock, flags); + + dma_spec->args[1] = dma_spec->args[2]; + dma_spec->args_count = 2; + + dev_dbg(&pdev->dev, "mapping dmamux %u.%u to dma request %u\n", mux, + dmamux->muxes[mux].value, mux); + + return &dmamux->muxes[mux]; +} + +static int lpc18xx_dmamux_probe(struct platform_device *pdev) +{ + struct device_node *dma_np, *np = pdev->dev.of_node; + struct lpc18xx_dmamux_data *dmamux; + int ret; + + dmamux = devm_kzalloc(&pdev->dev, sizeof(*dmamux), GFP_KERNEL); + if (!dmamux) + return -ENOMEM; + + dmamux->reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg"); + if (IS_ERR(dmamux->reg)) { + dev_err(&pdev->dev, "syscon lookup failed\n"); + return PTR_ERR(dmamux->reg); + } + + ret = of_property_read_u32(np, "dma-requests", + &dmamux->dma_mux_requests); + if (ret) { + dev_err(&pdev->dev, "missing dma-requests property\n"); + return ret; + } + + dma_np = of_parse_phandle(np, "dma-masters", 0); + if (!dma_np) { + dev_err(&pdev->dev, "can't get dma master\n"); + return -ENODEV; + } + + ret = of_property_read_u32(dma_np, "dma-requests", + &dmamux->dma_master_requests); + of_node_put(dma_np); + if (ret) { + dev_err(&pdev->dev, "missing master dma-requests property\n"); + return ret; + } + + dmamux->muxes = devm_kcalloc(&pdev->dev, dmamux->dma_master_requests, + sizeof(struct lpc18xx_dmamux), + GFP_KERNEL); + if (!dmamux->muxes) + return -ENOMEM; + + spin_lock_init(&dmamux->lock); + platform_set_drvdata(pdev, dmamux); + dmamux->dmarouter.dev = &pdev->dev; + dmamux->dmarouter.route_free = lpc18xx_dmamux_free; + + return of_dma_router_register(np, lpc18xx_dmamux_reserve, + &dmamux->dmarouter); +} + +static const struct of_device_id lpc18xx_dmamux_match[] = { + { .compatible = "nxp,lpc1850-dmamux" }, + {}, +}; + +static struct platform_driver lpc18xx_dmamux_driver = { + .probe = lpc18xx_dmamux_probe, + .driver = { + .name = "lpc18xx-dmamux", + .of_match_table = lpc18xx_dmamux_match, + }, +}; + +static int __init lpc18xx_dmamux_init(void) +{ + return platform_driver_register(&lpc18xx_dmamux_driver); +} +arch_initcall(lpc18xx_dmamux_init); diff --git a/drivers/dma/mic_x100_dma.h b/drivers/dma/mic_x100_dma.h index f663b0bdd11d..d89982034e68 100644 --- a/drivers/dma/mic_x100_dma.h +++ b/drivers/dma/mic_x100_dma.h @@ -39,7 +39,7 @@ */ #define MIC_DMA_MAX_NUM_CHAN 8 #define MIC_DMA_NUM_CHAN 4 -#define MIC_DMA_ALIGN_SHIFT 6 +#define MIC_DMA_ALIGN_SHIFT DMAENGINE_ALIGN_64_BYTES #define MIC_DMA_ALIGN_BYTES (1 << MIC_DMA_ALIGN_SHIFT) #define MIC_DMA_DESC_RX_SIZE (128 * 1024 - 4) diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c index 462a0229a743..e39457f13d4d 100644 --- a/drivers/dma/mmp_pdma.c +++ b/drivers/dma/mmp_pdma.c @@ -72,7 +72,6 @@ #define DCMD_WIDTH4 (3 << 14) /* 4 byte width (Word) */ #define DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */ -#define PDMA_ALIGNMENT 3 #define PDMA_MAX_DESC_BYTES DCMD_LENGTH struct mmp_pdma_desc_hw { @@ -1071,7 +1070,7 @@ static int mmp_pdma_probe(struct platform_device *op) pdev->device.device_issue_pending = mmp_pdma_issue_pending; pdev->device.device_config = mmp_pdma_config; pdev->device.device_terminate_all = mmp_pdma_terminate_all; - pdev->device.copy_align = PDMA_ALIGNMENT; + pdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES; pdev->device.src_addr_widths = widths; pdev->device.dst_addr_widths = widths; pdev->device.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c index e683761e0f8f..3df0422607d5 100644 --- a/drivers/dma/mmp_tdma.c +++ b/drivers/dma/mmp_tdma.c @@ -100,7 +100,6 @@ enum mmp_tdma_type { PXA910_SQU, }; -#define TDMA_ALIGNMENT 3 #define TDMA_MAX_XFER_BYTES SZ_64K struct mmp_tdma_chan { @@ -695,7 +694,7 @@ static int mmp_tdma_probe(struct platform_device *pdev) tdev->device.device_pause = mmp_tdma_pause_chan; tdev->device.device_resume = mmp_tdma_resume_chan; tdev->device.device_terminate_all = mmp_tdma_terminate_all; - tdev->device.copy_align = TDMA_ALIGNMENT; + tdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES; dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); platform_set_drvdata(pdev, tdev); diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index fbaf1ead2597..a0e118725ae3 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -26,6 +26,7 @@ #include <linux/of.h> #include <linux/of_irq.h> #include <linux/irqdomain.h> +#include <linux/cpumask.h> #include <linux/platform_data/dma-mv_xor.h> #include "dmaengine.h" @@ -1127,12 +1128,15 @@ static const struct of_device_id mv_xor_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, mv_xor_dt_ids); +static unsigned int mv_xor_engine_count; + static int mv_xor_probe(struct platform_device *pdev) { const struct mbus_dram_target_info *dram; struct mv_xor_device *xordev; struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev); struct resource *res; + unsigned int max_engines, max_channels; int i, ret; int op_in_desc; @@ -1176,6 +1180,21 @@ static int mv_xor_probe(struct platform_device *pdev) if (!IS_ERR(xordev->clk)) clk_prepare_enable(xordev->clk); + /* + * We don't want to have more than one channel per CPU in + * order for async_tx to perform well. So we limit the number + * of engines and channels so that we take into account this + * constraint. Note that we also want to use channels from + * separate engines when possible. + */ + max_engines = num_present_cpus(); + max_channels = min_t(unsigned int, + MV_XOR_MAX_CHANNELS, + DIV_ROUND_UP(num_present_cpus(), 2)); + + if (mv_xor_engine_count >= max_engines) + return 0; + if (pdev->dev.of_node) { struct device_node *np; int i = 0; @@ -1189,13 +1208,13 @@ static int mv_xor_probe(struct platform_device *pdev) int irq; op_in_desc = (int)of_id->data; + if (i >= max_channels) + continue; + dma_cap_zero(cap_mask); - if (of_property_read_bool(np, "dmacap,memcpy")) - dma_cap_set(DMA_MEMCPY, cap_mask); - if (of_property_read_bool(np, "dmacap,xor")) - dma_cap_set(DMA_XOR, cap_mask); - if (of_property_read_bool(np, "dmacap,interrupt")) - dma_cap_set(DMA_INTERRUPT, cap_mask); + dma_cap_set(DMA_MEMCPY, cap_mask); + dma_cap_set(DMA_XOR, cap_mask); + dma_cap_set(DMA_INTERRUPT, cap_mask); irq = irq_of_parse_and_map(np, 0); if (!irq) { @@ -1215,7 +1234,7 @@ static int mv_xor_probe(struct platform_device *pdev) i++; } } else if (pdata && pdata->channels) { - for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) { + for (i = 0; i < max_channels; i++) { struct mv_xor_channel_data *cd; struct mv_xor_chan *chan; int irq; diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index b859792dde95..113605f6fe20 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -11,10 +11,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/dmaengine.h> diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index f513f77b1d85..257e0d90475a 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -1198,6 +1198,9 @@ static inline int _loop(unsigned dry_run, u8 buf[], unsigned lcnt0, lcnt1, ljmp0, ljmp1; struct _arg_LPEND lpend; + if (*bursts == 1) + return _bursts(dry_run, buf, pxs, 1); + /* Max iterations possible in DMALP is 256 */ if (*bursts >= 256*256) { lcnt1 = 256; diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index ddcbbf5cd9e9..5cb61ce01036 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -184,19 +184,18 @@ static unsigned int pxad_drcmr(unsigned int line) static int dbg_show_requester_chan(struct seq_file *s, void *p) { - int pos = 0; struct pxad_phy *phy = s->private; int i; u32 drcmr; - pos += seq_printf(s, "DMA channel %d requester :\n", phy->idx); + seq_printf(s, "DMA channel %d requester :\n", phy->idx); for (i = 0; i < 70; i++) { drcmr = readl_relaxed(phy->base + pxad_drcmr(i)); if ((drcmr & DRCMR_CHLNUM) == phy->idx) - pos += seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i, - !!(drcmr & DRCMR_MAPVLD)); + seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i, + !!(drcmr & DRCMR_MAPVLD)); } - return pos; + return 0; } static inline int dbg_burst_from_dcmd(u32 dcmd) @@ -906,21 +905,21 @@ static void pxad_get_config(struct pxad_chan *chan, enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED; *dcmd = 0; - if (chan->cfg.direction == DMA_DEV_TO_MEM) { + if (dir == DMA_DEV_TO_MEM) { maxburst = chan->cfg.src_maxburst; width = chan->cfg.src_addr_width; dev_addr = chan->cfg.src_addr; *dev_src = dev_addr; *dcmd |= PXA_DCMD_INCTRGADDR | PXA_DCMD_FLOWSRC; } - if (chan->cfg.direction == DMA_MEM_TO_DEV) { + if (dir == DMA_MEM_TO_DEV) { maxburst = chan->cfg.dst_maxburst; width = chan->cfg.dst_addr_width; dev_addr = chan->cfg.dst_addr; *dev_dst = dev_addr; *dcmd |= PXA_DCMD_INCSRCADDR | PXA_DCMD_FLOWTRG; } - if (chan->cfg.direction == DMA_MEM_TO_MEM) + if (dir == DMA_MEM_TO_MEM) *dcmd |= PXA_DCMD_BURST32 | PXA_DCMD_INCTRGADDR | PXA_DCMD_INCSRCADDR; diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c index 8c5186cc9f63..7d5598d874e1 100644 --- a/drivers/dma/sirf-dma.c +++ b/drivers/dma/sirf-dma.c @@ -455,6 +455,7 @@ static int sirfsoc_dma_terminate_all(struct dma_chan *chan) switch (sdma->type) { case SIRFSOC_DMA_VER_A7V1: writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_INT_EN_CLR); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_INT); writel_relaxed((1 << cid) | 1 << (cid + 16), sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7); @@ -462,6 +463,8 @@ static int sirfsoc_dma_terminate_all(struct dma_chan *chan) break; case SIRFSOC_DMA_VER_A7V2: writel_relaxed(0, sdma->base + SIRFSOC_DMA_INT_EN_ATLAS7); + writel_relaxed(SIRFSOC_DMA_INT_ALL_ATLAS7, + sdma->base + SIRFSOC_DMA_INT_ATLAS7); writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); writel_relaxed(0, sdma->base + SIRFSOC_DMA_VALID_ATLAS7); break; diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 3c10f034d4b9..750d1b313684 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -2853,7 +2853,7 @@ static void d40_ops_init(struct d40_base *base, struct dma_device *dev) * This controller can only access address at even * 32bit boundaries, i.e. 2^2 */ - dev->copy_align = 2; + dev->copy_align = DMAENGINE_ALIGN_4_BYTES; } if (dma_has_cap(DMA_SG, dev->cap_mask)) diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c new file mode 100644 index 000000000000..a1a500d96ff2 --- /dev/null +++ b/drivers/dma/sun4i-dma.c @@ -0,0 +1,1288 @@ +/* + * Copyright (C) 2014 Emilio López + * Emilio López <emilio@elopez.com.ar> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/dmaengine.h> +#include <linux/dmapool.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of_dma.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +#include "virt-dma.h" + +/** Common macros to normal and dedicated DMA registers **/ + +#define SUN4I_DMA_CFG_LOADING BIT(31) +#define SUN4I_DMA_CFG_DST_DATA_WIDTH(width) ((width) << 25) +#define SUN4I_DMA_CFG_DST_BURST_LENGTH(len) ((len) << 23) +#define SUN4I_DMA_CFG_DST_ADDR_MODE(mode) ((mode) << 21) +#define SUN4I_DMA_CFG_DST_DRQ_TYPE(type) ((type) << 16) +#define SUN4I_DMA_CFG_SRC_DATA_WIDTH(width) ((width) << 9) +#define SUN4I_DMA_CFG_SRC_BURST_LENGTH(len) ((len) << 7) +#define SUN4I_DMA_CFG_SRC_ADDR_MODE(mode) ((mode) << 5) +#define SUN4I_DMA_CFG_SRC_DRQ_TYPE(type) (type) + +/** Normal DMA register values **/ + +/* Normal DMA source/destination data request type values */ +#define SUN4I_NDMA_DRQ_TYPE_SDRAM 0x16 +#define SUN4I_NDMA_DRQ_TYPE_LIMIT (0x1F + 1) + +/** Normal DMA register layout **/ + +/* Dedicated DMA source/destination address mode values */ +#define SUN4I_NDMA_ADDR_MODE_LINEAR 0 +#define SUN4I_NDMA_ADDR_MODE_IO 1 + +/* Normal DMA configuration register layout */ +#define SUN4I_NDMA_CFG_CONT_MODE BIT(30) +#define SUN4I_NDMA_CFG_WAIT_STATE(n) ((n) << 27) +#define SUN4I_NDMA_CFG_DST_NON_SECURE BIT(22) +#define SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN BIT(15) +#define SUN4I_NDMA_CFG_SRC_NON_SECURE BIT(6) + +/** Dedicated DMA register values **/ + +/* Dedicated DMA source/destination address mode values */ +#define SUN4I_DDMA_ADDR_MODE_LINEAR 0 +#define SUN4I_DDMA_ADDR_MODE_IO 1 +#define SUN4I_DDMA_ADDR_MODE_HORIZONTAL_PAGE 2 +#define SUN4I_DDMA_ADDR_MODE_VERTICAL_PAGE 3 + +/* Dedicated DMA source/destination data request type values */ +#define SUN4I_DDMA_DRQ_TYPE_SDRAM 0x1 +#define SUN4I_DDMA_DRQ_TYPE_LIMIT (0x1F + 1) + +/** Dedicated DMA register layout **/ + +/* Dedicated DMA configuration register layout */ +#define SUN4I_DDMA_CFG_BUSY BIT(30) +#define SUN4I_DDMA_CFG_CONT_MODE BIT(29) +#define SUN4I_DDMA_CFG_DST_NON_SECURE BIT(28) +#define SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN BIT(15) +#define SUN4I_DDMA_CFG_SRC_NON_SECURE BIT(12) + +/* Dedicated DMA parameter register layout */ +#define SUN4I_DDMA_PARA_DST_DATA_BLK_SIZE(n) (((n) - 1) << 24) +#define SUN4I_DDMA_PARA_DST_WAIT_CYCLES(n) (((n) - 1) << 16) +#define SUN4I_DDMA_PARA_SRC_DATA_BLK_SIZE(n) (((n) - 1) << 8) +#define SUN4I_DDMA_PARA_SRC_WAIT_CYCLES(n) (((n) - 1) << 0) + +/** DMA register offsets **/ + +/* General register offsets */ +#define SUN4I_DMA_IRQ_ENABLE_REG 0x0 +#define SUN4I_DMA_IRQ_PENDING_STATUS_REG 0x4 + +/* Normal DMA register offsets */ +#define SUN4I_NDMA_CHANNEL_REG_BASE(n) (0x100 + (n) * 0x20) +#define SUN4I_NDMA_CFG_REG 0x0 +#define SUN4I_NDMA_SRC_ADDR_REG 0x4 +#define SUN4I_NDMA_DST_ADDR_REG 0x8 +#define SUN4I_NDMA_BYTE_COUNT_REG 0xC + +/* Dedicated DMA register offsets */ +#define SUN4I_DDMA_CHANNEL_REG_BASE(n) (0x300 + (n) * 0x20) +#define SUN4I_DDMA_CFG_REG 0x0 +#define SUN4I_DDMA_SRC_ADDR_REG 0x4 +#define SUN4I_DDMA_DST_ADDR_REG 0x8 +#define SUN4I_DDMA_BYTE_COUNT_REG 0xC +#define SUN4I_DDMA_PARA_REG 0x18 + +/** DMA Driver **/ + +/* + * Normal DMA has 8 channels, and Dedicated DMA has another 8, so + * that's 16 channels. As for endpoints, there's 29 and 21 + * respectively. Given that the Normal DMA endpoints (other than + * SDRAM) can be used as tx/rx, we need 78 vchans in total + */ +#define SUN4I_NDMA_NR_MAX_CHANNELS 8 +#define SUN4I_DDMA_NR_MAX_CHANNELS 8 +#define SUN4I_DMA_NR_MAX_CHANNELS \ + (SUN4I_NDMA_NR_MAX_CHANNELS + SUN4I_DDMA_NR_MAX_CHANNELS) +#define SUN4I_NDMA_NR_MAX_VCHANS (29 * 2 - 1) +#define SUN4I_DDMA_NR_MAX_VCHANS 21 +#define SUN4I_DMA_NR_MAX_VCHANS \ + (SUN4I_NDMA_NR_MAX_VCHANS + SUN4I_DDMA_NR_MAX_VCHANS) + +/* This set of SUN4I_DDMA timing parameters were found experimentally while + * working with the SPI driver and seem to make it behave correctly */ +#define SUN4I_DDMA_MAGIC_SPI_PARAMETERS \ + (SUN4I_DDMA_PARA_DST_DATA_BLK_SIZE(1) | \ + SUN4I_DDMA_PARA_SRC_DATA_BLK_SIZE(1) | \ + SUN4I_DDMA_PARA_DST_WAIT_CYCLES(2) | \ + SUN4I_DDMA_PARA_SRC_WAIT_CYCLES(2)) + +struct sun4i_dma_pchan { + /* Register base of channel */ + void __iomem *base; + /* vchan currently being serviced */ + struct sun4i_dma_vchan *vchan; + /* Is this a dedicated pchan? */ + int is_dedicated; +}; + +struct sun4i_dma_vchan { + struct virt_dma_chan vc; + struct dma_slave_config cfg; + struct sun4i_dma_pchan *pchan; + struct sun4i_dma_promise *processing; + struct sun4i_dma_contract *contract; + u8 endpoint; + int is_dedicated; +}; + +struct sun4i_dma_promise { + u32 cfg; + u32 para; + dma_addr_t src; + dma_addr_t dst; + size_t len; + struct list_head list; +}; + +/* A contract is a set of promises */ +struct sun4i_dma_contract { + struct virt_dma_desc vd; + struct list_head demands; + struct list_head completed_demands; + int is_cyclic; +}; + +struct sun4i_dma_dev { + DECLARE_BITMAP(pchans_used, SUN4I_DMA_NR_MAX_CHANNELS); + struct dma_device slave; + struct sun4i_dma_pchan *pchans; + struct sun4i_dma_vchan *vchans; + void __iomem *base; + struct clk *clk; + int irq; + spinlock_t lock; +}; + +static struct sun4i_dma_dev *to_sun4i_dma_dev(struct dma_device *dev) +{ + return container_of(dev, struct sun4i_dma_dev, slave); +} + +static struct sun4i_dma_vchan *to_sun4i_dma_vchan(struct dma_chan *chan) +{ + return container_of(chan, struct sun4i_dma_vchan, vc.chan); +} + +static struct sun4i_dma_contract *to_sun4i_dma_contract(struct virt_dma_desc *vd) +{ + return container_of(vd, struct sun4i_dma_contract, vd); +} + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static int convert_burst(u32 maxburst) +{ + if (maxburst > 8) + return -EINVAL; + + /* 1 -> 0, 4 -> 1, 8 -> 2 */ + return (maxburst >> 2); +} + +static int convert_buswidth(enum dma_slave_buswidth addr_width) +{ + if (addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES) + return -EINVAL; + + /* 8 (1 byte) -> 0, 16 (2 bytes) -> 1, 32 (4 bytes) -> 2 */ + return (addr_width >> 1); +} + +static void sun4i_dma_free_chan_resources(struct dma_chan *chan) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + + vchan_free_chan_resources(&vchan->vc); +} + +static struct sun4i_dma_pchan *find_and_use_pchan(struct sun4i_dma_dev *priv, + struct sun4i_dma_vchan *vchan) +{ + struct sun4i_dma_pchan *pchan = NULL, *pchans = priv->pchans; + unsigned long flags; + int i, max; + + /* + * pchans 0-SUN4I_NDMA_NR_MAX_CHANNELS are normal, and + * SUN4I_NDMA_NR_MAX_CHANNELS+ are dedicated ones + */ + if (vchan->is_dedicated) { + i = SUN4I_NDMA_NR_MAX_CHANNELS; + max = SUN4I_DMA_NR_MAX_CHANNELS; + } else { + i = 0; + max = SUN4I_NDMA_NR_MAX_CHANNELS; + } + + spin_lock_irqsave(&priv->lock, flags); + for_each_clear_bit_from(i, &priv->pchans_used, max) { + pchan = &pchans[i]; + pchan->vchan = vchan; + set_bit(i, priv->pchans_used); + break; + } + spin_unlock_irqrestore(&priv->lock, flags); + + return pchan; +} + +static void release_pchan(struct sun4i_dma_dev *priv, + struct sun4i_dma_pchan *pchan) +{ + unsigned long flags; + int nr = pchan - priv->pchans; + + spin_lock_irqsave(&priv->lock, flags); + + pchan->vchan = NULL; + clear_bit(nr, priv->pchans_used); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static void configure_pchan(struct sun4i_dma_pchan *pchan, + struct sun4i_dma_promise *d) +{ + /* + * Configure addresses and misc parameters depending on type + * SUN4I_DDMA has an extra field with timing parameters + */ + if (pchan->is_dedicated) { + writel_relaxed(d->src, pchan->base + SUN4I_DDMA_SRC_ADDR_REG); + writel_relaxed(d->dst, pchan->base + SUN4I_DDMA_DST_ADDR_REG); + writel_relaxed(d->len, pchan->base + SUN4I_DDMA_BYTE_COUNT_REG); + writel_relaxed(d->para, pchan->base + SUN4I_DDMA_PARA_REG); + writel_relaxed(d->cfg, pchan->base + SUN4I_DDMA_CFG_REG); + } else { + writel_relaxed(d->src, pchan->base + SUN4I_NDMA_SRC_ADDR_REG); + writel_relaxed(d->dst, pchan->base + SUN4I_NDMA_DST_ADDR_REG); + writel_relaxed(d->len, pchan->base + SUN4I_NDMA_BYTE_COUNT_REG); + writel_relaxed(d->cfg, pchan->base + SUN4I_NDMA_CFG_REG); + } +} + +static void set_pchan_interrupt(struct sun4i_dma_dev *priv, + struct sun4i_dma_pchan *pchan, + int half, int end) +{ + u32 reg; + int pchan_number = pchan - priv->pchans; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + reg = readl_relaxed(priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + + if (half) + reg |= BIT(pchan_number * 2); + else + reg &= ~BIT(pchan_number * 2); + + if (end) + reg |= BIT(pchan_number * 2 + 1); + else + reg &= ~BIT(pchan_number * 2 + 1); + + writel_relaxed(reg, priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +/** + * Execute pending operations on a vchan + * + * When given a vchan, this function will try to acquire a suitable + * pchan and, if successful, will configure it to fulfill a promise + * from the next pending contract. + * + * This function must be called with &vchan->vc.lock held. + */ +static int __execute_vchan_pending(struct sun4i_dma_dev *priv, + struct sun4i_dma_vchan *vchan) +{ + struct sun4i_dma_promise *promise = NULL; + struct sun4i_dma_contract *contract = NULL; + struct sun4i_dma_pchan *pchan; + struct virt_dma_desc *vd; + int ret; + + lockdep_assert_held(&vchan->vc.lock); + + /* We need a pchan to do anything, so secure one if available */ + pchan = find_and_use_pchan(priv, vchan); + if (!pchan) + return -EBUSY; + + /* + * Channel endpoints must not be repeated, so if this vchan + * has already submitted some work, we can't do anything else + */ + if (vchan->processing) { + dev_dbg(chan2dev(&vchan->vc.chan), + "processing something to this endpoint already\n"); + ret = -EBUSY; + goto release_pchan; + } + + do { + /* Figure out which contract we're working with today */ + vd = vchan_next_desc(&vchan->vc); + if (!vd) { + dev_dbg(chan2dev(&vchan->vc.chan), + "No pending contract found"); + ret = 0; + goto release_pchan; + } + + contract = to_sun4i_dma_contract(vd); + if (list_empty(&contract->demands)) { + /* The contract has been completed so mark it as such */ + list_del(&contract->vd.node); + vchan_cookie_complete(&contract->vd); + dev_dbg(chan2dev(&vchan->vc.chan), + "Empty contract found and marked complete"); + } + } while (list_empty(&contract->demands)); + + /* Now find out what we need to do */ + promise = list_first_entry(&contract->demands, + struct sun4i_dma_promise, list); + vchan->processing = promise; + + /* ... and make it reality */ + if (promise) { + vchan->contract = contract; + vchan->pchan = pchan; + set_pchan_interrupt(priv, pchan, contract->is_cyclic, 1); + configure_pchan(pchan, promise); + } + + return 0; + +release_pchan: + release_pchan(priv, pchan); + return ret; +} + +static int sanitize_config(struct dma_slave_config *sconfig, + enum dma_transfer_direction direction) +{ + switch (direction) { + case DMA_MEM_TO_DEV: + if ((sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) || + !sconfig->dst_maxburst) + return -EINVAL; + + if (sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + sconfig->src_addr_width = sconfig->dst_addr_width; + + if (!sconfig->src_maxburst) + sconfig->src_maxburst = sconfig->dst_maxburst; + + break; + + case DMA_DEV_TO_MEM: + if ((sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) || + !sconfig->src_maxburst) + return -EINVAL; + + if (sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + sconfig->dst_addr_width = sconfig->src_addr_width; + + if (!sconfig->dst_maxburst) + sconfig->dst_maxburst = sconfig->src_maxburst; + + break; + default: + return 0; + } + + return 0; +} + +/** + * Generate a promise, to be used in a normal DMA contract. + * + * A NDMA promise contains all the information required to program the + * normal part of the DMA Engine and get data copied. A non-executed + * promise will live in the demands list on a contract. Once it has been + * completed, it will be moved to the completed demands list for later freeing. + * All linked promises will be freed when the corresponding contract is freed + */ +static struct sun4i_dma_promise * +generate_ndma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, + size_t len, struct dma_slave_config *sconfig, + enum dma_transfer_direction direction) +{ + struct sun4i_dma_promise *promise; + int ret; + + ret = sanitize_config(sconfig, direction); + if (ret) + return NULL; + + promise = kzalloc(sizeof(*promise), GFP_NOWAIT); + if (!promise) + return NULL; + + promise->src = src; + promise->dst = dest; + promise->len = len; + promise->cfg = SUN4I_DMA_CFG_LOADING | + SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN; + + dev_dbg(chan2dev(chan), + "src burst %d, dst burst %d, src buswidth %d, dst buswidth %d", + sconfig->src_maxburst, sconfig->dst_maxburst, + sconfig->src_addr_width, sconfig->dst_addr_width); + + /* Source burst */ + ret = convert_burst(sconfig->src_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret); + + /* Destination burst */ + ret = convert_burst(sconfig->dst_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret); + + /* Source bus width */ + ret = convert_buswidth(sconfig->src_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret); + + /* Destination bus width */ + ret = convert_buswidth(sconfig->dst_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret); + + return promise; + +fail: + kfree(promise); + return NULL; +} + +/** + * Generate a promise, to be used in a dedicated DMA contract. + * + * A DDMA promise contains all the information required to program the + * Dedicated part of the DMA Engine and get data copied. A non-executed + * promise will live in the demands list on a contract. Once it has been + * completed, it will be moved to the completed demands list for later freeing. + * All linked promises will be freed when the corresponding contract is freed + */ +static struct sun4i_dma_promise * +generate_ddma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, + size_t len, struct dma_slave_config *sconfig) +{ + struct sun4i_dma_promise *promise; + int ret; + + promise = kzalloc(sizeof(*promise), GFP_NOWAIT); + if (!promise) + return NULL; + + promise->src = src; + promise->dst = dest; + promise->len = len; + promise->cfg = SUN4I_DMA_CFG_LOADING | + SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN; + + /* Source burst */ + ret = convert_burst(sconfig->src_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret); + + /* Destination burst */ + ret = convert_burst(sconfig->dst_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret); + + /* Source bus width */ + ret = convert_buswidth(sconfig->src_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret); + + /* Destination bus width */ + ret = convert_buswidth(sconfig->dst_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret); + + return promise; + +fail: + kfree(promise); + return NULL; +} + +/** + * Generate a contract + * + * Contracts function as DMA descriptors. As our hardware does not support + * linked lists, we need to implement SG via software. We use a contract + * to hold all the pieces of the request and process them serially one + * after another. Each piece is represented as a promise. + */ +static struct sun4i_dma_contract *generate_dma_contract(void) +{ + struct sun4i_dma_contract *contract; + + contract = kzalloc(sizeof(*contract), GFP_NOWAIT); + if (!contract) + return NULL; + + INIT_LIST_HEAD(&contract->demands); + INIT_LIST_HEAD(&contract->completed_demands); + + return contract; +} + +/** + * Get next promise on a cyclic transfer + * + * Cyclic contracts contain a series of promises which are executed on a + * loop. This function returns the next promise from a cyclic contract, + * so it can be programmed into the hardware. + */ +static struct sun4i_dma_promise * +get_next_cyclic_promise(struct sun4i_dma_contract *contract) +{ + struct sun4i_dma_promise *promise; + + promise = list_first_entry_or_null(&contract->demands, + struct sun4i_dma_promise, list); + if (!promise) { + list_splice_init(&contract->completed_demands, + &contract->demands); + promise = list_first_entry(&contract->demands, + struct sun4i_dma_promise, list); + } + + return promise; +} + +/** + * Free a contract and all its associated promises + */ +static void sun4i_dma_free_contract(struct virt_dma_desc *vd) +{ + struct sun4i_dma_contract *contract = to_sun4i_dma_contract(vd); + struct sun4i_dma_promise *promise; + + /* Free all the demands and completed demands */ + list_for_each_entry(promise, &contract->demands, list) + kfree(promise); + + list_for_each_entry(promise, &contract->completed_demands, list) + kfree(promise); + + kfree(contract); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + /* + * We can only do the copy to bus aligned addresses, so + * choose the best one so we get decent performance. We also + * maximize the burst size for this same reason. + */ + sconfig->src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + sconfig->dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + sconfig->src_maxburst = 8; + sconfig->dst_maxburst = 8; + + if (vchan->is_dedicated) + promise = generate_ddma_promise(chan, src, dest, len, sconfig); + else + promise = generate_ndma_promise(chan, src, dest, len, sconfig, + DMA_MEM_TO_MEM); + + if (!promise) { + kfree(contract); + return NULL; + } + + /* Configure memcpy mode */ + if (vchan->is_dedicated) { + promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM); + } else { + promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM); + } + + /* Fill the contract with our only promise */ + list_add_tail(&promise->list, &contract->demands); + + /* And add it to the vchan */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf, size_t len, + size_t period_len, enum dma_transfer_direction dir, + unsigned long flags) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + dma_addr_t src, dest; + u32 endpoints; + int nr_periods, offset, plength, i; + + if (!is_slave_direction(dir)) { + dev_err(chan2dev(chan), "Invalid DMA direction\n"); + return NULL; + } + + if (vchan->is_dedicated) { + /* + * As we are using this just for audio data, we need to use + * normal DMA. There is nothing stopping us from supporting + * dedicated DMA here as well, so if a client comes up and + * requires it, it will be simple to implement it. + */ + dev_err(chan2dev(chan), + "Cyclic transfers are only supported on Normal DMA\n"); + return NULL; + } + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + contract->is_cyclic = 1; + + /* Figure out the endpoints and the address we need */ + if (dir == DMA_MEM_TO_DEV) { + src = buf; + dest = sconfig->dst_addr; + endpoints = SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_DST_ADDR_MODE(SUN4I_NDMA_ADDR_MODE_IO); + } else { + src = sconfig->src_addr; + dest = buf; + endpoints = SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(SUN4I_NDMA_ADDR_MODE_IO) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM); + } + + /* + * We will be using half done interrupts to make two periods + * out of a promise, so we need to program the DMA engine less + * often + */ + + /* + * The engine can interrupt on half-transfer, so we can use + * this feature to program the engine half as often as if we + * didn't use it (keep in mind the hardware doesn't support + * linked lists). + * + * Say you have a set of periods (| marks the start/end, I for + * interrupt, P for programming the engine to do a new + * transfer), the easy but slow way would be to do + * + * |---|---|---|---| (periods / promises) + * P I,P I,P I,P I + * + * Using half transfer interrupts you can do + * + * |-------|-------| (promises as configured on hw) + * |---|---|---|---| (periods) + * P I I,P I I + * + * Which requires half the engine programming for the same + * functionality. + */ + nr_periods = DIV_ROUND_UP(len / period_len, 2); + for (i = 0; i < nr_periods; i++) { + /* Calculate the offset in the buffer and the length needed */ + offset = i * period_len * 2; + plength = min((len - offset), (period_len * 2)); + if (dir == DMA_MEM_TO_DEV) + src = buf + offset; + else + dest = buf + offset; + + /* Make the promise */ + promise = generate_ndma_promise(chan, src, dest, + plength, sconfig, dir); + if (!promise) { + /* TODO: should we free everything? */ + return NULL; + } + promise->cfg |= endpoints; + + /* Then add it to the contract */ + list_add_tail(&promise->list, &contract->demands); + } + + /* And add it to the vchan */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + u8 ram_type, io_mode, linear_mode; + struct scatterlist *sg; + dma_addr_t srcaddr, dstaddr; + u32 endpoints, para; + int i; + + if (!sgl) + return NULL; + + if (!is_slave_direction(dir)) { + dev_err(chan2dev(chan), "Invalid DMA direction\n"); + return NULL; + } + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + if (vchan->is_dedicated) { + io_mode = SUN4I_DDMA_ADDR_MODE_IO; + linear_mode = SUN4I_DDMA_ADDR_MODE_LINEAR; + ram_type = SUN4I_DDMA_DRQ_TYPE_SDRAM; + } else { + io_mode = SUN4I_NDMA_ADDR_MODE_IO; + linear_mode = SUN4I_NDMA_ADDR_MODE_LINEAR; + ram_type = SUN4I_NDMA_DRQ_TYPE_SDRAM; + } + + if (dir == DMA_MEM_TO_DEV) + endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_DST_ADDR_MODE(io_mode) | + SUN4I_DMA_CFG_SRC_DRQ_TYPE(ram_type) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(linear_mode); + else + endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(ram_type) | + SUN4I_DMA_CFG_DST_ADDR_MODE(linear_mode) | + SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(io_mode); + + for_each_sg(sgl, sg, sg_len, i) { + /* Figure out addresses */ + if (dir == DMA_MEM_TO_DEV) { + srcaddr = sg_dma_address(sg); + dstaddr = sconfig->dst_addr; + } else { + srcaddr = sconfig->src_addr; + dstaddr = sg_dma_address(sg); + } + + /* + * These are the magic DMA engine timings that keep SPI going. + * I haven't seen any interface on DMAEngine to configure + * timings, and so far they seem to work for everything we + * support, so I've kept them here. I don't know if other + * devices need different timings because, as usual, we only + * have the "para" bitfield meanings, but no comment on what + * the values should be when doing a certain operation :| + */ + para = SUN4I_DDMA_MAGIC_SPI_PARAMETERS; + + /* And make a suitable promise */ + if (vchan->is_dedicated) + promise = generate_ddma_promise(chan, srcaddr, dstaddr, + sg_dma_len(sg), + sconfig); + else + promise = generate_ndma_promise(chan, srcaddr, dstaddr, + sg_dma_len(sg), + sconfig, dir); + + if (!promise) + return NULL; /* TODO: should we free everything? */ + + promise->cfg |= endpoints; + promise->para = para; + + /* Then add it to the contract */ + list_add_tail(&promise->list, &contract->demands); + } + + /* + * Once we've got all the promises ready, add the contract + * to the pending list on the vchan + */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static int sun4i_dma_terminate_all(struct dma_chan *chan) +{ + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct sun4i_dma_pchan *pchan = vchan->pchan; + LIST_HEAD(head); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + vchan_get_all_descriptors(&vchan->vc, &head); + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + /* + * Clearing the configuration register will halt the pchan. Interrupts + * may still trigger, so don't forget to disable them. + */ + if (pchan) { + if (pchan->is_dedicated) + writel(0, pchan->base + SUN4I_DDMA_CFG_REG); + else + writel(0, pchan->base + SUN4I_NDMA_CFG_REG); + set_pchan_interrupt(priv, pchan, 0, 0); + release_pchan(priv, pchan); + } + + spin_lock_irqsave(&vchan->vc.lock, flags); + vchan_dma_desc_free_list(&vchan->vc, &head); + /* Clear these so the vchan is usable again */ + vchan->processing = NULL; + vchan->pchan = NULL; + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + return 0; +} + +static int sun4i_dma_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + + memcpy(&vchan->cfg, config, sizeof(*config)); + + return 0; +} + +static struct dma_chan *sun4i_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct sun4i_dma_dev *priv = ofdma->of_dma_data; + struct sun4i_dma_vchan *vchan; + struct dma_chan *chan; + u8 is_dedicated = dma_spec->args[0]; + u8 endpoint = dma_spec->args[1]; + + /* Check if type is Normal or Dedicated */ + if (is_dedicated != 0 && is_dedicated != 1) + return NULL; + + /* Make sure the endpoint looks sane */ + if ((is_dedicated && endpoint >= SUN4I_DDMA_DRQ_TYPE_LIMIT) || + (!is_dedicated && endpoint >= SUN4I_NDMA_DRQ_TYPE_LIMIT)) + return NULL; + + chan = dma_get_any_slave_channel(&priv->slave); + if (!chan) + return NULL; + + /* Assign the endpoint to the vchan */ + vchan = to_sun4i_dma_vchan(chan); + vchan->is_dedicated = is_dedicated; + vchan->endpoint = endpoint; + + return chan; +} + +static enum dma_status sun4i_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct sun4i_dma_pchan *pchan = vchan->pchan; + struct sun4i_dma_contract *contract; + struct sun4i_dma_promise *promise; + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + size_t bytes = 0; + + ret = dma_cookie_status(chan, cookie, state); + if (!state || (ret == DMA_COMPLETE)) + return ret; + + spin_lock_irqsave(&vchan->vc.lock, flags); + vd = vchan_find_desc(&vchan->vc, cookie); + if (!vd) + goto exit; + contract = to_sun4i_dma_contract(vd); + + list_for_each_entry(promise, &contract->demands, list) + bytes += promise->len; + + /* + * The hardware is configured to return the remaining byte + * quantity. If possible, replace the first listed element's + * full size with the actual remaining amount + */ + promise = list_first_entry_or_null(&contract->demands, + struct sun4i_dma_promise, list); + if (promise && pchan) { + bytes -= promise->len; + if (pchan->is_dedicated) + bytes += readl(pchan->base + SUN4I_DDMA_BYTE_COUNT_REG); + else + bytes += readl(pchan->base + SUN4I_NDMA_BYTE_COUNT_REG); + } + +exit: + + dma_set_residue(state, bytes); + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + return ret; +} + +static void sun4i_dma_issue_pending(struct dma_chan *chan) +{ + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + + /* + * If there are pending transactions for this vchan, push one of + * them into the engine to get the ball rolling. + */ + if (vchan_issue_pending(&vchan->vc)) + __execute_vchan_pending(priv, vchan); + + spin_unlock_irqrestore(&vchan->vc.lock, flags); +} + +static irqreturn_t sun4i_dma_interrupt(int irq, void *dev_id) +{ + struct sun4i_dma_dev *priv = dev_id; + struct sun4i_dma_pchan *pchans = priv->pchans, *pchan; + struct sun4i_dma_vchan *vchan; + struct sun4i_dma_contract *contract; + struct sun4i_dma_promise *promise; + unsigned long pendirq, irqs, disableirqs; + int bit, i, free_room, allow_mitigation = 1; + + pendirq = readl_relaxed(priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + +handle_pending: + + disableirqs = 0; + free_room = 0; + + for_each_set_bit(bit, &pendirq, 32) { + pchan = &pchans[bit >> 1]; + vchan = pchan->vchan; + if (!vchan) /* a terminated channel may still interrupt */ + continue; + contract = vchan->contract; + + /* + * Disable the IRQ and free the pchan if it's an end + * interrupt (odd bit) + */ + if (bit & 1) { + spin_lock(&vchan->vc.lock); + + /* + * Move the promise into the completed list now that + * we're done with it + */ + list_del(&vchan->processing->list); + list_add_tail(&vchan->processing->list, + &contract->completed_demands); + + /* + * Cyclic DMA transfers are special: + * - There's always something we can dispatch + * - We need to run the callback + * - Latency is very important, as this is used by audio + * We therefore just cycle through the list and dispatch + * whatever we have here, reusing the pchan. There's + * no need to run the thread after this. + * + * For non-cyclic transfers we need to look around, + * so we can program some more work, or notify the + * client that their transfers have been completed. + */ + if (contract->is_cyclic) { + promise = get_next_cyclic_promise(contract); + vchan->processing = promise; + configure_pchan(pchan, promise); + vchan_cyclic_callback(&contract->vd); + } else { + vchan->processing = NULL; + vchan->pchan = NULL; + + free_room = 1; + disableirqs |= BIT(bit); + release_pchan(priv, pchan); + } + + spin_unlock(&vchan->vc.lock); + } else { + /* Half done interrupt */ + if (contract->is_cyclic) + vchan_cyclic_callback(&contract->vd); + else + disableirqs |= BIT(bit); + } + } + + /* Disable the IRQs for events we handled */ + spin_lock(&priv->lock); + irqs = readl_relaxed(priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + writel_relaxed(irqs & ~disableirqs, + priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + spin_unlock(&priv->lock); + + /* Writing 1 to the pending field will clear the pending interrupt */ + writel_relaxed(pendirq, priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + + /* + * If a pchan was freed, we may be able to schedule something else, + * so have a look around + */ + if (free_room) { + for (i = 0; i < SUN4I_DMA_NR_MAX_VCHANS; i++) { + vchan = &priv->vchans[i]; + spin_lock(&vchan->vc.lock); + __execute_vchan_pending(priv, vchan); + spin_unlock(&vchan->vc.lock); + } + } + + /* + * Handle newer interrupts if some showed up, but only do it once + * to avoid a too long a loop + */ + if (allow_mitigation) { + pendirq = readl_relaxed(priv->base + + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + if (pendirq) { + allow_mitigation = 0; + goto handle_pending; + } + } + + return IRQ_HANDLED; +} + +static int sun4i_dma_probe(struct platform_device *pdev) +{ + struct sun4i_dma_dev *priv; + struct resource *res; + int i, j, ret; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + priv->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->base)) + return PTR_ERR(priv->base); + + priv->irq = platform_get_irq(pdev, 0); + if (priv->irq < 0) { + dev_err(&pdev->dev, "Cannot claim IRQ\n"); + return priv->irq; + } + + priv->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) { + dev_err(&pdev->dev, "No clock specified\n"); + return PTR_ERR(priv->clk); + } + + platform_set_drvdata(pdev, priv); + spin_lock_init(&priv->lock); + + dma_cap_zero(priv->slave.cap_mask); + dma_cap_set(DMA_PRIVATE, priv->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, priv->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, priv->slave.cap_mask); + dma_cap_set(DMA_SLAVE, priv->slave.cap_mask); + + INIT_LIST_HEAD(&priv->slave.channels); + priv->slave.device_free_chan_resources = sun4i_dma_free_chan_resources; + priv->slave.device_tx_status = sun4i_dma_tx_status; + priv->slave.device_issue_pending = sun4i_dma_issue_pending; + priv->slave.device_prep_slave_sg = sun4i_dma_prep_slave_sg; + priv->slave.device_prep_dma_memcpy = sun4i_dma_prep_dma_memcpy; + priv->slave.device_prep_dma_cyclic = sun4i_dma_prep_dma_cyclic; + priv->slave.device_config = sun4i_dma_config; + priv->slave.device_terminate_all = sun4i_dma_terminate_all; + priv->slave.copy_align = 2; + priv->slave.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + priv->slave.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + priv->slave.directions = BIT(DMA_DEV_TO_MEM) | + BIT(DMA_MEM_TO_DEV); + priv->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + priv->slave.dev = &pdev->dev; + + priv->pchans = devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_CHANNELS, + sizeof(struct sun4i_dma_pchan), GFP_KERNEL); + priv->vchans = devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_VCHANS, + sizeof(struct sun4i_dma_vchan), GFP_KERNEL); + if (!priv->vchans || !priv->pchans) + return -ENOMEM; + + /* + * [0..SUN4I_NDMA_NR_MAX_CHANNELS) are normal pchans, and + * [SUN4I_NDMA_NR_MAX_CHANNELS..SUN4I_DMA_NR_MAX_CHANNELS) are + * dedicated ones + */ + for (i = 0; i < SUN4I_NDMA_NR_MAX_CHANNELS; i++) + priv->pchans[i].base = priv->base + + SUN4I_NDMA_CHANNEL_REG_BASE(i); + + for (j = 0; i < SUN4I_DMA_NR_MAX_CHANNELS; i++, j++) { + priv->pchans[i].base = priv->base + + SUN4I_DDMA_CHANNEL_REG_BASE(j); + priv->pchans[i].is_dedicated = 1; + } + + for (i = 0; i < SUN4I_DMA_NR_MAX_VCHANS; i++) { + struct sun4i_dma_vchan *vchan = &priv->vchans[i]; + + spin_lock_init(&vchan->vc.lock); + vchan->vc.desc_free = sun4i_dma_free_contract; + vchan_init(&vchan->vc, &priv->slave); + } + + ret = clk_prepare_enable(priv->clk); + if (ret) { + dev_err(&pdev->dev, "Couldn't enable the clock\n"); + return ret; + } + + /* + * Make sure the IRQs are all disabled and accounted for. The bootloader + * likes to leave these dirty + */ + writel(0, priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + writel(0xFFFFFFFF, priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + + ret = devm_request_irq(&pdev->dev, priv->irq, sun4i_dma_interrupt, + 0, dev_name(&pdev->dev), priv); + if (ret) { + dev_err(&pdev->dev, "Cannot request IRQ\n"); + goto err_clk_disable; + } + + ret = dma_async_device_register(&priv->slave); + if (ret) { + dev_warn(&pdev->dev, "Failed to register DMA engine device\n"); + goto err_clk_disable; + } + + ret = of_dma_controller_register(pdev->dev.of_node, sun4i_dma_of_xlate, + priv); + if (ret) { + dev_err(&pdev->dev, "of_dma_controller_register failed\n"); + goto err_dma_unregister; + } + + dev_dbg(&pdev->dev, "Successfully probed SUN4I_DMA\n"); + + return 0; + +err_dma_unregister: + dma_async_device_unregister(&priv->slave); +err_clk_disable: + clk_disable_unprepare(priv->clk); + return ret; +} + +static int sun4i_dma_remove(struct platform_device *pdev) +{ + struct sun4i_dma_dev *priv = platform_get_drvdata(pdev); + + /* Disable IRQ so no more work is scheduled */ + disable_irq(priv->irq); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&priv->slave); + + clk_disable_unprepare(priv->clk); + + return 0; +} + +static const struct of_device_id sun4i_dma_match[] = { + { .compatible = "allwinner,sun4i-a10-dma" }, + { /* sentinel */ }, +}; + +static struct platform_driver sun4i_dma_driver = { + .probe = sun4i_dma_probe, + .remove = sun4i_dma_remove, + .driver = { + .name = "sun4i-dma", + .of_match_table = sun4i_dma_match, + }, +}; + +module_platform_driver(sun4i_dma_driver); + +MODULE_DESCRIPTION("Allwinner A10 Dedicated DMA Controller Driver"); +MODULE_AUTHOR("Emilio López <emilio@elopez.com.ar>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 842ff97c2cfb..73e0be6e2100 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -969,7 +969,7 @@ static int sun6i_dma_probe(struct platform_device *pdev) sdc->slave.device_issue_pending = sun6i_dma_issue_pending; sdc->slave.device_prep_slave_sg = sun6i_dma_prep_slave_sg; sdc->slave.device_prep_dma_memcpy = sun6i_dma_prep_dma_memcpy; - sdc->slave.copy_align = 4; + sdc->slave.copy_align = DMAENGINE_ALIGN_4_BYTES; sdc->slave.device_config = sun6i_dma_config; sdc->slave.device_pause = sun6i_dma_pause; sdc->slave.device_resume = sun6i_dma_resume; diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index eaf585e8286b..c8f79dcaaee8 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -155,7 +155,6 @@ struct tegra_dma_sg_req { int req_len; bool configured; bool last_sg; - bool half_done; struct list_head node; struct tegra_dma_desc *dma_desc; }; @@ -188,7 +187,7 @@ struct tegra_dma_channel { bool config_init; int id; int irq; - unsigned long chan_base_offset; + void __iomem *chan_addr; spinlock_t lock; bool busy; struct tegra_dma *tdma; @@ -203,8 +202,6 @@ struct tegra_dma_channel { /* ISR handler and tasklet for bottom half of isr handling */ dma_isr_handler isr_handler; struct tasklet_struct tasklet; - dma_async_tx_callback callback; - void *callback_param; /* Channel-slave specific configuration */ unsigned int slave_id; @@ -222,6 +219,13 @@ struct tegra_dma { void __iomem *base_addr; const struct tegra_dma_chip_data *chip_data; + /* + * Counter for managing global pausing of the DMA controller. + * Only applicable for devices that don't support individual + * channel pausing. + */ + u32 global_pause_count; + /* Some register need to be cache before suspend */ u32 reg_gen; @@ -242,12 +246,12 @@ static inline u32 tdma_read(struct tegra_dma *tdma, u32 reg) static inline void tdc_write(struct tegra_dma_channel *tdc, u32 reg, u32 val) { - writel(val, tdc->tdma->base_addr + tdc->chan_base_offset + reg); + writel(val, tdc->chan_addr + reg); } static inline u32 tdc_read(struct tegra_dma_channel *tdc, u32 reg) { - return readl(tdc->tdma->base_addr + tdc->chan_base_offset + reg); + return readl(tdc->chan_addr + reg); } static inline struct tegra_dma_channel *to_tegra_dma_chan(struct dma_chan *dc) @@ -361,16 +365,32 @@ static void tegra_dma_global_pause(struct tegra_dma_channel *tdc, struct tegra_dma *tdma = tdc->tdma; spin_lock(&tdma->global_lock); - tdma_write(tdma, TEGRA_APBDMA_GENERAL, 0); - if (wait_for_burst_complete) - udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME); + + if (tdc->tdma->global_pause_count == 0) { + tdma_write(tdma, TEGRA_APBDMA_GENERAL, 0); + if (wait_for_burst_complete) + udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME); + } + + tdc->tdma->global_pause_count++; + + spin_unlock(&tdma->global_lock); } static void tegra_dma_global_resume(struct tegra_dma_channel *tdc) { struct tegra_dma *tdma = tdc->tdma; - tdma_write(tdma, TEGRA_APBDMA_GENERAL, TEGRA_APBDMA_GENERAL_ENABLE); + spin_lock(&tdma->global_lock); + + if (WARN_ON(tdc->tdma->global_pause_count == 0)) + goto out; + + if (--tdc->tdma->global_pause_count == 0) + tdma_write(tdma, TEGRA_APBDMA_GENERAL, + TEGRA_APBDMA_GENERAL_ENABLE); + +out: spin_unlock(&tdma->global_lock); } @@ -601,7 +621,6 @@ static void handle_once_dma_done(struct tegra_dma_channel *tdc, return; tdc_start_head_req(tdc); - return; } static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc, @@ -628,7 +647,6 @@ static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc, if (!st) dma_desc->dma_status = DMA_ERROR; } - return; } static void tegra_dma_tasklet(unsigned long data) @@ -720,7 +738,6 @@ static void tegra_dma_issue_pending(struct dma_chan *dc) } end: spin_unlock_irqrestore(&tdc->lock, flags); - return; } static int tegra_dma_terminate_all(struct dma_chan *dc) @@ -932,7 +949,6 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg( struct tegra_dma_sg_req *sg_req = NULL; u32 burst_size; enum dma_slave_buswidth slave_bw; - int ret; if (!tdc->config_init) { dev_err(tdc2dev(tdc), "dma channel is not configured\n"); @@ -943,9 +959,8 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg( return NULL; } - ret = get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, - &burst_size, &slave_bw); - if (ret < 0) + if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, + &burst_size, &slave_bw) < 0) return NULL; INIT_LIST_HEAD(&req_list); @@ -1048,7 +1063,6 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( dma_addr_t mem = buf_addr; u32 burst_size; enum dma_slave_buswidth slave_bw; - int ret; if (!buf_len || !period_len) { dev_err(tdc2dev(tdc), "Invalid buffer/period len\n"); @@ -1087,12 +1101,10 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( return NULL; } - ret = get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, - &burst_size, &slave_bw); - if (ret < 0) + if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, + &burst_size, &slave_bw) < 0) return NULL; - ahb_seq = TEGRA_APBDMA_AHBSEQ_INTR_ENB; ahb_seq |= TEGRA_APBDMA_AHBSEQ_WRAP_NONE << TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT; @@ -1136,7 +1148,6 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( sg_req->ch_regs.apb_seq = apb_seq; sg_req->ch_regs.ahb_seq = ahb_seq; sg_req->configured = false; - sg_req->half_done = false; sg_req->last_sg = false; sg_req->dma_desc = dma_desc; sg_req->req_len = len; @@ -1377,8 +1388,9 @@ static int tegra_dma_probe(struct platform_device *pdev) for (i = 0; i < cdata->nr_channels; i++) { struct tegra_dma_channel *tdc = &tdma->channels[i]; - tdc->chan_base_offset = TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET + - i * cdata->channel_reg_size; + tdc->chan_addr = tdma->base_addr + + TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET + + (i * cdata->channel_reg_size); res = platform_get_resource(pdev, IORESOURCE_IRQ, i); if (!res) { @@ -1418,6 +1430,7 @@ static int tegra_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask); dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask); + tdma->global_pause_count = 0; tdma->dma_dev.dev = &pdev->dev; tdma->dma_dev.device_alloc_chan_resources = tegra_dma_alloc_chan_resources; diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c index c4c3d93fdd1b..559cd4073698 100644 --- a/drivers/dma/timb_dma.c +++ b/drivers/dma/timb_dma.c @@ -10,10 +10,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* Supports: diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c index 620fd55ec766..0b82bc00b83a 100644 --- a/drivers/dma/xgene-dma.c +++ b/drivers/dma/xgene-dma.c @@ -21,6 +21,7 @@ * NOTE: PM support is currently not available. */ +#include <linux/acpi.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/dma-mapping.h> @@ -150,7 +151,6 @@ #define XGENE_DMA_PQ_CHANNEL 1 #define XGENE_DMA_MAX_BYTE_CNT 0x4000 /* 16 KB */ #define XGENE_DMA_MAX_64B_DESC_BYTE_CNT 0x14000 /* 80 KB */ -#define XGENE_DMA_XOR_ALIGNMENT 6 /* 64 Bytes */ #define XGENE_DMA_MAX_XOR_SRC 5 #define XGENE_DMA_16K_BUFFER_LEN_CODE 0x0 #define XGENE_DMA_INVALID_LEN_CODE 0x7800000000000000ULL @@ -763,12 +763,17 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) struct xgene_dma_ring *ring = &chan->rx_ring; struct xgene_dma_desc_sw *desc_sw, *_desc_sw; struct xgene_dma_desc_hw *desc_hw; + struct list_head ld_completed; u8 status; + INIT_LIST_HEAD(&ld_completed); + + spin_lock_bh(&chan->lock); + /* Clean already completed and acked descriptors */ xgene_dma_clean_completed_descriptor(chan); - /* Run the callback for each descriptor, in order */ + /* Move all completed descriptors to ld completed queue, in order */ list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_running, node) { /* Get subsequent hw descriptor from DMA rx ring */ desc_hw = &ring->desc_hw[ring->head]; @@ -811,15 +816,17 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) /* Mark this hw descriptor as processed */ desc_hw->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE); - xgene_dma_run_tx_complete_actions(chan, desc_sw); - - xgene_dma_clean_running_descriptor(chan, desc_sw); - /* * Decrement the pending transaction count * as we have processed one */ chan->pending--; + + /* + * Delete this node from ld running queue and append it to + * ld completed queue for further processing + */ + list_move_tail(&desc_sw->node, &ld_completed); } /* @@ -828,6 +835,14 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) * ahead and free the descriptors below. */ xgene_chan_xfer_ld_pending(chan); + + spin_unlock_bh(&chan->lock); + + /* Run the callback for each descriptor, in order */ + list_for_each_entry_safe(desc_sw, _desc_sw, &ld_completed, node) { + xgene_dma_run_tx_complete_actions(chan, desc_sw); + xgene_dma_clean_running_descriptor(chan, desc_sw); + } } static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan) @@ -876,11 +891,11 @@ static void xgene_dma_free_chan_resources(struct dma_chan *dchan) if (!chan->desc_pool) return; - spin_lock_bh(&chan->lock); - /* Process all running descriptor */ xgene_dma_cleanup_descriptors(chan); + spin_lock_bh(&chan->lock); + /* Clean all link descriptor queues */ xgene_dma_free_desc_list(chan, &chan->ld_pending); xgene_dma_free_desc_list(chan, &chan->ld_running); @@ -1200,15 +1215,11 @@ static void xgene_dma_tasklet_cb(unsigned long data) { struct xgene_dma_chan *chan = (struct xgene_dma_chan *)data; - spin_lock_bh(&chan->lock); - /* Run all cleanup for descriptors which have been completed */ xgene_dma_cleanup_descriptors(chan); /* Re-enable DMA channel IRQ */ enable_irq(chan->rx_irq); - - spin_unlock_bh(&chan->lock); } static irqreturn_t xgene_dma_chan_ring_isr(int irq, void *id) @@ -1740,13 +1751,13 @@ static void xgene_dma_set_caps(struct xgene_dma_chan *chan, if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { dma_dev->device_prep_dma_xor = xgene_dma_prep_xor; dma_dev->max_xor = XGENE_DMA_MAX_XOR_SRC; - dma_dev->xor_align = XGENE_DMA_XOR_ALIGNMENT; + dma_dev->xor_align = DMAENGINE_ALIGN_64_BYTES; } if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { dma_dev->device_prep_dma_pq = xgene_dma_prep_pq; dma_dev->max_pq = XGENE_DMA_MAX_XOR_SRC; - dma_dev->pq_align = XGENE_DMA_XOR_ALIGNMENT; + dma_dev->pq_align = DMAENGINE_ALIGN_64_BYTES; } } @@ -1941,16 +1952,18 @@ static int xgene_dma_probe(struct platform_device *pdev) return ret; pdma->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(pdma->clk)) { + if (IS_ERR(pdma->clk) && !ACPI_COMPANION(&pdev->dev)) { dev_err(&pdev->dev, "Failed to get clk\n"); return PTR_ERR(pdma->clk); } /* Enable clk before accessing registers */ - ret = clk_prepare_enable(pdma->clk); - if (ret) { - dev_err(&pdev->dev, "Failed to enable clk %d\n", ret); - return ret; + if (!IS_ERR(pdma->clk)) { + ret = clk_prepare_enable(pdma->clk); + if (ret) { + dev_err(&pdev->dev, "Failed to enable clk %d\n", ret); + return ret; + } } /* Remove DMA RAM out of shutdown */ @@ -1995,7 +2008,8 @@ err_request_irq: err_dma_mask: err_clk_enable: - clk_disable_unprepare(pdma->clk); + if (!IS_ERR(pdma->clk)) + clk_disable_unprepare(pdma->clk); return ret; } @@ -2019,11 +2033,20 @@ static int xgene_dma_remove(struct platform_device *pdev) xgene_dma_delete_chan_rings(chan); } - clk_disable_unprepare(pdma->clk); + if (!IS_ERR(pdma->clk)) + clk_disable_unprepare(pdma->clk); return 0; } +#ifdef CONFIG_ACPI +static const struct acpi_device_id xgene_dma_acpi_match_ptr[] = { + {"APMC0D43", 0}, + {}, +}; +MODULE_DEVICE_TABLE(acpi, xgene_dma_acpi_match_ptr); +#endif + static const struct of_device_id xgene_dma_of_match_ptr[] = { {.compatible = "apm,xgene-storm-dma",}, {}, @@ -2036,6 +2059,7 @@ static struct platform_driver xgene_dma_driver = { .driver = { .name = "X-Gene-DMA", .of_match_table = xgene_dma_of_match_ptr, + .acpi_match_table = ACPI_PTR(xgene_dma_acpi_match_ptr), }, }; diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 6d2f39d36e44..552178799062 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -915,8 +915,8 @@ static void ipu_irq_handle(struct ipu_soc *ipu, const int *regs, int num_regs) static void ipu_irq_handler(unsigned int irq, struct irq_desc *desc) { struct ipu_soc *ipu = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); const int int_reg[] = { 0, 1, 2, 3, 10, 11, 12, 13, 14}; - struct irq_chip *chip = irq_get_chip(irq); chained_irq_enter(chip, desc); @@ -928,8 +928,8 @@ static void ipu_irq_handler(unsigned int irq, struct irq_desc *desc) static void ipu_err_irq_handler(unsigned int irq, struct irq_desc *desc) { struct ipu_soc *ipu = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); const int int_reg[] = { 4, 5, 8, 9}; - struct irq_chip *chip = irq_get_chip(irq); chained_irq_enter(chip, desc); diff --git a/include/dt-bindings/dma/jz4780-dma.h b/include/dt-bindings/dma/jz4780-dma.h deleted file mode 100644 index df017fdfb44e..000000000000 --- a/include/dt-bindings/dma/jz4780-dma.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef __DT_BINDINGS_DMA_JZ4780_DMA_H__ -#define __DT_BINDINGS_DMA_JZ4780_DMA_H__ - -/* - * Request type numbers for the JZ4780 DMA controller (written to the DRTn - * register for the channel). - */ -#define JZ4780_DMA_I2S1_TX 0x4 -#define JZ4780_DMA_I2S1_RX 0x5 -#define JZ4780_DMA_I2S0_TX 0x6 -#define JZ4780_DMA_I2S0_RX 0x7 -#define JZ4780_DMA_AUTO 0x8 -#define JZ4780_DMA_SADC_RX 0x9 -#define JZ4780_DMA_UART4_TX 0xc -#define JZ4780_DMA_UART4_RX 0xd -#define JZ4780_DMA_UART3_TX 0xe -#define JZ4780_DMA_UART3_RX 0xf -#define JZ4780_DMA_UART2_TX 0x10 -#define JZ4780_DMA_UART2_RX 0x11 -#define JZ4780_DMA_UART1_TX 0x12 -#define JZ4780_DMA_UART1_RX 0x13 -#define JZ4780_DMA_UART0_TX 0x14 -#define JZ4780_DMA_UART0_RX 0x15 -#define JZ4780_DMA_SSI0_TX 0x16 -#define JZ4780_DMA_SSI0_RX 0x17 -#define JZ4780_DMA_SSI1_TX 0x18 -#define JZ4780_DMA_SSI1_RX 0x19 -#define JZ4780_DMA_MSC0_TX 0x1a -#define JZ4780_DMA_MSC0_RX 0x1b -#define JZ4780_DMA_MSC1_TX 0x1c -#define JZ4780_DMA_MSC1_RX 0x1d -#define JZ4780_DMA_MSC2_TX 0x1e -#define JZ4780_DMA_MSC2_RX 0x1f -#define JZ4780_DMA_PCM0_TX 0x20 -#define JZ4780_DMA_PCM0_RX 0x21 -#define JZ4780_DMA_SMB0_TX 0x24 -#define JZ4780_DMA_SMB0_RX 0x25 -#define JZ4780_DMA_SMB1_TX 0x26 -#define JZ4780_DMA_SMB1_RX 0x27 -#define JZ4780_DMA_SMB2_TX 0x28 -#define JZ4780_DMA_SMB2_RX 0x29 -#define JZ4780_DMA_SMB3_TX 0x2a -#define JZ4780_DMA_SMB3_RX 0x2b -#define JZ4780_DMA_SMB4_TX 0x2c -#define JZ4780_DMA_SMB4_RX 0x2d -#define JZ4780_DMA_DES_TX 0x2e -#define JZ4780_DMA_DES_RX 0x2f - -#endif /* __DT_BINDINGS_DMA_JZ4780_DMA_H__ */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index e2f5eb419976..7ea9184eaa13 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -66,6 +66,7 @@ enum dma_transaction_type { DMA_XOR_VAL, DMA_PQ_VAL, DMA_MEMSET, + DMA_MEMSET_SG, DMA_INTERRUPT, DMA_SG, DMA_PRIVATE, @@ -183,6 +184,8 @@ struct dma_interleaved_template { * operation it continues the calculation with new sources * @DMA_PREP_FENCE - tell the driver that subsequent operations depend * on the result of this operation + * @DMA_CTRL_REUSE: client can reuse the descriptor and submit again till + * cleared or freed */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), @@ -191,6 +194,7 @@ enum dma_ctrl_flags { DMA_PREP_PQ_DISABLE_Q = (1 << 3), DMA_PREP_CONTINUE = (1 << 4), DMA_PREP_FENCE = (1 << 5), + DMA_CTRL_REUSE = (1 << 6), }; /** @@ -400,6 +404,8 @@ enum dma_residue_granularity { * @cmd_pause: true, if pause and thereby resume is supported * @cmd_terminate: true, if terminate cmd is supported * @residue_granularity: granularity of the reported transfer residue + * @descriptor_reuse: if a descriptor can be reused by client and + * resubmitted multiple times */ struct dma_slave_caps { u32 src_addr_widths; @@ -408,6 +414,7 @@ struct dma_slave_caps { bool cmd_pause; bool cmd_terminate; enum dma_residue_granularity residue_granularity; + bool descriptor_reuse; }; static inline const char *dma_chan_name(struct dma_chan *chan) @@ -467,6 +474,7 @@ struct dma_async_tx_descriptor { dma_addr_t phys; struct dma_chan *chan; dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx); + int (*desc_free)(struct dma_async_tx_descriptor *tx); dma_async_tx_callback callback; void *callback_param; struct dmaengine_unmap_data *unmap; @@ -585,6 +593,20 @@ struct dma_tx_state { }; /** + * enum dmaengine_alignment - defines alignment of the DMA async tx + * buffers + */ +enum dmaengine_alignment { + DMAENGINE_ALIGN_1_BYTE = 0, + DMAENGINE_ALIGN_2_BYTES = 1, + DMAENGINE_ALIGN_4_BYTES = 2, + DMAENGINE_ALIGN_8_BYTES = 3, + DMAENGINE_ALIGN_16_BYTES = 4, + DMAENGINE_ALIGN_32_BYTES = 5, + DMAENGINE_ALIGN_64_BYTES = 6, +}; + +/** * struct dma_device - info on the entity supplying DMA services * @chancnt: how many DMA channels are supported * @privatecnt: how many DMA channels are requested by dma_request_channel @@ -616,6 +638,7 @@ struct dma_tx_state { * @device_prep_dma_pq: prepares a pq operation * @device_prep_dma_pq_val: prepares a pqzero_sum operation * @device_prep_dma_memset: prepares a memset operation + * @device_prep_dma_memset_sg: prepares a memset operation over a scatter list * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio. @@ -645,10 +668,10 @@ struct dma_device { dma_cap_mask_t cap_mask; unsigned short max_xor; unsigned short max_pq; - u8 copy_align; - u8 xor_align; - u8 pq_align; - u8 fill_align; + enum dmaengine_alignment copy_align; + enum dmaengine_alignment xor_align; + enum dmaengine_alignment pq_align; + enum dmaengine_alignment fill_align; #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; @@ -682,6 +705,9 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_memset_sg)( + struct dma_chan *chan, struct scatterlist *sg, + unsigned int nents, int value, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( struct dma_chan *chan, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_sg)( @@ -833,7 +859,8 @@ static inline dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc return desc->tx_submit(desc); } -static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len) +static inline bool dmaengine_check_align(enum dmaengine_alignment align, + size_t off1, size_t off2, size_t len) { size_t mask; @@ -1155,6 +1182,39 @@ static inline int dma_get_slave_caps(struct dma_chan *chan, } #endif +static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx) +{ + struct dma_slave_caps caps; + + dma_get_slave_caps(tx->chan, &caps); + + if (caps.descriptor_reuse) { + tx->flags |= DMA_CTRL_REUSE; + return 0; + } else { + return -EPERM; + } +} + +static inline void dmaengine_desc_clear_reuse(struct dma_async_tx_descriptor *tx) +{ + tx->flags &= ~DMA_CTRL_REUSE; +} + +static inline bool dmaengine_desc_test_reuse(struct dma_async_tx_descriptor *tx) +{ + return (tx->flags & DMA_CTRL_REUSE) == DMA_CTRL_REUSE; +} + +static inline int dmaengine_desc_free(struct dma_async_tx_descriptor *desc) +{ + /* this is supported for reusable desc, so check that */ + if (dmaengine_desc_test_reuse(desc)) + return desc->desc_free(desc); + else + return -EPERM; +} + /* --- DMA device --- */ int dma_async_device_register(struct dma_device *device); @@ -1169,7 +1229,7 @@ struct dma_chan *dma_get_any_slave_channel(struct dma_device *device); static inline struct dma_chan *__dma_request_slave_channel_compat(const dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param, - struct device *dev, char *name) + struct device *dev, const char *name) { struct dma_chan *chan; @@ -1177,6 +1237,9 @@ static inline struct dma_chan if (chan) return chan; + if (!fn || !fn_param) + return NULL; + return __dma_request_channel(mask, fn, fn_param); } #endif /* DMAENGINE_H */ diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h index dd0ba502ccb3..d927647e6350 100644 --- a/include/linux/shdma-base.h +++ b/include/linux/shdma-base.h @@ -128,7 +128,10 @@ void shdma_cleanup(struct shdma_dev *sdev); #if IS_ENABLED(CONFIG_SH_DMAE_BASE) bool shdma_chan_filter(struct dma_chan *chan, void *arg); #else -#define shdma_chan_filter NULL +static inline bool shdma_chan_filter(struct dma_chan *chan, void *arg) +{ + return false; +} #endif #endif |