32 files changed, 2797 insertions(+), 406 deletions(-)
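The bulk of this series adds drivers/firewire/net.c, an RFC 2734 (IPv4 over IEEE 1394) driver for the firewire stack. The encapsulation header it sends on the wire is packed by the fwnet_set_hdr_*() / fwnet_make_*_hdr() helpers that appear further down in the patch. As a minimal, stand-alone sketch (not the driver's code; field values below are made-up sample data, and the layout simply restates those macros: lf in w0 bits 31:30, dg_size in 27:16, ether_type or fg_off in 15:0, dgl in w1 bits 31:16), the first-fragment header packing looks roughly like this:

	/* sketch of RFC 2374 first-fragment header packing, per the
	 * fwnet_* macros added below; compile with any C compiler */
	#include <stdint.h>
	#include <stdio.h>

	#define HDR_FIRSTFRAG 1	/* RFC2374_HDR_FIRSTFRAG in the patch */

	struct rfc2734_header { uint32_t w0, w1; };

	static void make_ff_hdr(struct rfc2734_header *h, unsigned ether_type,
				unsigned dg_size, unsigned dgl)
	{
		h->w0 = (HDR_FIRSTFRAG << 30) | (dg_size << 16) | ether_type;
		h->w1 = dgl << 16;
	}

	int main(void)
	{
		struct rfc2734_header h;

		/* sample values: IPv4 ethertype, 1500-byte datagram, label 7 */
		make_ff_hdr(&h, 0x0800, 1500, 7);
		printf("w0=%08x w1=%08x\n", h.w0, h.w1);
		printf("lf=%u dg_size=%u ether_type=%04x dgl=%u\n",
		       (h.w0 >> 30) & 3, (h.w0 >> 16) & 0xfff,
		       h.w0 & 0xffff, (h.w1 >> 16) & 0xffff);
		return 0;
	}

For the sample values this prints w0=45dc0800 w1=00070000, i.e. the two big-endian words that precede each fragment's payload; unfragmented datagrams use only w0 with the ether_type in the low 16 bits, as the unfragmented path in fwnet_send_packet() below shows.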
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 54ebf100e4e0..92e1ab8178a8 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1006,6 +1006,7 @@ and is between 256 and 4096 characters. It is defined in the file nomerge forcesac soft + pt [x86, IA64] io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h index 0490794fe4aa..745e095fe82e 100644 --- a/arch/ia64/include/asm/iommu.h +++ b/arch/ia64/include/asm/iommu.h @@ -9,6 +9,11 @@ extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); extern int force_iommu, no_iommu; extern int iommu_detected; +#ifdef CONFIG_DMAR +extern int iommu_pass_through; +#else +#define iommu_pass_through (0) +#endif extern void iommu_dma_init(void); extern void machvec_init(const char *name); diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index 1376da45fd08..05695962fe44 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -32,6 +32,8 @@ int force_iommu __read_mostly = 1; int force_iommu __read_mostly; #endif +int iommu_pass_through; + /* Dummy device used for NULL arguments (normally ISA). Better would be probably a smaller DMA mask, but this is bug-to-bug compatible to i386. */ diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 285aae8431c6..223abb134105 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -46,7 +46,7 @@ void __init swiotlb_dma_init(void) void __init pci_swiotlb_init(void) { - if (!iommu_detected) { + if (!iommu_detected || iommu_pass_through) { #ifdef CONFIG_IA64_GENERIC swiotlb = 1; printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index af326a2975b5..fd6d21bbee6c 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -6,6 +6,7 @@ extern void no_iommu_init(void); extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; +extern int iommu_pass_through; /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 328592fb6044..47630479b067 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -32,6 +32,8 @@ int no_iommu __read_mostly; /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly = 0; +int iommu_pass_through; + dma_addr_t bad_dma_address __read_mostly = 0; EXPORT_SYMBOL(bad_dma_address); @@ -209,6 +211,10 @@ static __init int iommu_setup(char *p) #ifdef CONFIG_SWIOTLB if (!strncmp(p, "soft", 4)) swiotlb = 1; + if (!strncmp(p, "pt", 2)) { + iommu_pass_through = 1; + return 1; + } #endif gart_parse_options(p); diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index a1712f2b50f1..6af96ee44200 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -71,7 +71,8 @@ void __init pci_swiotlb_init(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 - if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) + if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) || + iommu_pass_through) swiotlb = 1; #endif if (swiotlb_force) diff --git a/drivers/firewire/Kconfig b/drivers/firewire/Kconfig index 450902438208..13efcd362072 100644 --- 
a/drivers/firewire/Kconfig +++ b/drivers/firewire/Kconfig @@ -1,28 +1,29 @@ -comment "A new alternative FireWire stack is available with EXPERIMENTAL=y" - depends on EXPERIMENTAL=n - -comment "Enable only one of the two stacks, unless you know what you are doing" - depends on EXPERIMENTAL +comment "You can enable one or both FireWire driver stacks." +comment "See the help texts for more information." config FIREWIRE - tristate "New FireWire stack, EXPERIMENTAL" - depends on EXPERIMENTAL + tristate "FireWire driver stack" select CRC_ITU_T help - This is the "Juju" FireWire stack, a new alternative implementation - designed for robustness and simplicity. You can build either this - stack, or the old stack (the ieee1394 driver, ohci1394 etc.) or both. - Please read http://ieee1394.wiki.kernel.org/index.php/Juju_Migration - before you enable the new stack. + This is the new-generation IEEE 1394 (FireWire) driver stack + a.k.a. Juju, a new implementation designed for robustness and + simplicity. + See http://ieee1394.wiki.kernel.org/index.php/Juju_Migration + for information about migration from the older Linux 1394 stack + to the new driver stack. To compile this driver as a module, say M here: the module will be called firewire-core. This module functionally replaces ieee1394, raw1394, and video1394. To access it from application programs, you generally need at least - libraw1394 version 2. IIDC/DCAM applications also need libdc1394 - version 2. No libraries are required to access storage devices - through the firewire-sbp2 driver. + libraw1394 v2. IIDC/DCAM applications need libdc1394 v2. + No libraries are required to access storage devices through the + firewire-sbp2 driver. + + NOTE: + FireWire audio devices currently require the old drivers (ieee1394, + ohci1394, raw1394). config FIREWIRE_OHCI tristate "OHCI-1394 controllers" @@ -37,11 +38,9 @@ config FIREWIRE_OHCI stack. NOTE: - - You should only build either firewire-ohci or the old ohci1394 driver, - but not both. If you nevertheless want to install both, you should - configure them only as modules and blacklist the driver(s) which you - don't want to have auto-loaded. Add either + If you want to install firewire-ohci and ohci1394 together, you + should configure them only as modules and blacklist the driver(s) + which you don't want to have auto-loaded. Add either blacklist firewire-ohci or @@ -50,12 +49,7 @@ config FIREWIRE_OHCI blacklist dv1394 to /etc/modprobe.conf or /etc/modprobe.d/* and update modprobe.conf - depending on your distribution. The latter two modules should be - blacklisted together with ohci1394 because they depend on ohci1394. - - If you have an old modprobe which doesn't implement the blacklist - directive, use "install modulename /bin/true" for the modules to be - blacklisted. + depending on your distribution. config FIREWIRE_OHCI_DEBUG bool @@ -77,3 +71,17 @@ config FIREWIRE_SBP2 You should also enable support for disks, CD-ROMs, etc. in the SCSI configuration section. + +config FIREWIRE_NET + tristate "IP networking over 1394 (EXPERIMENTAL)" + depends on FIREWIRE && INET && EXPERIMENTAL + help + This enables IPv4 over IEEE 1394, providing IP connectivity with + other implementations of RFC 2734 as found on several operating + systems. Multicast support is currently limited. + + NOTE, this driver is not stable yet! + + To compile this driver as a module, say M here: The module will be + called firewire-net. It replaces eth1394 of the classic IEEE 1394 + stack. 
diff --git a/drivers/firewire/Makefile b/drivers/firewire/Makefile index bc3b9bf822bf..a8f9bb6d9fdf 100644 --- a/drivers/firewire/Makefile +++ b/drivers/firewire/Makefile @@ -6,7 +6,9 @@ firewire-core-y += core-card.o core-cdev.o core-device.o \ core-iso.o core-topology.o core-transaction.o firewire-ohci-y += ohci.o firewire-sbp2-y += sbp2.o +firewire-net-y += net.o -obj-$(CONFIG_FIREWIRE) += firewire-core.o +obj-$(CONFIG_FIREWIRE) += firewire-core.o obj-$(CONFIG_FIREWIRE_OHCI) += firewire-ohci.o obj-$(CONFIG_FIREWIRE_SBP2) += firewire-sbp2.o +obj-$(CONFIG_FIREWIRE_NET) += firewire-net.o diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 4c1be64fdddd..543fccac81bb 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -176,6 +176,7 @@ int fw_core_add_descriptor(struct fw_descriptor *desc) return 0; } +EXPORT_SYMBOL(fw_core_add_descriptor); void fw_core_remove_descriptor(struct fw_descriptor *desc) { @@ -189,6 +190,7 @@ void fw_core_remove_descriptor(struct fw_descriptor *desc) mutex_unlock(&card_mutex); } +EXPORT_SYMBOL(fw_core_remove_descriptor); static void allocate_broadcast_channel(struct fw_card *card, int generation) { @@ -459,11 +461,11 @@ EXPORT_SYMBOL(fw_card_add); /* - * The next few functions implements a dummy driver that use once a - * card driver shuts down an fw_card. This allows the driver to - * cleanly unload, as all IO to the card will be handled by the dummy - * driver instead of calling into the (possibly) unloaded module. The - * dummy driver just fails all IO. + * The next few functions implement a dummy driver that is used once a card + * driver shuts down an fw_card. This allows the driver to cleanly unload, + * as all IO to the card will be handled (and failed) by the dummy driver + * instead of calling into the module. Only functions for iso context + * shutdown still need to be provided by the card driver. */ static int dummy_enable(struct fw_card *card, u32 *config_rom, size_t length) @@ -510,7 +512,7 @@ static int dummy_enable_phys_dma(struct fw_card *card, return -ENODEV; } -static struct fw_card_driver dummy_driver = { +static const struct fw_card_driver dummy_driver_template = { .enable = dummy_enable, .update_phy_reg = dummy_update_phy_reg, .set_config_rom = dummy_set_config_rom, @@ -529,6 +531,8 @@ void fw_card_release(struct kref *kref) void fw_core_remove_card(struct fw_card *card) { + struct fw_card_driver dummy_driver = dummy_driver_template; + card->driver->update_phy_reg(card, 4, PHY_LINK_ACTIVE | PHY_CONTENDER, 0); fw_core_initiate_bus_reset(card, 1); @@ -537,7 +541,9 @@ void fw_core_remove_card(struct fw_card *card) list_del_init(&card->link); mutex_unlock(&card_mutex); - /* Set up the dummy driver. */ + /* Switch off most of the card driver interface. 
*/ + dummy_driver.free_iso_context = card->driver->free_iso_context; + dummy_driver.stop_iso = card->driver->stop_iso; card->driver = &dummy_driver; fw_destroy_nodes(card); diff --git a/drivers/firewire/core-iso.c b/drivers/firewire/core-iso.c index 28076c892d7e..166f19c6d38d 100644 --- a/drivers/firewire/core-iso.c +++ b/drivers/firewire/core-iso.c @@ -71,7 +71,7 @@ int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card, for (j = 0; j < i; j++) { address = page_private(buffer->pages[j]); dma_unmap_page(card->device, address, - PAGE_SIZE, DMA_TO_DEVICE); + PAGE_SIZE, direction); __free_page(buffer->pages[j]); } kfree(buffer->pages); @@ -80,6 +80,7 @@ int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card, return -ENOMEM; } +EXPORT_SYMBOL(fw_iso_buffer_init); int fw_iso_buffer_map(struct fw_iso_buffer *buffer, struct vm_area_struct *vma) { @@ -107,13 +108,14 @@ void fw_iso_buffer_destroy(struct fw_iso_buffer *buffer, for (i = 0; i < buffer->page_count; i++) { address = page_private(buffer->pages[i]); dma_unmap_page(card->device, address, - PAGE_SIZE, DMA_TO_DEVICE); + PAGE_SIZE, buffer->direction); __free_page(buffer->pages[i]); } kfree(buffer->pages); buffer->pages = NULL; } +EXPORT_SYMBOL(fw_iso_buffer_destroy); struct fw_iso_context *fw_iso_context_create(struct fw_card *card, int type, int channel, int speed, size_t header_size, @@ -136,6 +138,7 @@ struct fw_iso_context *fw_iso_context_create(struct fw_card *card, return ctx; } +EXPORT_SYMBOL(fw_iso_context_create); void fw_iso_context_destroy(struct fw_iso_context *ctx) { @@ -143,12 +146,14 @@ void fw_iso_context_destroy(struct fw_iso_context *ctx) card->driver->free_iso_context(ctx); } +EXPORT_SYMBOL(fw_iso_context_destroy); int fw_iso_context_start(struct fw_iso_context *ctx, int cycle, int sync, int tags) { return ctx->card->driver->start_iso(ctx, cycle, sync, tags); } +EXPORT_SYMBOL(fw_iso_context_start); int fw_iso_context_queue(struct fw_iso_context *ctx, struct fw_iso_packet *packet, @@ -159,11 +164,13 @@ int fw_iso_context_queue(struct fw_iso_context *ctx, return card->driver->queue_iso(ctx, packet, buffer, payload); } +EXPORT_SYMBOL(fw_iso_context_queue); int fw_iso_context_stop(struct fw_iso_context *ctx) { return ctx->card->driver->stop_iso(ctx); } +EXPORT_SYMBOL(fw_iso_context_stop); /* * Isochronous bus resource management (channels, bandwidth), client side diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h index 0a25a7b38a80..c3cfc647e5e3 100644 --- a/drivers/firewire/core.h +++ b/drivers/firewire/core.h @@ -1,7 +1,6 @@ #ifndef _FIREWIRE_CORE_H #define _FIREWIRE_CORE_H -#include <linux/dma-mapping.h> #include <linux/fs.h> #include <linux/list.h> #include <linux/idr.h> @@ -97,17 +96,6 @@ int fw_core_initiate_bus_reset(struct fw_card *card, int short_reset); int fw_compute_block_crc(u32 *block); void fw_schedule_bm_work(struct fw_card *card, unsigned long delay); -struct fw_descriptor { - struct list_head link; - size_t length; - u32 immediate; - u32 key; - const u32 *data; -}; - -int fw_core_add_descriptor(struct fw_descriptor *desc); -void fw_core_remove_descriptor(struct fw_descriptor *desc); - /* -cdev */ @@ -130,77 +118,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event); /* -iso */ -/* - * The iso packet format allows for an immediate header/payload part - * stored in 'header' immediately after the packet info plus an - * indirect payload part that is pointer to by the 'payload' field. 
- * Applications can use one or the other or both to implement simple - * low-bandwidth streaming (e.g. audio) or more advanced - * scatter-gather streaming (e.g. assembling video frame automatically). - */ -struct fw_iso_packet { - u16 payload_length; /* Length of indirect payload. */ - u32 interrupt:1; /* Generate interrupt on this packet */ - u32 skip:1; /* Set to not send packet at all. */ - u32 tag:2; - u32 sy:4; - u32 header_length:8; /* Length of immediate header. */ - u32 header[0]; -}; - -#define FW_ISO_CONTEXT_TRANSMIT 0 -#define FW_ISO_CONTEXT_RECEIVE 1 - -#define FW_ISO_CONTEXT_MATCH_TAG0 1 -#define FW_ISO_CONTEXT_MATCH_TAG1 2 -#define FW_ISO_CONTEXT_MATCH_TAG2 4 -#define FW_ISO_CONTEXT_MATCH_TAG3 8 -#define FW_ISO_CONTEXT_MATCH_ALL_TAGS 15 - -/* - * An iso buffer is just a set of pages mapped for DMA in the - * specified direction. Since the pages are to be used for DMA, they - * are not mapped into the kernel virtual address space. We store the - * DMA address in the page private. The helper function - * fw_iso_buffer_map() will map the pages into a given vma. - */ -struct fw_iso_buffer { - enum dma_data_direction direction; - struct page **pages; - int page_count; -}; - -typedef void (*fw_iso_callback_t)(struct fw_iso_context *context, - u32 cycle, size_t header_length, - void *header, void *data); - -struct fw_iso_context { - struct fw_card *card; - int type; - int channel; - int speed; - size_t header_size; - fw_iso_callback_t callback; - void *callback_data; -}; - -int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card, - int page_count, enum dma_data_direction direction); int fw_iso_buffer_map(struct fw_iso_buffer *buffer, struct vm_area_struct *vma); -void fw_iso_buffer_destroy(struct fw_iso_buffer *buffer, struct fw_card *card); - -struct fw_iso_context *fw_iso_context_create(struct fw_card *card, - int type, int channel, int speed, size_t header_size, - fw_iso_callback_t callback, void *callback_data); -int fw_iso_context_queue(struct fw_iso_context *ctx, - struct fw_iso_packet *packet, - struct fw_iso_buffer *buffer, - unsigned long payload); -int fw_iso_context_start(struct fw_iso_context *ctx, - int cycle, int sync, int tags); -int fw_iso_context_stop(struct fw_iso_context *ctx); -void fw_iso_context_destroy(struct fw_iso_context *ctx); - void fw_iso_resource_manage(struct fw_card *card, int generation, u64 channels_mask, int *channel, int *bandwidth, bool allocate); @@ -285,9 +203,4 @@ void fw_flush_transactions(struct fw_card *card); void fw_send_phy_config(struct fw_card *card, int node_id, int generation, int gap_count); -static inline int fw_stream_packet_destination_id(int tag, int channel, int sy) -{ - return tag << 14 | channel << 8 | sy; -} - #endif /* _FIREWIRE_CORE_H */ diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c new file mode 100644 index 000000000000..a42209a73aed --- /dev/null +++ b/drivers/firewire/net.c @@ -0,0 +1,1655 @@ +/* + * IPv4 over IEEE 1394, per RFC 2734 + * + * Copyright (C) 2009 Jay Fenlason <fenlason@redhat.com> + * + * based on eth1394 by Ben Collins et al + */ + +#include <linux/bug.h> +#include <linux/device.h> +#include <linux/ethtool.h> +#include <linux/firewire.h> +#include <linux/firewire-constants.h> +#include <linux/highmem.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/jiffies.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/mutex.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include 
<linux/spinlock.h> + +#include <asm/unaligned.h> +#include <net/arp.h> + +#define FWNET_MAX_FRAGMENTS 25 /* arbitrary limit */ +#define FWNET_ISO_PAGE_COUNT (PAGE_SIZE < 16 * 1024 ? 4 : 2) + +#define IEEE1394_BROADCAST_CHANNEL 31 +#define IEEE1394_ALL_NODES (0xffc0 | 0x003f) +#define IEEE1394_MAX_PAYLOAD_S100 512 +#define FWNET_NO_FIFO_ADDR (~0ULL) + +#define IANA_SPECIFIER_ID 0x00005eU +#define RFC2734_SW_VERSION 0x000001U + +#define IEEE1394_GASP_HDR_SIZE 8 + +#define RFC2374_UNFRAG_HDR_SIZE 4 +#define RFC2374_FRAG_HDR_SIZE 8 +#define RFC2374_FRAG_OVERHEAD 4 + +#define RFC2374_HDR_UNFRAG 0 /* unfragmented */ +#define RFC2374_HDR_FIRSTFRAG 1 /* first fragment */ +#define RFC2374_HDR_LASTFRAG 2 /* last fragment */ +#define RFC2374_HDR_INTFRAG 3 /* interior fragment */ + +#define RFC2734_HW_ADDR_LEN 16 + +struct rfc2734_arp { + __be16 hw_type; /* 0x0018 */ + __be16 proto_type; /* 0x0806 */ + u8 hw_addr_len; /* 16 */ + u8 ip_addr_len; /* 4 */ + __be16 opcode; /* ARP Opcode */ + /* Above is exactly the same format as struct arphdr */ + + __be64 s_uniq_id; /* Sender's 64bit EUI */ + u8 max_rec; /* Sender's max packet size */ + u8 sspd; /* Sender's max speed */ + __be16 fifo_hi; /* hi 16bits of sender's FIFO addr */ + __be32 fifo_lo; /* lo 32bits of sender's FIFO addr */ + __be32 sip; /* Sender's IP Address */ + __be32 tip; /* IP Address of requested hw addr */ +} __attribute__((packed)); + +/* This header format is specific to this driver implementation. */ +#define FWNET_ALEN 8 +#define FWNET_HLEN 10 +struct fwnet_header { + u8 h_dest[FWNET_ALEN]; /* destination address */ + __be16 h_proto; /* packet type ID field */ +} __attribute__((packed)); + +/* IPv4 and IPv6 encapsulation header */ +struct rfc2734_header { + u32 w0; + u32 w1; +}; + +#define fwnet_get_hdr_lf(h) (((h)->w0 & 0xc0000000) >> 30) +#define fwnet_get_hdr_ether_type(h) (((h)->w0 & 0x0000ffff)) +#define fwnet_get_hdr_dg_size(h) (((h)->w0 & 0x0fff0000) >> 16) +#define fwnet_get_hdr_fg_off(h) (((h)->w0 & 0x00000fff)) +#define fwnet_get_hdr_dgl(h) (((h)->w1 & 0xffff0000) >> 16) + +#define fwnet_set_hdr_lf(lf) ((lf) << 30) +#define fwnet_set_hdr_ether_type(et) (et) +#define fwnet_set_hdr_dg_size(dgs) ((dgs) << 16) +#define fwnet_set_hdr_fg_off(fgo) (fgo) + +#define fwnet_set_hdr_dgl(dgl) ((dgl) << 16) + +static inline void fwnet_make_uf_hdr(struct rfc2734_header *hdr, + unsigned ether_type) +{ + hdr->w0 = fwnet_set_hdr_lf(RFC2374_HDR_UNFRAG) + | fwnet_set_hdr_ether_type(ether_type); +} + +static inline void fwnet_make_ff_hdr(struct rfc2734_header *hdr, + unsigned ether_type, unsigned dg_size, unsigned dgl) +{ + hdr->w0 = fwnet_set_hdr_lf(RFC2374_HDR_FIRSTFRAG) + | fwnet_set_hdr_dg_size(dg_size) + | fwnet_set_hdr_ether_type(ether_type); + hdr->w1 = fwnet_set_hdr_dgl(dgl); +} + +static inline void fwnet_make_sf_hdr(struct rfc2734_header *hdr, + unsigned lf, unsigned dg_size, unsigned fg_off, unsigned dgl) +{ + hdr->w0 = fwnet_set_hdr_lf(lf) + | fwnet_set_hdr_dg_size(dg_size) + | fwnet_set_hdr_fg_off(fg_off); + hdr->w1 = fwnet_set_hdr_dgl(dgl); +} + +/* This list keeps track of what parts of the datagram have been filled in */ +struct fwnet_fragment_info { + struct list_head fi_link; + u16 offset; + u16 len; +}; + +struct fwnet_partial_datagram { + struct list_head pd_link; + struct list_head fi_list; + struct sk_buff *skb; + /* FIXME Why not use skb->data? 
*/ + char *pbuf; + u16 datagram_label; + u16 ether_type; + u16 datagram_size; +}; + +static DEFINE_MUTEX(fwnet_device_mutex); +static LIST_HEAD(fwnet_device_list); + +struct fwnet_device { + struct list_head dev_link; + spinlock_t lock; + enum { + FWNET_BROADCAST_ERROR, + FWNET_BROADCAST_RUNNING, + FWNET_BROADCAST_STOPPED, + } broadcast_state; + struct fw_iso_context *broadcast_rcv_context; + struct fw_iso_buffer broadcast_rcv_buffer; + void **broadcast_rcv_buffer_ptrs; + unsigned broadcast_rcv_next_ptr; + unsigned num_broadcast_rcv_ptrs; + unsigned rcv_buffer_size; + /* + * This value is the maximum unfragmented datagram size that can be + * sent by the hardware. It already has the GASP overhead and the + * unfragmented datagram header overhead calculated into it. + */ + unsigned broadcast_xmt_max_payload; + u16 broadcast_xmt_datagramlabel; + + /* + * The CSR address that remote nodes must send datagrams to for us to + * receive them. + */ + struct fw_address_handler handler; + u64 local_fifo; + + /* List of packets to be sent */ + struct list_head packet_list; + /* + * List of packets that were broadcasted. When we get an ISO interrupt + * one of them has been sent + */ + struct list_head broadcasted_list; + /* List of packets that have been sent but not yet acked */ + struct list_head sent_list; + + struct list_head peer_list; + struct fw_card *card; + struct net_device *netdev; +}; + +struct fwnet_peer { + struct list_head peer_link; + struct fwnet_device *dev; + u64 guid; + u64 fifo; + + /* guarded by dev->lock */ + struct list_head pd_list; /* received partial datagrams */ + unsigned pdg_size; /* pd_list size */ + + u16 datagram_label; /* outgoing datagram label */ + unsigned max_payload; /* includes RFC2374_FRAG_HDR_SIZE overhead */ + int node_id; + int generation; + unsigned speed; +}; + +/* This is our task struct. It's used for the packet complete callback. */ +struct fwnet_packet_task { + /* + * ptask can actually be on dev->packet_list, dev->broadcasted_list, + * or dev->sent_list depending on its current state. + */ + struct list_head pt_link; + struct fw_transaction transaction; + struct rfc2734_header hdr; + struct sk_buff *skb; + struct fwnet_device *dev; + + int outstanding_pkts; + unsigned max_payload; + u64 fifo_addr; + u16 dest_node; + u8 generation; + u8 speed; +}; + +/* + * saddr == NULL means use device source address. + * daddr == NULL means leave destination address (eg unresolved arp). 
+ */ +static int fwnet_header_create(struct sk_buff *skb, struct net_device *net, + unsigned short type, const void *daddr, + const void *saddr, unsigned len) +{ + struct fwnet_header *h; + + h = (struct fwnet_header *)skb_push(skb, sizeof(*h)); + put_unaligned_be16(type, &h->h_proto); + + if (net->flags & (IFF_LOOPBACK | IFF_NOARP)) { + memset(h->h_dest, 0, net->addr_len); + + return net->hard_header_len; + } + + if (daddr) { + memcpy(h->h_dest, daddr, net->addr_len); + + return net->hard_header_len; + } + + return -net->hard_header_len; +} + +static int fwnet_header_rebuild(struct sk_buff *skb) +{ + struct fwnet_header *h = (struct fwnet_header *)skb->data; + + if (get_unaligned_be16(&h->h_proto) == ETH_P_IP) + return arp_find((unsigned char *)&h->h_dest, skb); + + fw_notify("%s: unable to resolve type %04x addresses\n", + skb->dev->name, be16_to_cpu(h->h_proto)); + return 0; +} + +static int fwnet_header_cache(const struct neighbour *neigh, + struct hh_cache *hh) +{ + struct net_device *net; + struct fwnet_header *h; + + if (hh->hh_type == cpu_to_be16(ETH_P_802_3)) + return -1; + net = neigh->dev; + h = (struct fwnet_header *)((u8 *)hh->hh_data + 16 - sizeof(*h)); + h->h_proto = hh->hh_type; + memcpy(h->h_dest, neigh->ha, net->addr_len); + hh->hh_len = FWNET_HLEN; + + return 0; +} + +/* Called by Address Resolution module to notify changes in address. */ +static void fwnet_header_cache_update(struct hh_cache *hh, + const struct net_device *net, const unsigned char *haddr) +{ + memcpy((u8 *)hh->hh_data + 16 - FWNET_HLEN, haddr, net->addr_len); +} + +static int fwnet_header_parse(const struct sk_buff *skb, unsigned char *haddr) +{ + memcpy(haddr, skb->dev->dev_addr, FWNET_ALEN); + + return FWNET_ALEN; +} + +static const struct header_ops fwnet_header_ops = { + .create = fwnet_header_create, + .rebuild = fwnet_header_rebuild, + .cache = fwnet_header_cache, + .cache_update = fwnet_header_cache_update, + .parse = fwnet_header_parse, +}; + +/* FIXME: is this correct for all cases? */ +static bool fwnet_frag_overlap(struct fwnet_partial_datagram *pd, + unsigned offset, unsigned len) +{ + struct fwnet_fragment_info *fi; + unsigned end = offset + len; + + list_for_each_entry(fi, &pd->fi_list, fi_link) + if (offset < fi->offset + fi->len && end > fi->offset) + return true; + + return false; +} + +/* Assumes that new fragment does not overlap any existing fragments */ +static struct fwnet_fragment_info *fwnet_frag_new( + struct fwnet_partial_datagram *pd, unsigned offset, unsigned len) +{ + struct fwnet_fragment_info *fi, *fi2, *new; + struct list_head *list; + + list = &pd->fi_list; + list_for_each_entry(fi, &pd->fi_list, fi_link) { + if (fi->offset + fi->len == offset) { + /* The new fragment can be tacked on to the end */ + /* Did the new fragment plug a hole? */ + fi2 = list_entry(fi->fi_link.next, + struct fwnet_fragment_info, fi_link); + if (fi->offset + fi->len == fi2->offset) { + /* glue fragments together */ + fi->len += len + fi2->len; + list_del(&fi2->fi_link); + kfree(fi2); + } else { + fi->len += len; + } + + return fi; + } + if (offset + len == fi->offset) { + /* The new fragment can be tacked on to the beginning */ + /* Did the new fragment plug a hole? 
*/ + fi2 = list_entry(fi->fi_link.prev, + struct fwnet_fragment_info, fi_link); + if (fi2->offset + fi2->len == fi->offset) { + /* glue fragments together */ + fi2->len += fi->len + len; + list_del(&fi->fi_link); + kfree(fi); + + return fi2; + } + fi->offset = offset; + fi->len += len; + + return fi; + } + if (offset > fi->offset + fi->len) { + list = &fi->fi_link; + break; + } + if (offset + len < fi->offset) { + list = fi->fi_link.prev; + break; + } + } + + new = kmalloc(sizeof(*new), GFP_ATOMIC); + if (!new) { + fw_error("out of memory\n"); + return NULL; + } + + new->offset = offset; + new->len = len; + list_add(&new->fi_link, list); + + return new; +} + +static struct fwnet_partial_datagram *fwnet_pd_new(struct net_device *net, + struct fwnet_peer *peer, u16 datagram_label, unsigned dg_size, + void *frag_buf, unsigned frag_off, unsigned frag_len) +{ + struct fwnet_partial_datagram *new; + struct fwnet_fragment_info *fi; + + new = kmalloc(sizeof(*new), GFP_ATOMIC); + if (!new) + goto fail; + + INIT_LIST_HEAD(&new->fi_list); + fi = fwnet_frag_new(new, frag_off, frag_len); + if (fi == NULL) + goto fail_w_new; + + new->datagram_label = datagram_label; + new->datagram_size = dg_size; + new->skb = dev_alloc_skb(dg_size + net->hard_header_len + 15); + if (new->skb == NULL) + goto fail_w_fi; + + skb_reserve(new->skb, (net->hard_header_len + 15) & ~15); + new->pbuf = skb_put(new->skb, dg_size); + memcpy(new->pbuf + frag_off, frag_buf, frag_len); + list_add_tail(&new->pd_link, &peer->pd_list); + + return new; + +fail_w_fi: + kfree(fi); +fail_w_new: + kfree(new); +fail: + fw_error("out of memory\n"); + + return NULL; +} + +static struct fwnet_partial_datagram *fwnet_pd_find(struct fwnet_peer *peer, + u16 datagram_label) +{ + struct fwnet_partial_datagram *pd; + + list_for_each_entry(pd, &peer->pd_list, pd_link) + if (pd->datagram_label == datagram_label) + return pd; + + return NULL; +} + + +static void fwnet_pd_delete(struct fwnet_partial_datagram *old) +{ + struct fwnet_fragment_info *fi, *n; + + list_for_each_entry_safe(fi, n, &old->fi_list, fi_link) + kfree(fi); + + list_del(&old->pd_link); + dev_kfree_skb_any(old->skb); + kfree(old); +} + +static bool fwnet_pd_update(struct fwnet_peer *peer, + struct fwnet_partial_datagram *pd, void *frag_buf, + unsigned frag_off, unsigned frag_len) +{ + if (fwnet_frag_new(pd, frag_off, frag_len) == NULL) + return false; + + memcpy(pd->pbuf + frag_off, frag_buf, frag_len); + + /* + * Move list entry to beginnig of list so that oldest partial + * datagrams percolate to the end of the list + */ + list_move_tail(&pd->pd_link, &peer->pd_list); + + return true; +} + +static bool fwnet_pd_is_complete(struct fwnet_partial_datagram *pd) +{ + struct fwnet_fragment_info *fi; + + fi = list_entry(pd->fi_list.next, struct fwnet_fragment_info, fi_link); + + return fi->len == pd->datagram_size; +} + +/* caller must hold dev->lock */ +static struct fwnet_peer *fwnet_peer_find_by_guid(struct fwnet_device *dev, + u64 guid) +{ + struct fwnet_peer *peer; + + list_for_each_entry(peer, &dev->peer_list, peer_link) + if (peer->guid == guid) + return peer; + + return NULL; +} + +/* caller must hold dev->lock */ +static struct fwnet_peer *fwnet_peer_find_by_node_id(struct fwnet_device *dev, + int node_id, int generation) +{ + struct fwnet_peer *peer; + + list_for_each_entry(peer, &dev->peer_list, peer_link) + if (peer->node_id == node_id && + peer->generation == generation) + return peer; + + return NULL; +} + +/* See IEEE 1394-2008 table 6-4, table 8-8, table 16-18. 
*/ +static unsigned fwnet_max_payload(unsigned max_rec, unsigned speed) +{ + max_rec = min(max_rec, speed + 8); + max_rec = min(max_rec, 0xbU); /* <= 4096 */ + if (max_rec < 8) { + fw_notify("max_rec %x out of range\n", max_rec); + max_rec = 8; + } + + return (1 << (max_rec + 1)) - RFC2374_FRAG_HDR_SIZE; +} + + +static int fwnet_finish_incoming_packet(struct net_device *net, + struct sk_buff *skb, u16 source_node_id, + bool is_broadcast, u16 ether_type) +{ + struct fwnet_device *dev; + static const __be64 broadcast_hw = cpu_to_be64(~0ULL); + int status; + __be64 guid; + + dev = netdev_priv(net); + /* Write metadata, and then pass to the receive level */ + skb->dev = net; + skb->ip_summed = CHECKSUM_UNNECESSARY; /* don't check it */ + + /* + * Parse the encapsulation header. This actually does the job of + * converting to an ethernet frame header, as well as arp + * conversion if needed. ARP conversion is easier in this + * direction, since we are using ethernet as our backend. + */ + /* + * If this is an ARP packet, convert it. First, we want to make + * use of some of the fields, since they tell us a little bit + * about the sending machine. + */ + if (ether_type == ETH_P_ARP) { + struct rfc2734_arp *arp1394; + struct arphdr *arp; + unsigned char *arp_ptr; + u64 fifo_addr; + u64 peer_guid; + unsigned sspd; + u16 max_payload; + struct fwnet_peer *peer; + unsigned long flags; + + arp1394 = (struct rfc2734_arp *)skb->data; + arp = (struct arphdr *)skb->data; + arp_ptr = (unsigned char *)(arp + 1); + peer_guid = get_unaligned_be64(&arp1394->s_uniq_id); + fifo_addr = (u64)get_unaligned_be16(&arp1394->fifo_hi) << 32 + | get_unaligned_be32(&arp1394->fifo_lo); + + sspd = arp1394->sspd; + /* Sanity check. OS X 10.3 PPC reportedly sends 131. */ + if (sspd > SCODE_3200) { + fw_notify("sspd %x out of range\n", sspd); + sspd = SCODE_3200; + } + max_payload = fwnet_max_payload(arp1394->max_rec, sspd); + + spin_lock_irqsave(&dev->lock, flags); + peer = fwnet_peer_find_by_guid(dev, peer_guid); + if (peer) { + peer->fifo = fifo_addr; + + if (peer->speed > sspd) + peer->speed = sspd; + if (peer->max_payload > max_payload) + peer->max_payload = max_payload; + } + spin_unlock_irqrestore(&dev->lock, flags); + + if (!peer) { + fw_notify("No peer for ARP packet from %016llx\n", + (unsigned long long)peer_guid); + goto failed_proto; + } + + /* + * Now that we're done with the 1394 specific stuff, we'll + * need to alter some of the data. Believe it or not, all + * that needs to be done is sender_IP_address needs to be + * moved, the destination hardware address get stuffed + * in and the hardware address length set to 8. + * + * IMPORTANT: The code below overwrites 1394 specific data + * needed above so keep the munging of the data for the + * higher level IP stack last. + */ + + arp->ar_hln = 8; + /* skip over sender unique id */ + arp_ptr += arp->ar_hln; + /* move sender IP addr */ + put_unaligned(arp1394->sip, (u32 *)arp_ptr); + /* skip over sender IP addr */ + arp_ptr += arp->ar_pln; + + if (arp->ar_op == htons(ARPOP_REQUEST)) + memset(arp_ptr, 0, sizeof(u64)); + else + memcpy(arp_ptr, net->dev_addr, sizeof(u64)); + } + + /* Now add the ethernet header. */ + guid = cpu_to_be64(dev->card->guid); + if (dev_hard_header(skb, net, ether_type, + is_broadcast ? 
&broadcast_hw : &guid, + NULL, skb->len) >= 0) { + struct fwnet_header *eth; + u16 *rawp; + __be16 protocol; + + skb_reset_mac_header(skb); + skb_pull(skb, sizeof(*eth)); + eth = (struct fwnet_header *)skb_mac_header(skb); + if (*eth->h_dest & 1) { + if (memcmp(eth->h_dest, net->broadcast, + net->addr_len) == 0) + skb->pkt_type = PACKET_BROADCAST; +#if 0 + else + skb->pkt_type = PACKET_MULTICAST; +#endif + } else { + if (memcmp(eth->h_dest, net->dev_addr, net->addr_len)) + skb->pkt_type = PACKET_OTHERHOST; + } + if (ntohs(eth->h_proto) >= 1536) { + protocol = eth->h_proto; + } else { + rawp = (u16 *)skb->data; + if (*rawp == 0xffff) + protocol = htons(ETH_P_802_3); + else + protocol = htons(ETH_P_802_2); + } + skb->protocol = protocol; + } + status = netif_rx(skb); + if (status == NET_RX_DROP) { + net->stats.rx_errors++; + net->stats.rx_dropped++; + } else { + net->stats.rx_packets++; + net->stats.rx_bytes += skb->len; + } + if (netif_queue_stopped(net)) + netif_wake_queue(net); + + return 0; + + failed_proto: + net->stats.rx_errors++; + net->stats.rx_dropped++; + + dev_kfree_skb_any(skb); + if (netif_queue_stopped(net)) + netif_wake_queue(net); + + net->last_rx = jiffies; + + return 0; +} + +static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len, + int source_node_id, int generation, + bool is_broadcast) +{ + struct sk_buff *skb; + struct net_device *net = dev->netdev; + struct rfc2734_header hdr; + unsigned lf; + unsigned long flags; + struct fwnet_peer *peer; + struct fwnet_partial_datagram *pd; + int fg_off; + int dg_size; + u16 datagram_label; + int retval; + u16 ether_type; + + hdr.w0 = be32_to_cpu(buf[0]); + lf = fwnet_get_hdr_lf(&hdr); + if (lf == RFC2374_HDR_UNFRAG) { + /* + * An unfragmented datagram has been received by the ieee1394 + * bus. Build an skbuff around it so we can pass it to the + * high level network layer. + */ + ether_type = fwnet_get_hdr_ether_type(&hdr); + buf++; + len -= RFC2374_UNFRAG_HDR_SIZE; + + skb = dev_alloc_skb(len + net->hard_header_len + 15); + if (unlikely(!skb)) { + fw_error("out of memory\n"); + net->stats.rx_dropped++; + + return -1; + } + skb_reserve(skb, (net->hard_header_len + 15) & ~15); + memcpy(skb_put(skb, len), buf, len); + + return fwnet_finish_incoming_packet(net, skb, source_node_id, + is_broadcast, ether_type); + } + /* A datagram fragment has been received, now the fun begins. */ + hdr.w1 = ntohl(buf[1]); + buf += 2; + len -= RFC2374_FRAG_HDR_SIZE; + if (lf == RFC2374_HDR_FIRSTFRAG) { + ether_type = fwnet_get_hdr_ether_type(&hdr); + fg_off = 0; + } else { + ether_type = 0; + fg_off = fwnet_get_hdr_fg_off(&hdr); + } + datagram_label = fwnet_get_hdr_dgl(&hdr); + dg_size = fwnet_get_hdr_dg_size(&hdr); /* ??? + 1 */ + + spin_lock_irqsave(&dev->lock, flags); + + peer = fwnet_peer_find_by_node_id(dev, source_node_id, generation); + if (!peer) + goto bad_proto; + + pd = fwnet_pd_find(peer, datagram_label); + if (pd == NULL) { + while (peer->pdg_size >= FWNET_MAX_FRAGMENTS) { + /* remove the oldest */ + fwnet_pd_delete(list_first_entry(&peer->pd_list, + struct fwnet_partial_datagram, pd_link)); + peer->pdg_size--; + } + pd = fwnet_pd_new(net, peer, datagram_label, + dg_size, buf, fg_off, len); + if (pd == NULL) { + retval = -ENOMEM; + goto bad_proto; + } + peer->pdg_size++; + } else { + if (fwnet_frag_overlap(pd, fg_off, len) || + pd->datagram_size != dg_size) { + /* + * Differing datagram sizes or overlapping fragments, + * discard old datagram and start a new one. 
+ */ + fwnet_pd_delete(pd); + pd = fwnet_pd_new(net, peer, datagram_label, + dg_size, buf, fg_off, len); + if (pd == NULL) { + retval = -ENOMEM; + peer->pdg_size--; + goto bad_proto; + } + } else { + if (!fwnet_pd_update(peer, pd, buf, fg_off, len)) { + /* + * Couldn't save off fragment anyway + * so might as well obliterate the + * datagram now. + */ + fwnet_pd_delete(pd); + peer->pdg_size--; + goto bad_proto; + } + } + } /* new datagram or add to existing one */ + + if (lf == RFC2374_HDR_FIRSTFRAG) + pd->ether_type = ether_type; + + if (fwnet_pd_is_complete(pd)) { + ether_type = pd->ether_type; + peer->pdg_size--; + skb = skb_get(pd->skb); + fwnet_pd_delete(pd); + + spin_unlock_irqrestore(&dev->lock, flags); + + return fwnet_finish_incoming_packet(net, skb, source_node_id, + false, ether_type); + } + /* + * Datagram is not complete, we're done for the + * moment. + */ + spin_unlock_irqrestore(&dev->lock, flags); + + return 0; + + bad_proto: + spin_unlock_irqrestore(&dev->lock, flags); + + if (netif_queue_stopped(net)) + netif_wake_queue(net); + + return 0; +} + +static void fwnet_receive_packet(struct fw_card *card, struct fw_request *r, + int tcode, int destination, int source, int generation, + int speed, unsigned long long offset, void *payload, + size_t length, void *callback_data) +{ + struct fwnet_device *dev = callback_data; + int rcode; + + if (destination == IEEE1394_ALL_NODES) { + kfree(r); + + return; + } + + if (offset != dev->handler.offset) + rcode = RCODE_ADDRESS_ERROR; + else if (tcode != TCODE_WRITE_BLOCK_REQUEST) + rcode = RCODE_TYPE_ERROR; + else if (fwnet_incoming_packet(dev, payload, length, + source, generation, false) != 0) { + fw_error("Incoming packet failure\n"); + rcode = RCODE_CONFLICT_ERROR; + } else + rcode = RCODE_COMPLETE; + + fw_send_response(card, r, rcode); +} + +static void fwnet_receive_broadcast(struct fw_iso_context *context, + u32 cycle, size_t header_length, void *header, void *data) +{ + struct fwnet_device *dev; + struct fw_iso_packet packet; + struct fw_card *card; + __be16 *hdr_ptr; + __be32 *buf_ptr; + int retval; + u32 length; + u16 source_node_id; + u32 specifier_id; + u32 ver; + unsigned long offset; + unsigned long flags; + + dev = data; + card = dev->card; + hdr_ptr = header; + length = be16_to_cpup(hdr_ptr); + + spin_lock_irqsave(&dev->lock, flags); + + offset = dev->rcv_buffer_size * dev->broadcast_rcv_next_ptr; + buf_ptr = dev->broadcast_rcv_buffer_ptrs[dev->broadcast_rcv_next_ptr++]; + if (dev->broadcast_rcv_next_ptr == dev->num_broadcast_rcv_ptrs) + dev->broadcast_rcv_next_ptr = 0; + + spin_unlock_irqrestore(&dev->lock, flags); + + specifier_id = (be32_to_cpu(buf_ptr[0]) & 0xffff) << 8 + | (be32_to_cpu(buf_ptr[1]) & 0xff000000) >> 24; + ver = be32_to_cpu(buf_ptr[1]) & 0xffffff; + source_node_id = be32_to_cpu(buf_ptr[0]) >> 16; + + if (specifier_id == IANA_SPECIFIER_ID && ver == RFC2734_SW_VERSION) { + buf_ptr += 2; + length -= IEEE1394_GASP_HDR_SIZE; + fwnet_incoming_packet(dev, buf_ptr, length, + source_node_id, -1, true); + } + + packet.payload_length = dev->rcv_buffer_size; + packet.interrupt = 1; + packet.skip = 0; + packet.tag = 3; + packet.sy = 0; + packet.header_length = IEEE1394_GASP_HDR_SIZE; + + spin_lock_irqsave(&dev->lock, flags); + + retval = fw_iso_context_queue(dev->broadcast_rcv_context, &packet, + &dev->broadcast_rcv_buffer, offset); + + spin_unlock_irqrestore(&dev->lock, flags); + + if (retval < 0) + fw_error("requeue failed\n"); +} + +static struct kmem_cache *fwnet_packet_task_cache; + +static int 
fwnet_send_packet(struct fwnet_packet_task *ptask); + +static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) +{ + struct fwnet_device *dev; + unsigned long flags; + + dev = ptask->dev; + + spin_lock_irqsave(&dev->lock, flags); + list_del(&ptask->pt_link); + spin_unlock_irqrestore(&dev->lock, flags); + + ptask->outstanding_pkts--; /* FIXME access inside lock */ + + if (ptask->outstanding_pkts > 0) { + u16 dg_size; + u16 fg_off; + u16 datagram_label; + u16 lf; + struct sk_buff *skb; + + /* Update the ptask to point to the next fragment and send it */ + lf = fwnet_get_hdr_lf(&ptask->hdr); + switch (lf) { + case RFC2374_HDR_LASTFRAG: + case RFC2374_HDR_UNFRAG: + default: + fw_error("Outstanding packet %x lf %x, header %x,%x\n", + ptask->outstanding_pkts, lf, ptask->hdr.w0, + ptask->hdr.w1); + BUG(); + + case RFC2374_HDR_FIRSTFRAG: + /* Set frag type here for future interior fragments */ + dg_size = fwnet_get_hdr_dg_size(&ptask->hdr); + fg_off = ptask->max_payload - RFC2374_FRAG_HDR_SIZE; + datagram_label = fwnet_get_hdr_dgl(&ptask->hdr); + break; + + case RFC2374_HDR_INTFRAG: + dg_size = fwnet_get_hdr_dg_size(&ptask->hdr); + fg_off = fwnet_get_hdr_fg_off(&ptask->hdr) + + ptask->max_payload - RFC2374_FRAG_HDR_SIZE; + datagram_label = fwnet_get_hdr_dgl(&ptask->hdr); + break; + } + skb = ptask->skb; + skb_pull(skb, ptask->max_payload); + if (ptask->outstanding_pkts > 1) { + fwnet_make_sf_hdr(&ptask->hdr, RFC2374_HDR_INTFRAG, + dg_size, fg_off, datagram_label); + } else { + fwnet_make_sf_hdr(&ptask->hdr, RFC2374_HDR_LASTFRAG, + dg_size, fg_off, datagram_label); + ptask->max_payload = skb->len + RFC2374_FRAG_HDR_SIZE; + } + fwnet_send_packet(ptask); + } else { + dev_kfree_skb_any(ptask->skb); + kmem_cache_free(fwnet_packet_task_cache, ptask); + } +} + +static void fwnet_write_complete(struct fw_card *card, int rcode, + void *payload, size_t length, void *data) +{ + struct fwnet_packet_task *ptask; + + ptask = data; + + if (rcode == RCODE_COMPLETE) + fwnet_transmit_packet_done(ptask); + else + fw_error("fwnet_write_complete: failed: %x\n", rcode); + /* ??? error recovery */ +} + +static int fwnet_send_packet(struct fwnet_packet_task *ptask) +{ + struct fwnet_device *dev; + unsigned tx_len; + struct rfc2734_header *bufhdr; + unsigned long flags; + + dev = ptask->dev; + tx_len = ptask->max_payload; + switch (fwnet_get_hdr_lf(&ptask->hdr)) { + case RFC2374_HDR_UNFRAG: + bufhdr = (struct rfc2734_header *) + skb_push(ptask->skb, RFC2374_UNFRAG_HDR_SIZE); + put_unaligned_be32(ptask->hdr.w0, &bufhdr->w0); + break; + + case RFC2374_HDR_FIRSTFRAG: + case RFC2374_HDR_INTFRAG: + case RFC2374_HDR_LASTFRAG: + bufhdr = (struct rfc2734_header *) + skb_push(ptask->skb, RFC2374_FRAG_HDR_SIZE); + put_unaligned_be32(ptask->hdr.w0, &bufhdr->w0); + put_unaligned_be32(ptask->hdr.w1, &bufhdr->w1); + break; + + default: + BUG(); + } + if (ptask->dest_node == IEEE1394_ALL_NODES) { + u8 *p; + int generation; + int node_id; + + /* ptask->generation may not have been set yet */ + generation = dev->card->generation; + smp_rmb(); + node_id = dev->card->node_id; + + p = skb_push(ptask->skb, 8); + put_unaligned_be32(node_id << 16 | IANA_SPECIFIER_ID >> 8, p); + put_unaligned_be32((IANA_SPECIFIER_ID & 0xff) << 24 + | RFC2734_SW_VERSION, &p[4]); + + /* We should not transmit if broadcast_channel.valid == 0. 
*/ + fw_send_request(dev->card, &ptask->transaction, + TCODE_STREAM_DATA, + fw_stream_packet_destination_id(3, + IEEE1394_BROADCAST_CHANNEL, 0), + generation, SCODE_100, 0ULL, ptask->skb->data, + tx_len + 8, fwnet_write_complete, ptask); + + /* FIXME race? */ + spin_lock_irqsave(&dev->lock, flags); + list_add_tail(&ptask->pt_link, &dev->broadcasted_list); + spin_unlock_irqrestore(&dev->lock, flags); + + return 0; + } + + fw_send_request(dev->card, &ptask->transaction, + TCODE_WRITE_BLOCK_REQUEST, ptask->dest_node, + ptask->generation, ptask->speed, ptask->fifo_addr, + ptask->skb->data, tx_len, fwnet_write_complete, ptask); + + /* FIXME race? */ + spin_lock_irqsave(&dev->lock, flags); + list_add_tail(&ptask->pt_link, &dev->sent_list); + spin_unlock_irqrestore(&dev->lock, flags); + + dev->netdev->trans_start = jiffies; + + return 0; +} + +static int fwnet_broadcast_start(struct fwnet_device *dev) +{ + struct fw_iso_context *context; + int retval; + unsigned num_packets; + unsigned max_receive; + struct fw_iso_packet packet; + unsigned long offset; + unsigned u; + + if (dev->local_fifo == FWNET_NO_FIFO_ADDR) { + /* outside OHCI posted write area? */ + static const struct fw_address_region region = { + .start = 0xffff00000000ULL, + .end = CSR_REGISTER_BASE, + }; + + dev->handler.length = 4096; + dev->handler.address_callback = fwnet_receive_packet; + dev->handler.callback_data = dev; + + retval = fw_core_add_address_handler(&dev->handler, ®ion); + if (retval < 0) + goto failed_initial; + + dev->local_fifo = dev->handler.offset; + } + + max_receive = 1U << (dev->card->max_receive + 1); + num_packets = (FWNET_ISO_PAGE_COUNT * PAGE_SIZE) / max_receive; + + if (!dev->broadcast_rcv_context) { + void **ptrptr; + + context = fw_iso_context_create(dev->card, + FW_ISO_CONTEXT_RECEIVE, IEEE1394_BROADCAST_CHANNEL, + dev->card->link_speed, 8, fwnet_receive_broadcast, dev); + if (IS_ERR(context)) { + retval = PTR_ERR(context); + goto failed_context_create; + } + + retval = fw_iso_buffer_init(&dev->broadcast_rcv_buffer, + dev->card, FWNET_ISO_PAGE_COUNT, DMA_FROM_DEVICE); + if (retval < 0) + goto failed_buffer_init; + + ptrptr = kmalloc(sizeof(void *) * num_packets, GFP_KERNEL); + if (!ptrptr) { + retval = -ENOMEM; + goto failed_ptrs_alloc; + } + + dev->broadcast_rcv_buffer_ptrs = ptrptr; + for (u = 0; u < FWNET_ISO_PAGE_COUNT; u++) { + void *ptr; + unsigned v; + + ptr = kmap(dev->broadcast_rcv_buffer.pages[u]); + for (v = 0; v < num_packets / FWNET_ISO_PAGE_COUNT; v++) + *ptrptr++ = (void *) + ((char *)ptr + v * max_receive); + } + dev->broadcast_rcv_context = context; + } else { + context = dev->broadcast_rcv_context; + } + + packet.payload_length = max_receive; + packet.interrupt = 1; + packet.skip = 0; + packet.tag = 3; + packet.sy = 0; + packet.header_length = IEEE1394_GASP_HDR_SIZE; + offset = 0; + + for (u = 0; u < num_packets; u++) { + retval = fw_iso_context_queue(context, &packet, + &dev->broadcast_rcv_buffer, offset); + if (retval < 0) + goto failed_rcv_queue; + + offset += max_receive; + } + dev->num_broadcast_rcv_ptrs = num_packets; + dev->rcv_buffer_size = max_receive; + dev->broadcast_rcv_next_ptr = 0U; + retval = fw_iso_context_start(context, -1, 0, + FW_ISO_CONTEXT_MATCH_ALL_TAGS); /* ??? sync */ + if (retval < 0) + goto failed_rcv_queue; + + /* FIXME: adjust it according to the min. speed of all known peers? 
*/ + dev->broadcast_xmt_max_payload = IEEE1394_MAX_PAYLOAD_S100 + - IEEE1394_GASP_HDR_SIZE - RFC2374_UNFRAG_HDR_SIZE; + dev->broadcast_state = FWNET_BROADCAST_RUNNING; + + return 0; + + failed_rcv_queue: + kfree(dev->broadcast_rcv_buffer_ptrs); + dev->broadcast_rcv_buffer_ptrs = NULL; + failed_ptrs_alloc: + fw_iso_buffer_destroy(&dev->broadcast_rcv_buffer, dev->card); + failed_buffer_init: + fw_iso_context_destroy(context); + dev->broadcast_rcv_context = NULL; + failed_context_create: + fw_core_remove_address_handler(&dev->handler); + failed_initial: + dev->local_fifo = FWNET_NO_FIFO_ADDR; + + return retval; +} + +/* ifup */ +static int fwnet_open(struct net_device *net) +{ + struct fwnet_device *dev = netdev_priv(net); + int ret; + + if (dev->broadcast_state == FWNET_BROADCAST_ERROR) { + ret = fwnet_broadcast_start(dev); + if (ret) + return ret; + } + netif_start_queue(net); + + return 0; +} + +/* ifdown */ +static int fwnet_stop(struct net_device *net) +{ + netif_stop_queue(net); + + /* Deallocate iso context for use by other applications? */ + + return 0; +} + +static int fwnet_tx(struct sk_buff *skb, struct net_device *net) +{ + struct fwnet_header hdr_buf; + struct fwnet_device *dev = netdev_priv(net); + __be16 proto; + u16 dest_node; + unsigned max_payload; + u16 dg_size; + u16 *datagram_label_ptr; + struct fwnet_packet_task *ptask; + struct fwnet_peer *peer; + unsigned long flags; + + ptask = kmem_cache_alloc(fwnet_packet_task_cache, GFP_ATOMIC); + if (ptask == NULL) + goto fail; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto fail; + + /* + * Make a copy of the driver-specific header. + * We might need to rebuild the header on tx failure. + */ + memcpy(&hdr_buf, skb->data, sizeof(hdr_buf)); + skb_pull(skb, sizeof(hdr_buf)); + + proto = hdr_buf.h_proto; + dg_size = skb->len; + + /* serialize access to peer, including peer->datagram_label */ + spin_lock_irqsave(&dev->lock, flags); + + /* + * Set the transmission type for the packet. ARP packets and IP + * broadcast packets are sent via GASP. 
+ */ + if (memcmp(hdr_buf.h_dest, net->broadcast, FWNET_ALEN) == 0 + || proto == htons(ETH_P_ARP) + || (proto == htons(ETH_P_IP) + && IN_MULTICAST(ntohl(ip_hdr(skb)->daddr)))) { + max_payload = dev->broadcast_xmt_max_payload; + datagram_label_ptr = &dev->broadcast_xmt_datagramlabel; + + ptask->fifo_addr = FWNET_NO_FIFO_ADDR; + ptask->generation = 0; + ptask->dest_node = IEEE1394_ALL_NODES; + ptask->speed = SCODE_100; + } else { + __be64 guid = get_unaligned((__be64 *)hdr_buf.h_dest); + u8 generation; + + peer = fwnet_peer_find_by_guid(dev, be64_to_cpu(guid)); + if (!peer || peer->fifo == FWNET_NO_FIFO_ADDR) + goto fail_unlock; + + generation = peer->generation; + dest_node = peer->node_id; + max_payload = peer->max_payload; + datagram_label_ptr = &peer->datagram_label; + + ptask->fifo_addr = peer->fifo; + ptask->generation = generation; + ptask->dest_node = dest_node; + ptask->speed = peer->speed; + } + + /* If this is an ARP packet, convert it */ + if (proto == htons(ETH_P_ARP)) { + struct arphdr *arp = (struct arphdr *)skb->data; + unsigned char *arp_ptr = (unsigned char *)(arp + 1); + struct rfc2734_arp *arp1394 = (struct rfc2734_arp *)skb->data; + __be32 ipaddr; + + ipaddr = get_unaligned((__be32 *)(arp_ptr + FWNET_ALEN)); + + arp1394->hw_addr_len = RFC2734_HW_ADDR_LEN; + arp1394->max_rec = dev->card->max_receive; + arp1394->sspd = dev->card->link_speed; + + put_unaligned_be16(dev->local_fifo >> 32, + &arp1394->fifo_hi); + put_unaligned_be32(dev->local_fifo & 0xffffffff, + &arp1394->fifo_lo); + put_unaligned(ipaddr, &arp1394->sip); + } + + ptask->hdr.w0 = 0; + ptask->hdr.w1 = 0; + ptask->skb = skb; + ptask->dev = dev; + + /* Does it all fit in one packet? */ + if (dg_size <= max_payload) { + fwnet_make_uf_hdr(&ptask->hdr, ntohs(proto)); + ptask->outstanding_pkts = 1; + max_payload = dg_size + RFC2374_UNFRAG_HDR_SIZE; + } else { + u16 datagram_label; + + max_payload -= RFC2374_FRAG_OVERHEAD; + datagram_label = (*datagram_label_ptr)++; + fwnet_make_ff_hdr(&ptask->hdr, ntohs(proto), dg_size, + datagram_label); + ptask->outstanding_pkts = DIV_ROUND_UP(dg_size, max_payload); + max_payload += RFC2374_FRAG_HDR_SIZE; + } + + spin_unlock_irqrestore(&dev->lock, flags); + + ptask->max_payload = max_payload; + fwnet_send_packet(ptask); + + return NETDEV_TX_OK; + + fail_unlock: + spin_unlock_irqrestore(&dev->lock, flags); + fail: + if (ptask) + kmem_cache_free(fwnet_packet_task_cache, ptask); + + if (skb != NULL) + dev_kfree_skb(skb); + + net->stats.tx_dropped++; + net->stats.tx_errors++; + + /* + * FIXME: According to a patch from 2003-02-26, "returning non-zero + * causes serious problems" here, allegedly. Before that patch, + * -ERRNO was returned which is not appropriate under Linux 2.6. + * Perhaps more needs to be done? Stop the queue in serious + * conditions and restart it elsewhere? 
+ */ + return NETDEV_TX_OK; +} + +static int fwnet_change_mtu(struct net_device *net, int new_mtu) +{ + if (new_mtu < 68) + return -EINVAL; + + net->mtu = new_mtu; + return 0; +} + +static void fwnet_get_drvinfo(struct net_device *net, + struct ethtool_drvinfo *info) +{ + strcpy(info->driver, KBUILD_MODNAME); + strcpy(info->bus_info, "ieee1394"); +} + +static struct ethtool_ops fwnet_ethtool_ops = { + .get_drvinfo = fwnet_get_drvinfo, +}; + +static const struct net_device_ops fwnet_netdev_ops = { + .ndo_open = fwnet_open, + .ndo_stop = fwnet_stop, + .ndo_start_xmit = fwnet_tx, + .ndo_change_mtu = fwnet_change_mtu, +}; + +static void fwnet_init_dev(struct net_device *net) +{ + net->header_ops = &fwnet_header_ops; + net->netdev_ops = &fwnet_netdev_ops; + net->watchdog_timeo = 2 * HZ; + net->flags = IFF_BROADCAST | IFF_MULTICAST; + net->features = NETIF_F_HIGHDMA; + net->addr_len = FWNET_ALEN; + net->hard_header_len = FWNET_HLEN; + net->type = ARPHRD_IEEE1394; + net->tx_queue_len = 10; + SET_ETHTOOL_OPS(net, &fwnet_ethtool_ops); +} + +/* caller must hold fwnet_device_mutex */ +static struct fwnet_device *fwnet_dev_find(struct fw_card *card) +{ + struct fwnet_device *dev; + + list_for_each_entry(dev, &fwnet_device_list, dev_link) + if (dev->card == card) + return dev; + + return NULL; +} + +static int fwnet_add_peer(struct fwnet_device *dev, + struct fw_unit *unit, struct fw_device *device) +{ + struct fwnet_peer *peer; + + peer = kmalloc(sizeof(*peer), GFP_KERNEL); + if (!peer) + return -ENOMEM; + + dev_set_drvdata(&unit->device, peer); + + peer->dev = dev; + peer->guid = (u64)device->config_rom[3] << 32 | device->config_rom[4]; + peer->fifo = FWNET_NO_FIFO_ADDR; + INIT_LIST_HEAD(&peer->pd_list); + peer->pdg_size = 0; + peer->datagram_label = 0; + peer->speed = device->max_speed; + peer->max_payload = fwnet_max_payload(device->max_rec, peer->speed); + + peer->generation = device->generation; + smp_rmb(); + peer->node_id = device->node_id; + + spin_lock_irq(&dev->lock); + list_add_tail(&peer->peer_link, &dev->peer_list); + spin_unlock_irq(&dev->lock); + + return 0; +} + +static int fwnet_probe(struct device *_dev) +{ + struct fw_unit *unit = fw_unit(_dev); + struct fw_device *device = fw_parent_device(unit); + struct fw_card *card = device->card; + struct net_device *net; + bool allocated_netdev = false; + struct fwnet_device *dev; + unsigned max_mtu; + int ret; + + mutex_lock(&fwnet_device_mutex); + + dev = fwnet_dev_find(card); + if (dev) { + net = dev->netdev; + goto have_dev; + } + + net = alloc_netdev(sizeof(*dev), "firewire%d", fwnet_init_dev); + if (net == NULL) { + ret = -ENOMEM; + goto out; + } + + allocated_netdev = true; + SET_NETDEV_DEV(net, card->device); + dev = netdev_priv(net); + + spin_lock_init(&dev->lock); + dev->broadcast_state = FWNET_BROADCAST_ERROR; + dev->broadcast_rcv_context = NULL; + dev->broadcast_xmt_max_payload = 0; + dev->broadcast_xmt_datagramlabel = 0; + + dev->local_fifo = FWNET_NO_FIFO_ADDR; + + INIT_LIST_HEAD(&dev->packet_list); + INIT_LIST_HEAD(&dev->broadcasted_list); + INIT_LIST_HEAD(&dev->sent_list); + INIT_LIST_HEAD(&dev->peer_list); + + dev->card = card; + dev->netdev = net; + + /* + * Use the RFC 2734 default 1500 octets or the maximum payload + * as initial MTU + */ + max_mtu = (1 << (card->max_receive + 1)) + - sizeof(struct rfc2734_header) - IEEE1394_GASP_HDR_SIZE; + net->mtu = min(1500U, max_mtu); + + /* Set our hardware address while we're at it */ + put_unaligned_be64(card->guid, net->dev_addr); + put_unaligned_be64(~0ULL, net->broadcast); + ret 
= register_netdev(net); + if (ret) { + fw_error("Cannot register the driver\n"); + goto out; + } + + list_add_tail(&dev->dev_link, &fwnet_device_list); + fw_notify("%s: IPv4 over FireWire on device %016llx\n", + net->name, (unsigned long long)card->guid); + have_dev: + ret = fwnet_add_peer(dev, unit, device); + if (ret && allocated_netdev) { + unregister_netdev(net); + list_del(&dev->dev_link); + } + out: + if (ret && allocated_netdev) + free_netdev(net); + + mutex_unlock(&fwnet_device_mutex); + + return ret; +} + +static void fwnet_remove_peer(struct fwnet_peer *peer) +{ + struct fwnet_partial_datagram *pd, *pd_next; + + spin_lock_irq(&peer->dev->lock); + list_del(&peer->peer_link); + spin_unlock_irq(&peer->dev->lock); + + list_for_each_entry_safe(pd, pd_next, &peer->pd_list, pd_link) + fwnet_pd_delete(pd); + + kfree(peer); +} + +static int fwnet_remove(struct device *_dev) +{ + struct fwnet_peer *peer = dev_get_drvdata(_dev); + struct fwnet_device *dev = peer->dev; + struct net_device *net; + struct fwnet_packet_task *ptask, *pt_next; + + mutex_lock(&fwnet_device_mutex); + + fwnet_remove_peer(peer); + + if (list_empty(&dev->peer_list)) { + net = dev->netdev; + unregister_netdev(net); + + if (dev->local_fifo != FWNET_NO_FIFO_ADDR) + fw_core_remove_address_handler(&dev->handler); + if (dev->broadcast_rcv_context) { + fw_iso_context_stop(dev->broadcast_rcv_context); + fw_iso_buffer_destroy(&dev->broadcast_rcv_buffer, + dev->card); + fw_iso_context_destroy(dev->broadcast_rcv_context); + } + list_for_each_entry_safe(ptask, pt_next, + &dev->packet_list, pt_link) { + dev_kfree_skb_any(ptask->skb); + kmem_cache_free(fwnet_packet_task_cache, ptask); + } + list_for_each_entry_safe(ptask, pt_next, + &dev->broadcasted_list, pt_link) { + dev_kfree_skb_any(ptask->skb); + kmem_cache_free(fwnet_packet_task_cache, ptask); + } + list_for_each_entry_safe(ptask, pt_next, + &dev->sent_list, pt_link) { + dev_kfree_skb_any(ptask->skb); + kmem_cache_free(fwnet_packet_task_cache, ptask); + } + list_del(&dev->dev_link); + + free_netdev(net); + } + + mutex_unlock(&fwnet_device_mutex); + + return 0; +} + +/* + * FIXME abort partially sent fragmented datagrams, + * discard partially received fragmented datagrams + */ +static void fwnet_update(struct fw_unit *unit) +{ + struct fw_device *device = fw_parent_device(unit); + struct fwnet_peer *peer = dev_get_drvdata(&unit->device); + int generation; + + generation = device->generation; + + spin_lock_irq(&peer->dev->lock); + peer->node_id = device->node_id; + peer->generation = generation; + spin_unlock_irq(&peer->dev->lock); +} + +static const struct ieee1394_device_id fwnet_id_table[] = { + { + .match_flags = IEEE1394_MATCH_SPECIFIER_ID | + IEEE1394_MATCH_VERSION, + .specifier_id = IANA_SPECIFIER_ID, + .version = RFC2734_SW_VERSION, + }, + { } +}; + +static struct fw_driver fwnet_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "net", + .bus = &fw_bus_type, + .probe = fwnet_probe, + .remove = fwnet_remove, + }, + .update = fwnet_update, + .id_table = fwnet_id_table, +}; + +static const u32 rfc2374_unit_directory_data[] = { + 0x00040000, /* directory_length */ + 0x1200005e, /* unit_specifier_id: IANA */ + 0x81000003, /* textual descriptor offset */ + 0x13000001, /* unit_sw_version: RFC 2734 */ + 0x81000005, /* textual descriptor offset */ + 0x00030000, /* descriptor_length */ + 0x00000000, /* text */ + 0x00000000, /* minimal ASCII, en */ + 0x49414e41, /* I A N A */ + 0x00030000, /* descriptor_length */ + 0x00000000, /* text */ + 0x00000000, /* minimal ASCII, 
en */ + 0x49507634, /* I P v 4 */ +}; + +static struct fw_descriptor rfc2374_unit_directory = { + .length = ARRAY_SIZE(rfc2374_unit_directory_data), + .key = (CSR_DIRECTORY | CSR_UNIT) << 24, + .data = rfc2374_unit_directory_data +}; + +static int __init fwnet_init(void) +{ + int err; + + err = fw_core_add_descriptor(&rfc2374_unit_directory); + if (err) + return err; + + fwnet_packet_task_cache = kmem_cache_create("packet_task", + sizeof(struct fwnet_packet_task), 0, 0, NULL); + if (!fwnet_packet_task_cache) { + err = -ENOMEM; + goto out; + } + + err = driver_register(&fwnet_driver.driver); + if (!err) + return 0; + + kmem_cache_destroy(fwnet_packet_task_cache); +out: + fw_core_remove_descriptor(&rfc2374_unit_directory); + + return err; +} +module_init(fwnet_init); + +static void __exit fwnet_cleanup(void) +{ + driver_unregister(&fwnet_driver.driver); + kmem_cache_destroy(fwnet_packet_task_cache); + fw_core_remove_descriptor(&rfc2374_unit_directory); +} +module_exit(fwnet_cleanup); + +MODULE_AUTHOR("Jay Fenlason <fenlason@redhat.com>"); +MODULE_DESCRIPTION("IPv4 over IEEE1394 as per RFC 2734"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(ieee1394, fwnet_id_table); diff --git a/drivers/ieee1394/Kconfig b/drivers/ieee1394/Kconfig index 95f45f9b8e5e..f102fcc7e52a 100644 --- a/drivers/ieee1394/Kconfig +++ b/drivers/ieee1394/Kconfig @@ -4,7 +4,7 @@ menu "IEEE 1394 (FireWire) support" source "drivers/firewire/Kconfig" config IEEE1394 - tristate "Stable FireWire stack" + tristate "Legacy alternative FireWire driver stack" depends on PCI || BROKEN help IEEE 1394 describes a high performance serial bus, which is also @@ -33,11 +33,9 @@ config IEEE1394_OHCI1394 module will be called ohci1394. NOTE: - - You should only build either ohci1394 or the new firewire-ohci driver, - but not both. If you nevertheless want to install both, you should - configure them only as modules and blacklist the driver(s) which you - don't want to have auto-loaded. Add either + If you want to install firewire-ohci and ohci1394 together, you + should configure them only as modules and blacklist the driver(s) + which you don't want to have auto-loaded. Add either blacklist firewire-ohci or @@ -46,12 +44,7 @@ config IEEE1394_OHCI1394 blacklist dv1394 to /etc/modprobe.conf or /etc/modprobe.d/* and update modprobe.conf - depending on your distribution. The latter two modules should be - blacklisted together with ohci1394 because they depend on ohci1394. - - If you have an old modprobe which doesn't implement the blacklist - directive, use "install modulename /bin/true" for the modules to be - blacklisted. + depending on your distribution. 
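+
+	  For example, to prefer the new firewire-ohci driver, a file under
+	  /etc/modprobe.d/ would contain just the two lines
+	  "blacklist ohci1394" and "blacklist dv1394".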
comment "PCILynx controller requires I2C" depends on IEEE1394 && I2C=n @@ -105,7 +98,7 @@ config IEEE1394_ETH1394_ROM_ENTRY default n config IEEE1394_ETH1394 - tristate "IP over 1394" + tristate "IP networking over 1394 (experimental)" depends on IEEE1394 && EXPERIMENTAL && INET select IEEE1394_ETH1394_ROM_ENTRY help diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index fa3a11365ec3..7b287cb38b7a 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -267,6 +267,84 @@ rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) } return ret; } + +static LIST_HEAD(dmar_atsr_units); + +static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr) +{ + struct acpi_dmar_atsr *atsr; + struct dmar_atsr_unit *atsru; + + atsr = container_of(hdr, struct acpi_dmar_atsr, header); + atsru = kzalloc(sizeof(*atsru), GFP_KERNEL); + if (!atsru) + return -ENOMEM; + + atsru->hdr = hdr; + atsru->include_all = atsr->flags & 0x1; + + list_add(&atsru->list, &dmar_atsr_units); + + return 0; +} + +static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru) +{ + int rc; + struct acpi_dmar_atsr *atsr; + + if (atsru->include_all) + return 0; + + atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); + rc = dmar_parse_dev_scope((void *)(atsr + 1), + (void *)atsr + atsr->header.length, + &atsru->devices_cnt, &atsru->devices, + atsr->segment); + if (rc || !atsru->devices_cnt) { + list_del(&atsru->list); + kfree(atsru); + } + + return rc; +} + +int dmar_find_matched_atsr_unit(struct pci_dev *dev) +{ + int i; + struct pci_bus *bus; + struct acpi_dmar_atsr *atsr; + struct dmar_atsr_unit *atsru; + + list_for_each_entry(atsru, &dmar_atsr_units, list) { + atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); + if (atsr->segment == pci_domain_nr(dev->bus)) + goto found; + } + + return 0; + +found: + for (bus = dev->bus; bus; bus = bus->parent) { + struct pci_dev *bridge = bus->self; + + if (!bridge || !bridge->is_pcie || + bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) + return 0; + + if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) { + for (i = 0; i < atsru->devices_cnt; i++) + if (atsru->devices[i] == bridge) + return 1; + break; + } + } + + if (atsru->include_all) + return 1; + + return 0; +} #endif static void __init @@ -274,22 +352,28 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) { struct acpi_dmar_hardware_unit *drhd; struct acpi_dmar_reserved_memory *rmrr; + struct acpi_dmar_atsr *atsr; switch (header->type) { case ACPI_DMAR_TYPE_HARDWARE_UNIT: - drhd = (struct acpi_dmar_hardware_unit *)header; + drhd = container_of(header, struct acpi_dmar_hardware_unit, + header); printk (KERN_INFO PREFIX - "DRHD (flags: 0x%08x)base: 0x%016Lx\n", - drhd->flags, (unsigned long long)drhd->address); + "DRHD base: %#016Lx flags: %#x\n", + (unsigned long long)drhd->address, drhd->flags); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: - rmrr = (struct acpi_dmar_reserved_memory *)header; - + rmrr = container_of(header, struct acpi_dmar_reserved_memory, + header); printk (KERN_INFO PREFIX - "RMRR base: 0x%016Lx end: 0x%016Lx\n", + "RMRR base: %#016Lx end: %#016Lx\n", (unsigned long long)rmrr->base_address, (unsigned long long)rmrr->end_address); break; + case ACPI_DMAR_TYPE_ATSR: + atsr = container_of(header, struct acpi_dmar_atsr, header); + printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags); + break; } } @@ -363,6 +447,11 @@ parse_dmar_table(void) ret = dmar_parse_one_rmrr(entry_header); #endif break; + case ACPI_DMAR_TYPE_ATSR: +#ifdef CONFIG_DMAR + ret = dmar_parse_one_atsr(entry_header); 
+#endif + break; default: printk(KERN_WARNING PREFIX "Unknown DMAR structure type\n"); @@ -431,11 +520,19 @@ int __init dmar_dev_scope_init(void) #ifdef CONFIG_DMAR { struct dmar_rmrr_unit *rmrr, *rmrr_n; + struct dmar_atsr_unit *atsr, *atsr_n; + list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) { ret = rmrr_parse_dev(rmrr); if (ret) return ret; } + + list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) { + ret = atsr_parse_dev(atsr); + if (ret) + return ret; + } } #endif @@ -468,6 +565,9 @@ int __init dmar_table_init(void) #ifdef CONFIG_DMAR if (list_empty(&dmar_rmrr_units)) printk(KERN_INFO PREFIX "No RMRR found\n"); + + if (list_empty(&dmar_atsr_units)) + printk(KERN_INFO PREFIX "No ATSR found\n"); #endif #ifdef CONFIG_INTR_REMAP @@ -515,6 +615,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) u32 ver; static int iommu_allocated = 0; int agaw = 0; + int msagaw = 0; iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -535,12 +636,20 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) agaw = iommu_calculate_agaw(iommu); if (agaw < 0) { printk(KERN_ERR - "Cannot get a valid agaw for iommu (seq_id = %d)\n", + "Cannot get a valid agaw for iommu (seq_id = %d)\n", + iommu->seq_id); + goto error; + } + msagaw = iommu_calculate_max_sagaw(iommu); + if (msagaw < 0) { + printk(KERN_ERR + "Cannot get a valid max agaw for iommu (seq_id = %d)\n", iommu->seq_id); goto error; } #endif iommu->agaw = agaw; + iommu->msagaw = msagaw; /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), @@ -590,7 +699,8 @@ void free_iommu(struct intel_iommu *iommu) */ static inline void reclaim_free_desc(struct q_inval *qi) { - while (qi->desc_status[qi->free_tail] == QI_DONE) { + while (qi->desc_status[qi->free_tail] == QI_DONE || + qi->desc_status[qi->free_tail] == QI_ABORT) { qi->desc_status[qi->free_tail] = QI_FREE; qi->free_tail = (qi->free_tail + 1) % QI_LENGTH; qi->free_cnt++; @@ -600,10 +710,13 @@ static inline void reclaim_free_desc(struct q_inval *qi) static int qi_check_fault(struct intel_iommu *iommu, int index) { u32 fault; - int head; + int head, tail; struct q_inval *qi = iommu->qi; int wait_index = (index + 1) % QI_LENGTH; + if (qi->desc_status[wait_index] == QI_ABORT) + return -EAGAIN; + fault = readl(iommu->reg + DMAR_FSTS_REG); /* @@ -613,7 +726,11 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) */ if (fault & DMA_FSTS_IQE) { head = readl(iommu->reg + DMAR_IQH_REG); - if ((head >> 4) == index) { + if ((head >> DMAR_IQ_SHIFT) == index) { + printk(KERN_ERR "VT-d detected invalid descriptor: " + "low=%llx, high=%llx\n", + (unsigned long long)qi->desc[index].low, + (unsigned long long)qi->desc[index].high); memcpy(&qi->desc[index], &qi->desc[wait_index], sizeof(struct qi_desc)); __iommu_flush_cache(iommu, &qi->desc[index], @@ -623,6 +740,32 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) } } + /* + * If ITE happens, all pending wait_desc commands are aborted. + * No new descriptors are fetched until the ITE is cleared. 
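+	 * The recovery below walks the ring backwards from head to tail in
+	 * steps of two: descriptors are queued as (command, wait) pairs, so
+	 * the wait descriptor sits in the odd slot (hence the head |= 1), and
+	 * each in-use wait descriptor is marked QI_ABORT so that its
+	 * submitter gets -EAGAIN from qi_check_fault() and restarts the
+	 * submission.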
+ */ + if (fault & DMA_FSTS_ITE) { + head = readl(iommu->reg + DMAR_IQH_REG); + head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; + head |= 1; + tail = readl(iommu->reg + DMAR_IQT_REG); + tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH; + + writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); + + do { + if (qi->desc_status[head] == QI_IN_USE) + qi->desc_status[head] = QI_ABORT; + head = (head - 2 + QI_LENGTH) % QI_LENGTH; + } while (head != tail); + + if (qi->desc_status[wait_index] == QI_ABORT) + return -EAGAIN; + } + + if (fault & DMA_FSTS_ICE) + writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG); + return 0; } @@ -632,7 +775,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) */ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) { - int rc = 0; + int rc; struct q_inval *qi = iommu->qi; struct qi_desc *hw, wait_desc; int wait_index, index; @@ -643,6 +786,9 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) hw = qi->desc; +restart: + rc = 0; + spin_lock_irqsave(&qi->q_lock, flags); while (qi->free_cnt < 3) { spin_unlock_irqrestore(&qi->q_lock, flags); @@ -673,7 +819,7 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) * update the HW tail register indicating the presence of * new descriptors. */ - writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG); + writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG); while (qi->desc_status[wait_index] != QI_DONE) { /* @@ -685,18 +831,21 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) */ rc = qi_check_fault(iommu, index); if (rc) - goto out; + break; spin_unlock(&qi->q_lock); cpu_relax(); spin_lock(&qi->q_lock); } -out: - qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE; + + qi->desc_status[index] = QI_DONE; reclaim_free_desc(qi); spin_unlock_irqrestore(&qi->q_lock, flags); + if (rc == -EAGAIN) + goto restart; + return rc; } @@ -714,41 +863,26 @@ void qi_global_iec(struct intel_iommu *iommu) qi_submit_sync(&desc, iommu); } -int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, - u64 type, int non_present_entry_flush) +void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, + u64 type) { struct qi_desc desc; - if (non_present_entry_flush) { - if (!cap_caching_mode(iommu->cap)) - return 1; - else - did = 0; - } - desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) | QI_CC_GRAN(type) | QI_CC_TYPE; desc.high = 0; - return qi_submit_sync(&desc, iommu); + qi_submit_sync(&desc, iommu); } -int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type, - int non_present_entry_flush) +void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type) { u8 dw = 0, dr = 0; struct qi_desc desc; int ih = 0; - if (non_present_entry_flush) { - if (!cap_caching_mode(iommu->cap)) - return 1; - else - did = 0; - } - if (cap_write_drain(iommu->cap)) dw = 1; @@ -760,7 +894,28 @@ int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) | QI_IOTLB_AM(size_order); - return qi_submit_sync(&desc, iommu); + qi_submit_sync(&desc, iommu); +} + +void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, + u64 addr, unsigned mask) +{ + struct qi_desc desc; + + if (mask) { + BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1)); + addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1; + desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; + } else + desc.high = 
QI_DEV_IOTLB_ADDR(addr); + + if (qdep >= QI_DEV_IOTLB_MAX_INVS) + qdep = 0; + + desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | + QI_DIOTLB_TYPE; + + qi_submit_sync(&desc, iommu); } /* @@ -790,7 +945,6 @@ void dmar_disable_qi(struct intel_iommu *iommu) cpu_relax(); iommu->gcmd &= ~DMA_GCMD_QIE; - writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, @@ -804,7 +958,7 @@ end: */ static void __dmar_enable_qi(struct intel_iommu *iommu) { - u32 cmd, sts; + u32 sts; unsigned long flags; struct q_inval *qi = iommu->qi; @@ -818,9 +972,8 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); - cmd = iommu->gcmd | DMA_GCMD_QIE; iommu->gcmd |= DMA_GCMD_QIE; - writel(cmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts); @@ -1096,7 +1249,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu) set_irq_data(irq, NULL); iommu->irq = 0; destroy_irq(irq); - return 0; + return ret; } ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu); diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index cd389162735f..178853a07440 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -53,6 +53,8 @@ #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 +#define MAX_AGAW_WIDTH 64 + #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) @@ -131,8 +133,6 @@ static inline void context_set_fault_enable(struct context_entry *context) context->lo &= (((u64)-1) << 2) | 1; } -#define CONTEXT_TT_MULTI_LEVEL 0 - static inline void context_set_translation_type(struct context_entry *context, unsigned long value) { @@ -256,6 +256,7 @@ struct device_domain_info { u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ + struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ }; @@ -401,17 +402,13 @@ void free_iova_mem(struct iova *iova) static inline int width_to_agaw(int width); -/* calculate agaw for each iommu. - * "SAGAW" may be different across iommus, use a default agaw, and - * get a supported less agaw for iommus that don't support the default agaw. - */ -int iommu_calculate_agaw(struct intel_iommu *iommu) +static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) { unsigned long sagaw; int agaw = -1; sagaw = cap_sagaw(iommu->cap); - for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); + for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) { if (test_bit(agaw, &sagaw)) break; @@ -420,6 +417,24 @@ int iommu_calculate_agaw(struct intel_iommu *iommu) return agaw; } +/* + * Calculate max SAGAW for each iommu. + */ +int iommu_calculate_max_sagaw(struct intel_iommu *iommu) +{ + return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH); +} + +/* + * calculate agaw for each iommu. + * "SAGAW" may be different across iommus, use a default agaw, and + * get a supported less agaw for iommus that don't support the default agaw. 
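+ * e.g. an iommu whose SAGAW advertises 39-bit and 48-bit support gets the
+ * 48-bit default, while one that only supports 39-bit falls back to that.
+ * iommu_calculate_max_sagaw() above runs the same search from
+ * MAX_AGAW_WIDTH instead, to find the widest AGAW the hardware supports;
+ * that is the value pass-through context entries are programmed with.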
+ */ +int iommu_calculate_agaw(struct intel_iommu *iommu) +{ + return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); +} + /* in native case, each domain is related to only one iommu */ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) { @@ -809,7 +824,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) static void iommu_set_root_entry(struct intel_iommu *iommu) { void *addr; - u32 cmd, sts; + u32 sts; unsigned long flag; addr = iommu->root_entry; @@ -817,12 +832,11 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) spin_lock_irqsave(&iommu->register_lock, flag); dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr)); - cmd = iommu->gcmd | DMA_GCMD_SRTP; - writel(cmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG); /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, - readl, (sts & DMA_GSTS_RTPS), sts); + readl, (sts & DMA_GSTS_RTPS), sts); spin_unlock_irqrestore(&iommu->register_lock, flag); } @@ -834,39 +848,25 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu) if (!rwbf_quirk && !cap_rwbf(iommu->cap)) return; - val = iommu->gcmd | DMA_GCMD_WBF; spin_lock_irqsave(&iommu->register_lock, flag); - writel(val, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG); /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, - readl, (!(val & DMA_GSTS_WBFS)), val); + readl, (!(val & DMA_GSTS_WBFS)), val); spin_unlock_irqrestore(&iommu->register_lock, flag); } /* return value determine if we need a write buffer flush */ -static int __iommu_flush_context(struct intel_iommu *iommu, - u16 did, u16 source_id, u8 function_mask, u64 type, - int non_present_entry_flush) +static void __iommu_flush_context(struct intel_iommu *iommu, + u16 did, u16 source_id, u8 function_mask, + u64 type) { u64 val = 0; unsigned long flag; - /* - * In the non-present entry flush case, if hardware doesn't cache - * non-present entry we do nothing and if hardware cache non-present - * entry, we flush entries of domain 0 (the domain id is used to cache - * any non-present entries) - */ - if (non_present_entry_flush) { - if (!cap_caching_mode(iommu->cap)) - return 1; - else - did = 0; - } - switch (type) { case DMA_CCMD_GLOBAL_INVL: val = DMA_CCMD_GLOBAL_INVL; @@ -891,33 +891,16 @@ static int __iommu_flush_context(struct intel_iommu *iommu, dmar_readq, (!(val & DMA_CCMD_ICC)), val); spin_unlock_irqrestore(&iommu->register_lock, flag); - - /* flush context entry will implicitly flush write buffer */ - return 0; } /* return value determine if we need a write buffer flush */ -static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, - u64 addr, unsigned int size_order, u64 type, - int non_present_entry_flush) +static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, + u64 addr, unsigned int size_order, u64 type) { int tlb_offset = ecap_iotlb_offset(iommu->ecap); u64 val = 0, val_iva = 0; unsigned long flag; - /* - * In the non-present entry flush case, if hardware doesn't cache - * non-present entry we do nothing and if hardware cache non-present - * entry, we flush entries of domain 0 (the domain id is used to cache - * any non-present entries) - */ - if (non_present_entry_flush) { - if (!cap_caching_mode(iommu->cap)) - return 1; - else - did = 0; - } - switch (type) { case DMA_TLB_GLOBAL_FLUSH: /* global flush doesn't need set IVA_REG */ @@ -965,37 +948,101 @@ static int __iommu_flush_iotlb(struct 
intel_iommu *iommu, u16 did, pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", (unsigned long long)DMA_TLB_IIRG(type), (unsigned long long)DMA_TLB_IAIG(val)); - /* flush iotlb entry will implicitly flush write buffer */ - return 0; } -static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, - u64 addr, unsigned int pages, int non_present_entry_flush) +static struct device_domain_info *iommu_support_dev_iotlb( + struct dmar_domain *domain, int segment, u8 bus, u8 devfn) +{ + int found = 0; + unsigned long flags; + struct device_domain_info *info; + struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn); + + if (!ecap_dev_iotlb_support(iommu->ecap)) + return NULL; + + if (!iommu->qi) + return NULL; + + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_entry(info, &domain->devices, link) + if (info->bus == bus && info->devfn == devfn) { + found = 1; + break; + } + spin_unlock_irqrestore(&device_domain_lock, flags); + + if (!found || !info->dev) + return NULL; + + if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS)) + return NULL; + + if (!dmar_find_matched_atsr_unit(info->dev)) + return NULL; + + info->iommu = iommu; + + return info; +} + +static void iommu_enable_dev_iotlb(struct device_domain_info *info) { - unsigned int mask; + if (!info) + return; + + pci_enable_ats(info->dev, VTD_PAGE_SHIFT); +} + +static void iommu_disable_dev_iotlb(struct device_domain_info *info) +{ + if (!info->dev || !pci_ats_enabled(info->dev)) + return; + + pci_disable_ats(info->dev); +} + +static void iommu_flush_dev_iotlb(struct dmar_domain *domain, + u64 addr, unsigned mask) +{ + u16 sid, qdep; + unsigned long flags; + struct device_domain_info *info; + + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_entry(info, &domain->devices, link) { + if (!info->dev || !pci_ats_enabled(info->dev)) + continue; + + sid = info->bus << 8 | info->devfn; + qdep = pci_ats_queue_depth(info->dev); + qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); + } + spin_unlock_irqrestore(&device_domain_lock, flags); +} + +static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, + u64 addr, unsigned int pages) +{ + unsigned int mask = ilog2(__roundup_pow_of_two(pages)); BUG_ON(addr & (~VTD_PAGE_MASK)); BUG_ON(pages == 0); - /* Fallback to domain selective flush if no PSI support */ - if (!cap_pgsel_inv(iommu->cap)) - return iommu->flush.flush_iotlb(iommu, did, 0, 0, - DMA_TLB_DSI_FLUSH, - non_present_entry_flush); - /* + * Fallback to domain selective flush if no PSI support or the size is + * too big. 
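+	 * e.g. a 5-page request is rounded up to mask = 3 (an aligned 8-page
+	 * region); if that mask exceeds cap_max_amask_val(), the whole domain
+	 * is flushed instead.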
* PSI requires page size to be 2 ^ x, and the base address is naturally * aligned to the size */ - mask = ilog2(__roundup_pow_of_two(pages)); - /* Fallback to domain selective flush if size is too big */ - if (mask > cap_max_amask_val(iommu->cap)) - return iommu->flush.flush_iotlb(iommu, did, 0, 0, - DMA_TLB_DSI_FLUSH, non_present_entry_flush); - - return iommu->flush.flush_iotlb(iommu, did, addr, mask, - DMA_TLB_PSI_FLUSH, - non_present_entry_flush); + if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) + iommu->flush.flush_iotlb(iommu, did, 0, 0, + DMA_TLB_DSI_FLUSH); + else + iommu->flush.flush_iotlb(iommu, did, addr, mask, + DMA_TLB_PSI_FLUSH); + if (did) + iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); } static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) @@ -1021,13 +1068,13 @@ static int iommu_enable_translation(struct intel_iommu *iommu) unsigned long flags; spin_lock_irqsave(&iommu->register_lock, flags); - writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG); + iommu->gcmd |= DMA_GCMD_TE; + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, - readl, (sts & DMA_GSTS_TES), sts); + readl, (sts & DMA_GSTS_TES), sts); - iommu->gcmd |= DMA_GCMD_TE; spin_unlock_irqrestore(&iommu->register_lock, flags); return 0; } @@ -1043,7 +1090,7 @@ static int iommu_disable_translation(struct intel_iommu *iommu) /* Make sure hardware complete it */ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, - readl, (!(sts & DMA_GSTS_TES)), sts); + readl, (!(sts & DMA_GSTS_TES)), sts); spin_unlock_irqrestore(&iommu->register_lock, flag); return 0; @@ -1325,8 +1372,8 @@ static void domain_exit(struct dmar_domain *domain) free_domain_mem(domain); } -static int domain_context_mapping_one(struct dmar_domain *domain, - int segment, u8 bus, u8 devfn) +static int domain_context_mapping_one(struct dmar_domain *domain, int segment, + u8 bus, u8 devfn, int translation) { struct context_entry *context; unsigned long flags; @@ -1336,10 +1383,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain, unsigned long ndomains; int id; int agaw; + struct device_domain_info *info = NULL; pr_debug("Set context mapping for %02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + BUG_ON(!domain->pgd); + BUG_ON(translation != CONTEXT_TT_PASS_THROUGH && + translation != CONTEXT_TT_MULTI_LEVEL); iommu = device_to_iommu(segment, bus, devfn); if (!iommu) @@ -1399,21 +1450,44 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } context_set_domain_id(context, id); - context_set_address_width(context, iommu->agaw); - context_set_address_root(context, virt_to_phys(pgd)); - context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL); + + if (translation != CONTEXT_TT_PASS_THROUGH) { + info = iommu_support_dev_iotlb(domain, segment, bus, devfn); + translation = info ? CONTEXT_TT_DEV_IOTLB : + CONTEXT_TT_MULTI_LEVEL; + } + /* + * In pass through mode, AW must be programmed to indicate the largest + * AGAW value supported by hardware. And ASR is ignored by hardware. 
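+	 * (ASR is the address space root, i.e. the page-table pointer that
+	 * context_set_address_root() programs in the other branch below; with
+	 * pass-through there is no page table to walk, so only the width is
+	 * set.)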
+ */ + if (unlikely(translation == CONTEXT_TT_PASS_THROUGH)) + context_set_address_width(context, iommu->msagaw); + else { + context_set_address_root(context, virt_to_phys(pgd)); + context_set_address_width(context, iommu->agaw); + } + + context_set_translation_type(context, translation); context_set_fault_enable(context); context_set_present(context); domain_flush_cache(domain, context, sizeof(*context)); - /* it's a non-present to present mapping */ - if (iommu->flush.flush_context(iommu, domain->id, - (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, - DMA_CCMD_DEVICE_INVL, 1)) + /* + * It's a non-present to present mapping. If hardware doesn't cache + * non-present entry we only need to flush the write-buffer. If the + * _does_ cache non-present entries, then it does so in the special + * domain #0, which we have to flush: + */ + if (cap_caching_mode(iommu->cap)) { + iommu->flush.flush_context(iommu, 0, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH); + } else { iommu_flush_write_buffer(iommu); - else - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); - + } + iommu_enable_dev_iotlb(info); spin_unlock_irqrestore(&iommu->lock, flags); spin_lock_irqsave(&domain->iommu_lock, flags); @@ -1426,13 +1500,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } static int -domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) +domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, + int translation) { int ret; struct pci_dev *tmp, *parent; ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus), - pdev->bus->number, pdev->devfn); + pdev->bus->number, pdev->devfn, + translation); if (ret) return ret; @@ -1446,7 +1522,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) ret = domain_context_mapping_one(domain, pci_domain_nr(parent->bus), parent->bus->number, - parent->devfn); + parent->devfn, translation); if (ret) return ret; parent = parent->bus->self; @@ -1454,12 +1530,14 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ return domain_context_mapping_one(domain, pci_domain_nr(tmp->subordinate), - tmp->subordinate->number, 0); + tmp->subordinate->number, 0, + translation); else /* this is a legacy PCI bridge */ return domain_context_mapping_one(domain, pci_domain_nr(tmp->bus), tmp->bus->number, - tmp->devfn); + tmp->devfn, + translation); } static int domain_context_mapped(struct pci_dev *pdev) @@ -1540,9 +1618,8 @@ static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) clear_context_table(iommu, bus, devfn); iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL, 0); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH, 0); + DMA_CCMD_GLOBAL_INVL); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); } static void domain_remove_dev_info(struct dmar_domain *domain) @@ -1561,6 +1638,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain) info->dev->dev.archdata.iommu = NULL; spin_unlock_irqrestore(&device_domain_lock, flags); + iommu_disable_dev_iotlb(info); iommu = device_to_iommu(info->segment, info->bus, info->devfn); iommu_detach_dev(iommu, info->bus, info->devfn); free_devinfo_mem(info); @@ -1756,7 +1834,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, goto error; /* context entry init */ - ret = domain_context_mapping(domain, pdev); + ret = 
domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); if (!ret) return 0; error: @@ -1857,6 +1935,23 @@ static inline void iommu_prepare_isa(void) } #endif /* !CONFIG_DMAR_FLPY_WA */ +/* Initialize each context entry as pass through.*/ +static int __init init_context_pass_through(void) +{ + struct pci_dev *pdev = NULL; + struct dmar_domain *domain; + int ret; + + for_each_pci_dev(pdev) { + domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + ret = domain_context_mapping(domain, pdev, + CONTEXT_TT_PASS_THROUGH); + if (ret) + return ret; + } + return 0; +} + static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; @@ -1864,6 +1959,7 @@ static int __init init_dmars(void) struct pci_dev *pdev; struct intel_iommu *iommu; int i, ret; + int pass_through = 1; /* * for each drhd @@ -1917,7 +2013,15 @@ static int __init init_dmars(void) printk(KERN_ERR "IOMMU: allocate root entry failed\n"); goto error; } + if (!ecap_pass_through(iommu->ecap)) + pass_through = 0; } + if (iommu_pass_through) + if (!pass_through) { + printk(KERN_INFO + "Pass Through is not supported by hardware.\n"); + iommu_pass_through = 0; + } /* * Start from the sane iommu hardware state. @@ -1973,35 +2077,56 @@ static int __init init_dmars(void) } /* - * For each rmrr - * for each dev attached to rmrr - * do - * locate drhd for dev, alloc domain for dev - * allocate free domain - * allocate page table entries for rmrr - * if context not allocated for bus - * allocate and init context - * set present in root table for this bus - * init context with domain, translation etc - * endfor - * endfor + * If pass through is set and enabled, context entries of all pci + * devices are intialized by pass through translation type. */ - for_each_rmrr_units(rmrr) { - for (i = 0; i < rmrr->devices_cnt; i++) { - pdev = rmrr->devices[i]; - /* some BIOS lists non-exist devices in DMAR table */ - if (!pdev) - continue; - ret = iommu_prepare_rmrr_dev(rmrr, pdev); - if (ret) - printk(KERN_ERR - "IOMMU: mapping reserved region failed\n"); + if (iommu_pass_through) { + ret = init_context_pass_through(); + if (ret) { + printk(KERN_ERR "IOMMU: Pass through init failed.\n"); + iommu_pass_through = 0; } } - iommu_prepare_gfx_mapping(); + /* + * If pass through is not set or not enabled, setup context entries for + * identity mappings for rmrr, gfx, and isa. + */ + if (!iommu_pass_through) { + /* + * For each rmrr + * for each dev attached to rmrr + * do + * locate drhd for dev, alloc domain for dev + * allocate free domain + * allocate page table entries for rmrr + * if context not allocated for bus + * allocate and init context + * set present in root table for this bus + * init context with domain, translation etc + * endfor + * endfor + */ + for_each_rmrr_units(rmrr) { + for (i = 0; i < rmrr->devices_cnt; i++) { + pdev = rmrr->devices[i]; + /* + * some BIOS lists non-exist devices in DMAR + * table. 
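+				 * Skip such entries instead of failing
+				 * the whole RMRR setup.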
+ */ + if (!pdev) + continue; + ret = iommu_prepare_rmrr_dev(rmrr, pdev); + if (ret) + printk(KERN_ERR + "IOMMU: mapping reserved region failed\n"); + } + } + + iommu_prepare_gfx_mapping(); - iommu_prepare_isa(); + iommu_prepare_isa(); + } /* * for each drhd @@ -2023,10 +2148,8 @@ static int __init init_dmars(void) iommu_set_root_entry(iommu); - iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, - 0); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, - 0); + iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); iommu_disable_protect_mem_regions(iommu); ret = iommu_enable_translation(iommu); @@ -2112,7 +2235,8 @@ get_valid_domain_for_dev(struct pci_dev *pdev) /* make sure context mapping is ok */ if (unlikely(!domain_context_mapped(pdev))) { - ret = domain_context_mapping(domain, pdev); + ret = domain_context_mapping(domain, pdev, + CONTEXT_TT_MULTI_LEVEL); if (ret) { printk(KERN_ERR "Domain context map for %s failed", @@ -2173,10 +2297,11 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, if (ret) goto error; - /* it's a non-present to present mapping */ - ret = iommu_flush_iotlb_psi(iommu, domain->id, - start_paddr, size >> VTD_PAGE_SHIFT, 1); - if (ret) + /* it's a non-present to present mapping. Only flush if caching mode */ + if (cap_caching_mode(iommu->cap)) + iommu_flush_iotlb_psi(iommu, 0, start_paddr, + size >> VTD_PAGE_SHIFT); + else iommu_flush_write_buffer(iommu); return start_paddr + ((u64)paddr & (~PAGE_MASK)); @@ -2210,15 +2335,22 @@ static void flush_unmaps(void) if (!iommu) continue; - if (deferred_flush[i].next) { - iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH, 0); - for (j = 0; j < deferred_flush[i].next; j++) { - __free_iova(&deferred_flush[i].domain[j]->iovad, - deferred_flush[i].iova[j]); - } - deferred_flush[i].next = 0; + if (!deferred_flush[i].next) + continue; + + iommu->flush.flush_iotlb(iommu, 0, 0, 0, + DMA_TLB_GLOBAL_FLUSH); + for (j = 0; j < deferred_flush[i].next; j++) { + unsigned long mask; + struct iova *iova = deferred_flush[i].iova[j]; + + mask = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT; + mask = ilog2(mask >> VTD_PAGE_SHIFT); + iommu_flush_dev_iotlb(deferred_flush[i].domain[j], + iova->pfn_lo << PAGE_SHIFT, mask); + __free_iova(&deferred_flush[i].domain[j]->iovad, iova); } + deferred_flush[i].next = 0; } list_size = 0; @@ -2291,9 +2423,8 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + size); if (intel_iommu_strict) { - if (iommu_flush_iotlb_psi(iommu, - domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) - iommu_flush_write_buffer(iommu); + iommu_flush_iotlb_psi(iommu, domain->id, start_addr, + size >> VTD_PAGE_SHIFT); /* free iova */ __free_iova(&domain->iovad, iova); } else { @@ -2384,9 +2515,8 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + size); - if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr, - size >> VTD_PAGE_SHIFT, 0)) - iommu_flush_write_buffer(iommu); + iommu_flush_iotlb_psi(iommu, domain->id, start_addr, + size >> VTD_PAGE_SHIFT); /* free iova */ __free_iova(&domain->iovad, iova); @@ -2478,10 +2608,13 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne offset += size; } - /* it's a non-present to present mapping */ - if 
(iommu_flush_iotlb_psi(iommu, domain->id, - start_addr, offset >> VTD_PAGE_SHIFT, 1)) + /* it's a non-present to present mapping. Only flush if caching mode */ + if (cap_caching_mode(iommu->cap)) + iommu_flush_iotlb_psi(iommu, 0, start_addr, + offset >> VTD_PAGE_SHIFT); + else iommu_flush_write_buffer(iommu); + return nelems; } @@ -2640,9 +2773,9 @@ static int init_iommu_hw(void) iommu_set_root_entry(iommu); iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL, 0); + DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH, 0); + DMA_TLB_GLOBAL_FLUSH); iommu_disable_protect_mem_regions(iommu); iommu_enable_translation(iommu); } @@ -2657,9 +2790,9 @@ static void iommu_flush_all(void) for_each_active_iommu(iommu, drhd) { iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL, 0); + DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH, 0); + DMA_TLB_GLOBAL_FLUSH); } } @@ -2782,7 +2915,7 @@ int __init intel_iommu_init(void) * Check the need for DMA-remapping initialization now. * Above initialization will also be used by Interrupt-remapping. */ - if (no_iommu || swiotlb || dmar_disabled) + if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled) return -ENODEV; iommu_init_mempool(); @@ -2802,7 +2935,15 @@ int __init intel_iommu_init(void) init_timer(&unmap_timer); force_iommu = 1; - dma_ops = &intel_dma_ops; + + if (!iommu_pass_through) { + printk(KERN_INFO + "Multi-level page-table translation for DMAR.\n"); + dma_ops = &intel_dma_ops; + } else + printk(KERN_INFO + "DMAR: Pass through translation for DMAR.\n"); + init_iommu_sysfs(); register_iommu(&intel_iommu_ops); @@ -2888,6 +3029,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, info->dev->dev.archdata.iommu = NULL; spin_unlock_irqrestore(&device_domain_lock, flags); + iommu_disable_dev_iotlb(info); iommu_detach_dev(iommu, info->bus, info->devfn); iommu_detach_dependent_devices(iommu, pdev); free_devinfo_mem(info); @@ -2938,6 +3080,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) spin_unlock_irqrestore(&device_domain_lock, flags1); + iommu_disable_dev_iotlb(info); iommu = device_to_iommu(info->segment, info->bus, info->devfn); iommu_detach_dev(iommu, info->bus, info->devfn); iommu_detach_dependent_devices(iommu, info->dev); @@ -3142,11 +3285,11 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return -EFAULT; } - ret = domain_context_mapping(dmar_domain, pdev); + ret = vm_domain_add_dev_info(dmar_domain, pdev); if (ret) return ret; - ret = vm_domain_add_dev_info(dmar_domain, pdev); + ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); return ret; } diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 3a0cb0bb0593..1e83c8c5f985 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -409,7 +409,7 @@ int free_irte(int irq) static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) { u64 addr; - u32 cmd, sts; + u32 sts; unsigned long flags; addr = virt_to_phys((void *)iommu->ir_table->base); @@ -420,9 +420,8 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); /* Set interrupt-remapping table pointer */ - cmd = iommu->gcmd | DMA_GCMD_SIRTP; iommu->gcmd |= DMA_GCMD_SIRTP; - writel(cmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts 
& DMA_GSTS_IRTPS), sts); @@ -437,9 +436,8 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) spin_lock_irqsave(&iommu->register_lock, flags); /* Enable interrupt-remapping */ - cmd = iommu->gcmd | DMA_GCMD_IRE; iommu->gcmd |= DMA_GCMD_IRE; - writel(cmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRES), sts); diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 03c7706c0a09..e3a87210e947 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -5,6 +5,7 @@ * * PCI Express I/O Virtualization (IOV) support. * Single Root IOV 1.0 + * Address Translation Service 1.0 */ #include <linux/pci.h> @@ -492,10 +493,10 @@ found: if (pdev) iov->dev = pci_dev_get(pdev); - else { + else iov->dev = dev; - mutex_init(&iov->lock); - } + + mutex_init(&iov->lock); dev->sriov = iov; dev->is_physfn = 1; @@ -515,11 +516,11 @@ static void sriov_release(struct pci_dev *dev) { BUG_ON(dev->sriov->nr_virtfn); - if (dev == dev->sriov->dev) - mutex_destroy(&dev->sriov->lock); - else + if (dev != dev->sriov->dev) pci_dev_put(dev->sriov->dev); + mutex_destroy(&dev->sriov->lock); + kfree(dev->sriov); dev->sriov = NULL; } @@ -681,3 +682,145 @@ irqreturn_t pci_sriov_migration(struct pci_dev *dev) return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE; } EXPORT_SYMBOL_GPL(pci_sriov_migration); + +static int ats_alloc_one(struct pci_dev *dev, int ps) +{ + int pos; + u16 cap; + struct pci_ats *ats; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS); + if (!pos) + return -ENODEV; + + ats = kzalloc(sizeof(*ats), GFP_KERNEL); + if (!ats) + return -ENOMEM; + + ats->pos = pos; + ats->stu = ps; + pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap); + ats->qdep = PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) : + PCI_ATS_MAX_QDEP; + dev->ats = ats; + + return 0; +} + +static void ats_free_one(struct pci_dev *dev) +{ + kfree(dev->ats); + dev->ats = NULL; +} + +/** + * pci_enable_ats - enable the ATS capability + * @dev: the PCI device + * @ps: the IOMMU page shift + * + * Returns 0 on success, or negative on failure. + */ +int pci_enable_ats(struct pci_dev *dev, int ps) +{ + int rc; + u16 ctrl; + + BUG_ON(dev->ats && dev->ats->is_enabled); + + if (ps < PCI_ATS_MIN_STU) + return -EINVAL; + + if (dev->is_physfn || dev->is_virtfn) { + struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn; + + mutex_lock(&pdev->sriov->lock); + if (pdev->ats) + rc = pdev->ats->stu == ps ? 0 : -EINVAL; + else + rc = ats_alloc_one(pdev, ps); + + if (!rc) + pdev->ats->ref_cnt++; + mutex_unlock(&pdev->sriov->lock); + if (rc) + return rc; + } + + if (!dev->is_physfn) { + rc = ats_alloc_one(dev, ps); + if (rc) + return rc; + } + + ctrl = PCI_ATS_CTRL_ENABLE; + if (!dev->is_virtfn) + ctrl |= PCI_ATS_CTRL_STU(ps - PCI_ATS_MIN_STU); + pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl); + + dev->ats->is_enabled = 1; + + return 0; +} + +/** + * pci_disable_ats - disable the ATS capability + * @dev: the PCI device + */ +void pci_disable_ats(struct pci_dev *dev) +{ + u16 ctrl; + + BUG_ON(!dev->ats || !dev->ats->is_enabled); + + pci_read_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, &ctrl); + ctrl &= ~PCI_ATS_CTRL_ENABLE; + pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl); + + dev->ats->is_enabled = 0; + + if (dev->is_physfn || dev->is_virtfn) { + struct pci_dev *pdev = dev->is_physfn ? 
dev : dev->physfn; + + mutex_lock(&pdev->sriov->lock); + pdev->ats->ref_cnt--; + if (!pdev->ats->ref_cnt) + ats_free_one(pdev); + mutex_unlock(&pdev->sriov->lock); + } + + if (!dev->is_physfn) + ats_free_one(dev); +} + +/** + * pci_ats_queue_depth - query the ATS Invalidate Queue Depth + * @dev: the PCI device + * + * Returns the queue depth on success, or negative on failure. + * + * The ATS spec uses 0 in the Invalidate Queue Depth field to + * indicate that the function can accept 32 Invalidate Request. + * But here we use the `real' values (i.e. 1~32) for the Queue + * Depth; and 0 indicates the function shares the Queue with + * other functions (doesn't exclusively own a Queue). + */ +int pci_ats_queue_depth(struct pci_dev *dev) +{ + int pos; + u16 cap; + + if (dev->is_virtfn) + return 0; + + if (dev->ats) + return dev->ats->qdep; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS); + if (!pos) + return -ENODEV; + + pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap); + + return PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) : + PCI_ATS_MAX_QDEP; +} diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index d03f6b99f292..f73bcbedf37c 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -229,6 +229,15 @@ struct pci_sriov { u8 __iomem *mstate; /* VF Migration State Array */ }; +/* Address Translation Service */ +struct pci_ats { + int pos; /* capability position */ + int stu; /* Smallest Translation Unit */ + int qdep; /* Invalidate Queue Depth */ + int ref_cnt; /* Physical Function reference count */ + int is_enabled:1; /* Enable bit is set */ +}; + #ifdef CONFIG_PCI_IOV extern int pci_iov_init(struct pci_dev *dev); extern void pci_iov_release(struct pci_dev *dev); @@ -236,6 +245,20 @@ extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type); extern void pci_restore_iov_state(struct pci_dev *dev); extern int pci_iov_bus_range(struct pci_bus *bus); + +extern int pci_enable_ats(struct pci_dev *dev, int ps); +extern void pci_disable_ats(struct pci_dev *dev); +extern int pci_ats_queue_depth(struct pci_dev *dev); +/** + * pci_ats_enabled - query the ATS status + * @dev: the PCI device + * + * Returns 1 if ATS capability is enabled, or 0 if not. + */ +static inline int pci_ats_enabled(struct pci_dev *dev) +{ + return dev->ats && dev->ats->is_enabled; +} #else static inline int pci_iov_init(struct pci_dev *dev) { @@ -257,6 +280,22 @@ static inline int pci_iov_bus_range(struct pci_bus *bus) { return 0; } + +static inline int pci_enable_ats(struct pci_dev *dev, int ps) +{ + return -ENODEV; +} +static inline void pci_disable_ats(struct pci_dev *dev) +{ +} +static inline int pci_ats_queue_depth(struct pci_dev *dev) +{ + return -ENODEV; +} +static inline int pci_ats_enabled(struct pci_dev *dev) +{ + return 0; +} #endif /* CONFIG_PCI_IOV */ #endif /* DRIVERS_PCI_H */ diff --git a/fs/namespace.c b/fs/namespace.c index 2dd333b0fe7f..a7bea8c8bd46 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1937,6 +1937,21 @@ dput_out: return retval; } +static struct mnt_namespace *alloc_mnt_ns(void) +{ + struct mnt_namespace *new_ns; + + new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); + if (!new_ns) + return ERR_PTR(-ENOMEM); + atomic_set(&new_ns->count, 1); + new_ns->root = NULL; + INIT_LIST_HEAD(&new_ns->list); + init_waitqueue_head(&new_ns->poll); + new_ns->event = 0; + return new_ns; +} + /* * Allocate a new namespace structure and populate it with contents * copied from the namespace of the passed in task structure. 
@@ -1948,14 +1963,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; struct vfsmount *p, *q; - new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); - if (!new_ns) - return ERR_PTR(-ENOMEM); - - atomic_set(&new_ns->count, 1); - INIT_LIST_HEAD(&new_ns->list); - init_waitqueue_head(&new_ns->poll); - new_ns->event = 0; + new_ns = alloc_mnt_ns(); + if (IS_ERR(new_ns)) + return new_ns; down_write(&namespace_sem); /* First pass: copy the tree topology */ @@ -2019,6 +2029,24 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, return new_ns; } +/** + * create_mnt_ns - creates a private namespace and adds a root filesystem + * @mnt: pointer to the new root filesystem mountpoint + */ +struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) +{ + struct mnt_namespace *new_ns; + + new_ns = alloc_mnt_ns(); + if (!IS_ERR(new_ns)) { + mnt->mnt_ns = new_ns; + new_ns->root = mnt; + list_add(&new_ns->list, &new_ns->root->mnt_list); + } + return new_ns; +} +EXPORT_SYMBOL(create_mnt_ns); + SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, char __user *, type, unsigned long, flags, void __user *, data) { @@ -2246,10 +2274,14 @@ void __init mnt_init(void) init_mount_tree(); } -void __put_mnt_ns(struct mnt_namespace *ns) +void put_mnt_ns(struct mnt_namespace *ns) { - struct vfsmount *root = ns->root; + struct vfsmount *root; LIST_HEAD(umount_list); + + if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock)) + return; + root = ns->root; ns->root = NULL; spin_unlock(&vfsmount_lock); down_write(&namespace_sem); @@ -2260,3 +2292,4 @@ void __put_mnt_ns(struct mnt_namespace *ns) release_mounts(&umount_list); kfree(ns); } +EXPORT_SYMBOL(put_mnt_ns); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index f01caec84463..40c766782891 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -65,6 +65,11 @@ char *nfs_path(const char *base, dentry = dentry->d_parent; } spin_unlock(&dcache_lock); + if (*end != '/') { + if (--buflen < 0) + goto Elong; + *--end = '/'; + } namelen = strlen(base); /* Strip off excess slashes in base string */ while (namelen > 0 && base[namelen - 1] == '/') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 3d460527daab..0b4cbdc60abd 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -42,6 +42,8 @@ #include <linux/smp_lock.h> #include <linux/seq_file.h> #include <linux/mount.h> +#include <linux/mnt_namespace.h> +#include <linux/namei.h> #include <linux/nfs_idmap.h> #include <linux/vfs.h> #include <linux/inet.h> @@ -272,10 +274,14 @@ static const struct super_operations nfs_sops = { #ifdef CONFIG_NFS_V4 static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_remote_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs4_kill_super(struct super_block *sb); static struct file_system_type nfs4_fs_type = { @@ -286,6 +292,14 @@ static struct file_system_type nfs4_fs_type = { .fs_flags = 
FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +static struct file_system_type nfs4_remote_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs4_remote_get_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + struct file_system_type nfs4_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -294,6 +308,14 @@ struct file_system_type nfs4_xdev_fs_type = { .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +static struct file_system_type nfs4_remote_referral_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs4_remote_referral_get_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + struct file_system_type nfs4_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -2433,12 +2455,12 @@ out_no_client_address: } /* - * Get the superblock for an NFS4 mountpoint + * Get the superblock for the NFS4 root partition */ -static int nfs4_get_sb(struct file_system_type *fs_type, +static int nfs4_remote_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - struct nfs_parsed_mount_data *data; + struct nfs_parsed_mount_data *data = raw_data; struct super_block *s; struct nfs_server *server; struct nfs_fh *mntfh; @@ -2449,18 +2471,12 @@ static int nfs4_get_sb(struct file_system_type *fs_type, }; int error = -ENOMEM; - data = kzalloc(sizeof(*data), GFP_KERNEL); mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); if (data == NULL || mntfh == NULL) goto out_free_fh; security_init_mnt_opts(&data->lsm_opts); - /* Validate the mount data */ - error = nfs4_validate_mount_data(raw_data, data, dev_name); - if (error < 0) - goto out; - /* Get a volume representation */ server = nfs4_create_server(data, mntfh); if (IS_ERR(server)) { @@ -2473,7 +2489,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, compare_super = NULL; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); + s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_free; @@ -2510,14 +2526,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(data->client_address); - kfree(data->nfs_server.export_path); - kfree(data->nfs_server.hostname); - kfree(data->fscache_uniq); security_free_mnt_opts(&data->lsm_opts); out_free_fh: kfree(mntfh); - kfree(data); return error; out_free: @@ -2531,6 +2542,126 @@ error_splat_super: goto out; } +static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, + int flags, void *data, const char *hostname) +{ + struct vfsmount *root_mnt; + char *root_devname; + size_t len; + + len = strlen(hostname) + 3; + root_devname = kmalloc(len, GFP_KERNEL); + if (root_devname == NULL) + return ERR_PTR(-ENOMEM); + snprintf(root_devname, len, "%s:/", hostname); + root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); + kfree(root_devname); + return root_mnt; +} + +static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt) +{ + char *page = (char *) __get_free_page(GFP_KERNEL); + char *devname, *tmp; + + if (page == NULL) + return; + devname = nfs_path(path->mnt->mnt_devname, + path->mnt->mnt_root, path->dentry, + page, PAGE_SIZE); + if (devname == NULL) + goto out_freepage; + tmp = kstrdup(devname, GFP_KERNEL); + if (tmp == NULL) + goto out_freepage; + 
kfree(mnt->mnt_devname); + mnt->mnt_devname = tmp; +out_freepage: + free_page((unsigned long)page); +} + +static int nfs_follow_remote_path(struct vfsmount *root_mnt, + const char *export_path, struct vfsmount *mnt_target) +{ + struct mnt_namespace *ns_private; + struct nameidata nd; + struct super_block *s; + int ret; + + ns_private = create_mnt_ns(root_mnt); + ret = PTR_ERR(ns_private); + if (IS_ERR(ns_private)) + goto out_mntput; + + ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, + export_path, LOOKUP_FOLLOW, &nd); + + put_mnt_ns(ns_private); + + if (ret != 0) + goto out_err; + + s = nd.path.mnt->mnt_sb; + atomic_inc(&s->s_active); + mnt_target->mnt_sb = s; + mnt_target->mnt_root = dget(nd.path.dentry); + + /* Correct the device pathname */ + nfs_fix_devname(&nd.path, mnt_target); + + path_put(&nd.path); + down_write(&s->s_umount); + return 0; +out_mntput: + mntput(root_mnt); +out_err: + return ret; +} + +/* + * Get the superblock for an NFS4 mountpoint + */ +static int nfs4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +{ + struct nfs_parsed_mount_data *data; + char *export_path; + struct vfsmount *root_mnt; + int error = -ENOMEM; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + goto out_free_data; + + /* Validate the mount data */ + error = nfs4_validate_mount_data(raw_data, data, dev_name); + if (error < 0) + goto out; + + export_path = data->nfs_server.export_path; + data->nfs_server.export_path = "/"; + root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data, + data->nfs_server.hostname); + data->nfs_server.export_path = export_path; + + error = PTR_ERR(root_mnt); + if (IS_ERR(root_mnt)) + goto out; + + error = nfs_follow_remote_path(root_mnt, export_path, mnt); + +out: + kfree(data->client_address); + kfree(data->nfs_server.export_path); + kfree(data->nfs_server.hostname); + kfree(data->fscache_uniq); +out_free_data: + kfree(data); + dprintk("<-- nfs4_get_sb() = %d%s\n", error, + error != 0 ? " [error]" : ""); + return error; +} + static void nfs4_kill_super(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); @@ -2627,12 +2758,9 @@ error_splat_super: return error; } -/* - * Create an NFS4 server record on referral traversal - */ -static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data, - struct vfsmount *mnt) +static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, + struct vfsmount *mnt) { struct nfs_clone_mount *data = raw_data; struct super_block *s; @@ -2711,4 +2839,36 @@ error_splat_super: return error; } +/* + * Create an NFS4 server record on referral traversal + */ +static int nfs4_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, + struct vfsmount *mnt) +{ + struct nfs_clone_mount *data = raw_data; + char *export_path; + struct vfsmount *root_mnt; + int error; + + dprintk("--> nfs4_referral_get_sb()\n"); + + export_path = data->mnt_path; + data->mnt_path = "/"; + + root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, + flags, data, data->hostname); + data->mnt_path = export_path; + + error = PTR_ERR(root_mnt); + if (IS_ERR(root_mnt)) + goto out; + + error = nfs_follow_remote_path(root_mnt, export_path, mnt); +out: + dprintk("<-- nfs4_referral_get_sb() = %d%s\n", error, + error != 0 ? 
" [error]" : ""); + return error; +} + #endif /* CONFIG_NFS_V4 */ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 2643d848df90..4d668e05d458 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -69,7 +69,6 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) int cpu_up(unsigned int cpu); void notify_cpu_starting(unsigned int cpu); -extern void cpu_hotplug_init(void); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); @@ -84,10 +83,6 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) { } -static inline void cpu_hotplug_init(void) -{ -} - static inline void cpu_maps_update_begin(void) { } diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 1a455f1f86d7..5619f8522738 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -13,6 +13,10 @@ #define DMA_PTE_WRITE (2) #define DMA_PTE_SNP (1 << 11) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define CONTEXT_TT_DEV_IOTLB 1 +#define CONTEXT_TT_PASS_THROUGH 2 + struct intel_iommu; struct dmar_domain; struct root_entry; @@ -21,11 +25,16 @@ extern void free_dmar_iommu(struct intel_iommu *iommu); #ifdef CONFIG_DMAR extern int iommu_calculate_agaw(struct intel_iommu *iommu); +extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) { return 0; } +static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) +{ + return 0; +} #endif extern int dmar_disabled; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 10ff5c498824..1731fb5fd775 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -188,6 +188,15 @@ struct dmar_rmrr_unit { #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) + +struct dmar_atsr_unit { + struct list_head list; /* list of ATSR units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + struct pci_dev **devices; /* target devices */ + int devices_cnt; /* target device count */ + u8 include_all:1; /* include all ports */ +}; + /* Intel DMAR initialization functions */ extern int intel_iommu_init(void); #else diff --git a/include/linux/firewire.h b/include/linux/firewire.h index e584b7215e8b..9823946adbc5 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -3,6 +3,7 @@ #include <linux/completion.h> #include <linux/device.h> +#include <linux/dma-mapping.h> #include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> @@ -355,4 +356,90 @@ int fw_run_transaction(struct fw_card *card, int tcode, int destination_id, int generation, int speed, unsigned long long offset, void *payload, size_t length); +static inline int fw_stream_packet_destination_id(int tag, int channel, int sy) +{ + return tag << 14 | channel << 8 | sy; +} + +struct fw_descriptor { + struct list_head link; + size_t length; + u32 immediate; + u32 key; + const u32 *data; +}; + +int fw_core_add_descriptor(struct fw_descriptor *desc); +void fw_core_remove_descriptor(struct fw_descriptor *desc); + +/* + * The iso packet format allows for an immediate header/payload part + * stored in 'header' immediately after the packet info plus an + * indirect payload part that is pointer to by the 'payload' field. + * Applications can use one or the other or both to implement simple + * low-bandwidth streaming (e.g. audio) or more advanced + * scatter-gather streaming (e.g. assembling video frame automatically). 
+ */ +struct fw_iso_packet { + u16 payload_length; /* Length of indirect payload. */ + u32 interrupt:1; /* Generate interrupt on this packet */ + u32 skip:1; /* Set to not send packet at all. */ + u32 tag:2; + u32 sy:4; + u32 header_length:8; /* Length of immediate header. */ + u32 header[0]; +}; + +#define FW_ISO_CONTEXT_TRANSMIT 0 +#define FW_ISO_CONTEXT_RECEIVE 1 + +#define FW_ISO_CONTEXT_MATCH_TAG0 1 +#define FW_ISO_CONTEXT_MATCH_TAG1 2 +#define FW_ISO_CONTEXT_MATCH_TAG2 4 +#define FW_ISO_CONTEXT_MATCH_TAG3 8 +#define FW_ISO_CONTEXT_MATCH_ALL_TAGS 15 + +/* + * An iso buffer is just a set of pages mapped for DMA in the + * specified direction. Since the pages are to be used for DMA, they + * are not mapped into the kernel virtual address space. We store the + * DMA address in the page private. The helper function + * fw_iso_buffer_map() will map the pages into a given vma. + */ +struct fw_iso_buffer { + enum dma_data_direction direction; + struct page **pages; + int page_count; +}; + +int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card, + int page_count, enum dma_data_direction direction); +void fw_iso_buffer_destroy(struct fw_iso_buffer *buffer, struct fw_card *card); + +struct fw_iso_context; +typedef void (*fw_iso_callback_t)(struct fw_iso_context *context, + u32 cycle, size_t header_length, + void *header, void *data); +struct fw_iso_context { + struct fw_card *card; + int type; + int channel; + int speed; + size_t header_size; + fw_iso_callback_t callback; + void *callback_data; +}; + +struct fw_iso_context *fw_iso_context_create(struct fw_card *card, + int type, int channel, int speed, size_t header_size, + fw_iso_callback_t callback, void *callback_data); +int fw_iso_context_queue(struct fw_iso_context *ctx, + struct fw_iso_packet *packet, + struct fw_iso_buffer *buffer, + unsigned long payload); +int fw_iso_context_start(struct fw_iso_context *ctx, + int cycle, int sync, int tags); +int fw_iso_context_stop(struct fw_iso_context *ctx); +void fw_iso_context_destroy(struct fw_iso_context *ctx); + #endif /* _LINUX_FIREWIRE_H */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index aa8c53171233..482dc91fd53a 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -53,6 +53,7 @@ #define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ #define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ #define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ +#define DMAR_IQ_SHIFT 4 /* Invalidation queue head/tail shift */ #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ #define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ #define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ @@ -120,8 +121,10 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) #define ecap_coherent(e) ((e) & 0x1) #define ecap_qis(e) ((e) & 0x2) +#define ecap_pass_through(e) ((e >> 6) & 0x1) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) +#define ecap_dev_iotlb_support(e) (((e) >> 2) & 0x1) #define ecap_max_handle_mask(e) ((e >> 20) & 0xf) #define ecap_sc_support(e) ((e >> 7) & 0x1) /* Snooping Control */ @@ -197,6 +200,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_FSTS_PPF ((u32)2) #define DMA_FSTS_PFO ((u32)1) #define DMA_FSTS_IQE (1 << 4) +#define DMA_FSTS_ICE (1 << 5) +#define DMA_FSTS_ITE (1 << 6) #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) /* 
FRCD_REG, 32 bits access */ @@ -225,7 +230,8 @@ do { \ enum { QI_FREE, QI_IN_USE, - QI_DONE + QI_DONE, + QI_ABORT }; #define QI_CC_TYPE 0x1 @@ -254,6 +260,12 @@ enum { #define QI_CC_DID(did) (((u64)did) << 16) #define QI_CC_GRAN(gran) (((u64)gran) >> (DMA_CCMD_INVL_GRANU_OFFSET-4)) +#define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) +#define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16) +#define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) +#define QI_DEV_IOTLB_SIZE 1 +#define QI_DEV_IOTLB_MAX_INVS 32 + struct qi_desc { u64 low, high; }; @@ -280,10 +292,10 @@ struct ir_table { #endif struct iommu_flush { - int (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, - u64 type, int non_present_entry_flush); - int (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type, int non_present_entry_flush); + void (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid, + u8 fm, u64 type); + void (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type); }; enum { @@ -302,6 +314,7 @@ struct intel_iommu { spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ int agaw; /* agaw of this iommu */ + int msagaw; /* max sagaw of this iommu */ unsigned int irq; unsigned char name[13]; /* Device Name */ @@ -329,6 +342,7 @@ static inline void __iommu_flush_cache( } extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); +extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); extern int alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); @@ -337,11 +351,12 @@ extern void dmar_disable_qi(struct intel_iommu *iommu); extern int dmar_reenable_qi(struct intel_iommu *iommu); extern void qi_global_iec(struct intel_iommu *iommu); -extern int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, - u8 fm, u64 type, int non_present_entry_flush); -extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, - unsigned int size_order, u64 type, - int non_present_entry_flush); +extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, + u8 fm, u64 type); +extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type); +extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, + u64 addr, unsigned mask); extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 3a059298cc19..3beb2592b03f 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -24,16 +24,10 @@ struct proc_mounts { struct fs_struct; +extern struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt); extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct fs_struct *); -extern void __put_mnt_ns(struct mnt_namespace *ns); - -static inline void put_mnt_ns(struct mnt_namespace *ns) -{ - if (atomic_dec_and_lock(&ns->count, &vfsmount_lock)) - /* releases vfsmount_lock */ - __put_mnt_ns(ns); -} +extern void put_mnt_ns(struct mnt_namespace *ns); static inline void exit_mnt_ns(struct task_struct *p) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 1365c745bdb7..d304ddf412d0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -196,6 +196,7 @@ struct pci_cap_saved_state { struct pcie_link_state; struct pci_vpd; struct pci_sriov; +struct pci_ats; /* * The pci_dev 
structure is used to describe PCI devices. @@ -293,6 +294,7 @@ struct pci_dev { struct pci_sriov *sriov; /* SR-IOV capability related */ struct pci_dev *physfn; /* the PF this VF is associated with */ }; + struct pci_ats *ats; /* Address Translation Service */ #endif }; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 83b02f5a25b2..fcaee42c7ac2 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -502,6 +502,7 @@ #define PCI_EXT_CAP_ID_DSN 3 #define PCI_EXT_CAP_ID_PWR 4 #define PCI_EXT_CAP_ID_ARI 14 +#define PCI_EXT_CAP_ID_ATS 15 #define PCI_EXT_CAP_ID_SRIOV 16 /* Advanced Error Reporting */ @@ -620,6 +621,15 @@ #define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */ #define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */ +/* Address Translation Service */ +#define PCI_ATS_CAP 0x04 /* ATS Capability Register */ +#define PCI_ATS_CAP_QDEP(x) ((x) & 0x1f) /* Invalidate Queue Depth */ +#define PCI_ATS_MAX_QDEP 32 /* Max Invalidate Queue Depth */ +#define PCI_ATS_CTRL 0x06 /* ATS Control Register */ +#define PCI_ATS_CTRL_ENABLE 0x8000 /* ATS Enable */ +#define PCI_ATS_CTRL_STU(x) ((x) & 0x1f) /* Smallest Translation Unit */ +#define PCI_ATS_MIN_STU 12 /* shift of minimum STU block */ + /* Single Root I/O Virtualization */ #define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ #define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ diff --git a/init/main.c b/init/main.c index 09131ec090c1..4870dfeb9ee5 100644 --- a/init/main.c +++ b/init/main.c @@ -678,7 +678,6 @@ asmlinkage void __init start_kernel(void) #endif page_cgroup_init(); enable_debug_pagealloc(); - cpu_hotplug_init(); kmemtrace_init(); kmemleak_init(); debug_objects_mem_init(); diff --git a/kernel/cpu.c b/kernel/cpu.c index 395b6974dc8d..8ce10043e4ac 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -34,14 +34,11 @@ static struct { * an ongoing cpu hotplug operation. */ int refcount; -} cpu_hotplug; - -void __init cpu_hotplug_init(void) -{ - cpu_hotplug.active_writer = NULL; - mutex_init(&cpu_hotplug.lock); - cpu_hotplug.refcount = 0; -} +} cpu_hotplug = { + .active_writer = NULL, + .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), + .refcount = 0, +}; #ifdef CONFIG_HOTPLUG_CPU |
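
For context on the isochronous interface that the include/linux/firewire.h hunk above exports, here is a minimal sketch of how a kernel-side client might drive it. Only the fw_iso_* declarations come from the patch; the function names example_iso_callback() and example_start_iso_rx(), the channel/speed numbers, the page count, and the packet sizes are assumptions made up for illustration, and error handling is abbreviated.

	/*
	 * Illustrative only: a rough kernel-side user of the exported
	 * isochronous API.  All concrete values below are made up.
	 */
	#include <linux/err.h>
	#include <linux/firewire.h>

	static void example_iso_callback(struct fw_iso_context *ctx, u32 cycle,
					 size_t header_length, void *header,
					 void *data)
	{
		/* Per-packet headers arrive here; 'data' is the callback_data. */
	}

	static int example_start_iso_rx(struct fw_card *card)
	{
		struct fw_iso_context *ctx;
		struct fw_iso_buffer buffer;
		struct fw_iso_packet packet = {
			.payload_length	= 2048,	/* one buffer chunk per packet */
			.interrupt	= 1,	/* run the callback for this packet */
			.header_length	= 4,	/* one quadlet of stripped header */
		};
		int err;

		/* Channel 5, speed code 2 and header_size 4 are arbitrary here. */
		ctx = fw_iso_context_create(card, FW_ISO_CONTEXT_RECEIVE,
					    5, 2, 4,
					    example_iso_callback, NULL);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);

		/* Back the context with 16 DMA-mapped pages. */
		err = fw_iso_buffer_init(&buffer, card, 16, DMA_FROM_DEVICE);
		if (err < 0)
			goto out_destroy;

		/* Queue one packet at offset 0 of the buffer, then start. */
		err = fw_iso_context_queue(ctx, &packet, &buffer, 0);
		if (err < 0)
			goto out_free;

		return fw_iso_context_start(ctx, -1, 0,
					    FW_ISO_CONTEXT_MATCH_ALL_TAGS);

	out_free:
		fw_iso_buffer_destroy(&buffer, card);
	out_destroy:
		fw_iso_context_destroy(ctx);
		return err;
	}

In real use, many packets would be queued and the callback would re-queue buffer chunks as they complete; the sketch only shows the call sequence.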
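
Similarly, the PCI_ATS_* additions to include/linux/pci_regs.h are only register offsets and bit masks. A hypothetical helper that turns ATS on through them could look roughly like the following; example_enable_ats() is an assumed name, the standard pci_find_ext_capability()/pci_read_config_word()/pci_write_config_word() accessors are used, and the patch's own driver-side ATS code is not reproduced here.

	/*
	 * Illustrative only: locate the ATS extended capability and enable
	 * translation requests, using the new register macros.
	 */
	#include <linux/pci.h>

	static int example_enable_ats(struct pci_dev *dev, int ps)
	{
		int pos;
		u16 ctrl;

		/* ATS lives in extended config space; bail out if absent. */
		pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS);
		if (!pos)
			return -ENODEV;

		/* The smallest translation unit is a shift above the 4K page. */
		if (ps < PCI_ATS_MIN_STU)
			return -EINVAL;

		pci_read_config_word(dev, pos + PCI_ATS_CTRL, &ctrl);
		ctrl |= PCI_ATS_CTRL_ENABLE |
			PCI_ATS_CTRL_STU(ps - PCI_ATS_MIN_STU);
		pci_write_config_word(dev, pos + PCI_ATS_CTRL, ctrl);

		return 0;
	}

The invalidate queue depth advertised by the device could be read the same way, via PCI_ATS_CAP and PCI_ATS_CAP_QDEP(), falling back to PCI_ATS_MAX_QDEP when the field is zero.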
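
Finally, the kernel/cpu.c hunk above replaces a runtime init function with compile-time initialization of a structure that embeds a mutex. The same pattern on a stand-alone structure, with made-up names, looks like this:

	/* Hypothetical example of the static-initializer pattern. */
	#include <linux/mutex.h>
	#include <linux/sched.h>

	static struct {
		struct task_struct *active_writer;
		struct mutex lock;
		int refcount;
	} example_state = {
		.active_writer	= NULL,
		.lock		= __MUTEX_INITIALIZER(example_state.lock),
		.refcount	= 0,
	};

Because the mutex is initialized at build time, no early-boot init call is needed, which is what allows the call in start_kernel() to be dropped.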