11 files changed, 254 insertions, 26 deletions
diff --git a/Documentation/bpf/bpf_devel_QA.txt b/Documentation/bpf/bpf_devel_QA.txt
index 84cbb302f2b5..1a0b704e1a38 100644
--- a/Documentation/bpf/bpf_devel_QA.txt
+++ b/Documentation/bpf/bpf_devel_QA.txt
@@ -539,6 +539,18 @@ A: Although LLVM IR generation and optimization try to stay architecture
        The clang option "-fno-jump-tables" can be used to disable
        switch table generation.
 
+     - For clang -target bpf, it is guaranteed that pointer or long /
+       unsigned long types will always have a width of 64 bit, no matter
+       whether underlying clang binary or default target (or kernel) is
+       32 bit. However, when native clang target is used, then it will
+       compile these types based on the underlying architecture's conventions,
+       meaning in case of 32 bit architecture, pointer or long / unsigned
+       long types e.g. in BPF context structure will have width of 32 bit
+       while the BPF LLVM back end still operates in 64 bit. The native
+       target is mostly needed in tracing for the case of walking pt_regs
+       or other kernel structures where CPU's register width matters.
+       Otherwise, clang -target bpf is generally recommended.
+
    You should use default target when:
 
      - Your program includes a header file, e.g., ptrace.h, which eventually
diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt
index 8c033d48e2ba..60d50a2b0323 100644
--- a/Documentation/devicetree/bindings/net/dsa/marvell.txt
+++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt
@@ -13,9 +13,18 @@ placed as a child node of an mdio device.
 The properties described here are those specific to Marvell devices.
 Additional required and optional properties can be found in dsa.txt.
 
+The compatibility string is used only to find an identification register,
+which is at a different MDIO base address in different switch families.
+- "marvell,mv88e6085"	: Switch has base address 0x10. Use with models:
+			  6085, 6095, 6097, 6123, 6131, 6141, 6161, 6165,
+			  6171, 6172, 6175, 6176, 6185, 6240, 6320, 6321,
+			  6341, 6350, 6351, 6352
+- "marvell,mv88e6190"	: Switch has base address 0x00. Use with models:
+			  6190, 6190X, 6191, 6290, 6390, 6390X
+
 Required properties:
 - compatible		: Should be one of "marvell,mv88e6085" or
-			  "marvell,mv88e6190"
+			  "marvell,mv88e6190" as indicated above
 - reg			: Address on the MII bus for the switch.
 
 Optional properties:
diff --git a/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
new file mode 100644
index 000000000000..2aaef567c5be
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
@@ -0,0 +1,23 @@
+* MCR20A IEEE 802.15.4 *
+
+Required properties:
+  - compatible:		should be "nxp,mcr20a"
+  - spi-max-frequency:	maximal bus speed, should be set to a frequency
+			lower than 9000000 depends sync or async operation mode
+  - reg:		the chipselect index
+  - interrupts:		the interrupt generated by the device. Non high-level
+			can occur deadlocks while handling isr.
+
+Optional properties:
+  - rst_b-gpio:		GPIO spec for the RST_B pin
+
+Example:
+
+	mcr20a@0 {
+		compatible = "nxp,mcr20a";
+		spi-max-frequency = <9000000>;
+		reg = <0>;
+		interrupts = <17 2>;
+		interrupt-parent = <&gpio>;
+		rst_b-gpio = <&gpio 27 1>
+	};
diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt
index 27966ae741e0..457d5ae16f23 100644
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -29,6 +29,7 @@ Optional properties for PHY child node:
 - reset-gpios : Should specify the gpio for phy reset
 - magic-packet : If present, indicates that the hardware supports waking
   up via magic packet.
+- phy-handle : see ethernet.txt file in the same directory
 
 Examples:
 
diff --git a/Documentation/devicetree/bindings/net/sff,sfp.txt b/Documentation/devicetree/bindings/net/sff,sfp.txt
index f1c441bedf68..929591d52ed6 100644
--- a/Documentation/devicetree/bindings/net/sff,sfp.txt
+++ b/Documentation/devicetree/bindings/net/sff,sfp.txt
@@ -33,6 +33,10 @@ Optional Properties:
   Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high:
   high Tx rate. Must not be present for SFF modules
 
+- maximum-power-milliwatt : Maximum module power consumption
+  Specifies the maximum power consumption allowable by a module in the
+  slot, in milli-Watts.  Presently, modules can be up to 1W, 1.5W or 2W.
+
 Example #1: Direct serdes to SFP connection
 
 sfp_eth3: sfp-eth3 {
@@ -40,6 +44,7 @@ sfp_eth3: sfp-eth3 {
 	i2c-bus = <&sfp_1g_i2c>;
 	los-gpios = <&cpm_gpio2 22 GPIO_ACTIVE_HIGH>;
 	mod-def0-gpios = <&cpm_gpio2 21 GPIO_ACTIVE_LOW>;
+	maximum-power-milliwatt = <1000>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&cpm_sfp_1g_pins &cps_sfp_1g_pins>;
 	tx-disable-gpios = <&cps_gpio1 24 GPIO_ACTIVE_HIGH>;
diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt
index 02c4353b5cf2..9ef9338aaee1 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.txt
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt
@@ -25,6 +25,8 @@ Optional property:
 				    software needs to take when this pin is
 				    strapped in these modes. See data manual
 				    for details.
+	- ti,clk-output-sel - Muxing option for CLK_OUT pin - see dt-bindings/net/ti-dp83867.h
+				    for applicable values.
 
 Note: ti,min-output-impedance and ti,max-output-impedance are mutually
       exclusive. When both properties are present ti,max-output-impedance
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index a553d4e4a0fb..1d1120753ae8 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -755,13 +755,13 @@ udp_rmem_min - INTEGER
 	Minimal size of receive buffer used by UDP sockets in moderation.
 	Each UDP socket is able to use the size for receiving data, even if
 	total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
-	Default: 1 page
+	Default: 4K
 
 udp_wmem_min - INTEGER
 	Minimal size of send buffer used by UDP sockets in moderation.
 	Each UDP socket is able to use the size for sending data, even if
 	total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
-	Default: 1 page
+	Default: 4K
 
 CIPSOv4 Variables:
 
@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN
 	FALSE: disabled
 	Default: FALSE
 
+fib_multipath_hash_policy - INTEGER
+	Controls which hash policy to use for multipath routes.
+	Default: 0 (Layer 3)
+	Possible values:
+	0 - Layer 3 (source and destination addresses plus flow label)
+	1 - Layer 4 (standard 5-tuple)
+
 anycast_src_echo_reply - BOOLEAN
 	Controls the use of anycast addresses as source addresses for ICMPv6
 	echo reply
@@ -2094,7 +2101,7 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
 	It is guaranteed to each SCTP socket (but not association) even
 	under moderate memory pressure.
 
-	Default: 1 page
+	Default: 4K
 
 sctp_wmem  - vector of 3 INTEGERs: min, default, max
 	Currently this tunable has no effect.
diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst
index 291a01264967..fe46d4867e2d 100644
--- a/Documentation/networking/msg_zerocopy.rst
+++ b/Documentation/networking/msg_zerocopy.rst
@@ -72,11 +72,6 @@ this flag, a process must first signal intent by setting a socket option:
 	if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
 		error(1, errno, "setsockopt zerocopy");
 
-Setting the socket option only works when the socket is in its initial
-(TCP_CLOSED) state.  Trying to set the option for a socket returned by accept(),
-for example, will lead to an EBUSY error. In this case, the option should be set
-to the listening socket and it will be inherited by the accepted sockets.
-
 Transmission
 ------------
 
diff --git a/Documentation/networking/net_dim.txt b/Documentation/networking/net_dim.txt
new file mode 100644
index 000000000000..9cb31c5e2dcd
--- /dev/null
+++ b/Documentation/networking/net_dim.txt
@@ -0,0 +1,174 @@
+Net DIM - Generic Network Dynamic Interrupt Moderation
+======================================================
+
+Author:
+	Tal Gilboa <talgi@mellanox.com>
+
+
+Contents
+=========
+
+- Assumptions
+- Introduction
+- The Net DIM Algorithm
+- Registering a Network Device to DIM
+- Example
+
+Part 0: Assumptions
+======================
+
+This document assumes the reader has basic knowledge in network drivers
+and in general interrupt moderation.
+
+
+Part I: Introduction
+======================
+
+Dynamic Interrupt Moderation (DIM) (in networking) refers to changing the
+interrupt moderation configuration of a channel in order to optimize packet
+processing. The mechanism includes an algorithm which decides if and how to
+change moderation parameters for a channel, usually by performing an analysis on
+runtime data sampled from the system. Net DIM is such a mechanism. In each
+iteration of the algorithm, it analyses a given sample of the data, compares it
+to the previous sample and if required, it can decide to change some of the
+interrupt moderation configuration fields. The data sample is composed of data
+bandwidth, the number of packets and the number of events. The time between
+samples is also measured. Net DIM compares the current and the previous data and
+returns an adjusted interrupt moderation configuration object. In some cases,
+the algorithm might decide not to change anything. The configuration fields are
+the minimum duration (microseconds) allowed between events and the maximum
+number of wanted packets per event. The Net DIM algorithm ascribes importance to
+increase bandwidth over reducing interrupt rate.
+
+
+Part II: The Net DIM Algorithm
+===============================
+
+Each iteration of the Net DIM algorithm follows these steps:
+1. Calculates new data sample.
+2. Compares it to previous sample.
+3. Makes a decision - suggests interrupt moderation configuration fields.
+4. Applies a schedule work function, which applies suggested configuration.
+
+The first two steps are straightforward, both the new and the previous data are
+supplied by the driver registered to Net DIM. The previous data is the new data
+supplied to the previous iteration. The comparison step checks the difference
+between the new and previous data and decides on the result of the last step.
+A step would result as "better" if bandwidth increases and as "worse" if
+bandwidth reduces. If there is no change in bandwidth, the packet rate is
+compared in a similar fashion - increase == "better" and decrease == "worse".
+In case there is no change in the packet rate as well, the interrupt rate is
+compared. Here the algorithm tries to optimize for lower interrupt rate so an
+increase in the interrupt rate is considered "worse" and a decrease is
+considered "better". Step #2 has an optimization for avoiding false results: it
+only considers a difference between samples as valid if it is greater than a
+certain percentage. Also, since Net DIM does not measure anything by itself, it
+assumes the data provided by the driver is valid.
+
+Step #3 decides on the suggested configuration based on the result from step #2
+and the internal state of the algorithm. The states reflect the "direction" of
+the algorithm: is it going left (reducing moderation), right (increasing
+moderation) or standing still. Another optimization is that if a decision
+to stay still is made multiple times, the interval between iterations of the
+algorithm would increase in order to reduce calculation overhead. Also, after
+"parking" on one of the most left or most right decisions, the algorithm may
+decide to verify this decision by taking a step in the other direction. This is
+done in order to avoid getting stuck in a "deep sleep" scenario. Once a
+decision is made, an interrupt moderation configuration is selected from
+the predefined profiles.
+
+The last step is to notify the registered driver that it should apply the
+suggested configuration. This is done by scheduling a work function, defined by
+the Net DIM API and provided by the registered driver.
+
+As you can see, Net DIM itself does not actively interact with the system. It
+would have trouble making the correct decisions if the wrong data is supplied to
+it and it would be useless if the work function would not apply the suggested
+configuration. This does, however, allow the registered driver some room for
+manoeuvre as it may provide partial data or ignore the algorithm suggestion
+under some conditions.
+
+
+Part III: Registering a Network Device to DIM
+==============================================
+
+Net DIM API exposes the main function net_dim(struct net_dim *dim,
+struct net_dim_sample end_sample). This function is the entry point to the Net
+DIM algorithm and has to be called every time the driver would like to check if
+it should change interrupt moderation parameters. The driver should provide two
+data structures: struct net_dim and struct net_dim_sample. Struct net_dim
+describes the state of DIM for a specific object (RX queue, TX queue,
+other queues, etc.). This includes the current selected profile, previous data
+samples, the callback function provided by the driver and more.
+Struct net_dim_sample describes a data sample, which will be compared to the
+data sample stored in struct net_dim in order to decide on the algorithm's next
+step. The sample should include bytes, packets and interrupts, measured by
+the driver.
+
+In order to use Net DIM from a networking driver, the driver needs to call the
+main net_dim() function. The recommended method is to call net_dim() on each
+interrupt. Since Net DIM has a built-in moderation and it might decide to skip
+iterations under certain conditions, there is no need to moderate the net_dim()
+calls as well. As mentioned above, the driver needs to provide an object of type
+struct net_dim to the net_dim() function call. It is advised for each entity
+using Net DIM to hold a struct net_dim as part of its data structure and use it
+as the main Net DIM API object. The struct net_dim_sample should hold the latest
+bytes, packets and interrupts count. No need to perform any calculations, just
+include the raw data.
+
+The net_dim() call itself does not return anything. Instead Net DIM relies on
+the driver to provide a callback function, which is called when the algorithm
+decides to make a change in the interrupt moderation parameters. This callback
+will be scheduled and run in a separate thread in order not to add overhead to
+the data flow. After the work is done, Net DIM algorithm needs to be set to
+the proper state in order to move to the next iteration.
+
+
+Part IV: Example
+=================
+
+The following code demonstrates how to register a driver to Net DIM. The actual
+usage is not complete but it should make the outline of the usage clear.
+
+my_driver.c:
+
+#include <linux/net_dim.h>
+
+/* Callback for net DIM to schedule on a decision to change moderation */
+void my_driver_do_dim_work(struct work_struct *work)
+{
+	/* Get struct net_dim from struct work_struct */
+	struct net_dim *dim = container_of(work, struct net_dim,
+					   work);
+	/* Do interrupt moderation related stuff */
+	...
+
+	/* Signal net DIM work is done and it should move to next iteration */
+	dim->state = NET_DIM_START_MEASURE;
+}
+
+/* My driver's interrupt handler */
+int my_driver_handle_interrupt(struct my_driver_entity *my_entity, ...)
+{
+	...
+	/* A struct to hold current measured data */
+	struct net_dim_sample dim_sample;
+	...
+	/* Initiate data sample struct with current data */
+	net_dim_sample(my_entity->events,
+		       my_entity->packets,
+		       my_entity->bytes,
+		       &dim_sample);
+	/* Call net DIM */
+	net_dim(&my_entity->dim, dim_sample);
+	...
+}
+
+/* My entity's initialization function (my_entity was already allocated) */
+int my_driver_init_my_entity(struct my_driver_entity *my_entity, ...)
+{
+	...
+	/* Initiate struct work_struct with my driver's callback function */
+	INIT_WORK(&my_entity->dim.work, my_driver_do_dim_work);
+	...
+}
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index bf654845556e..999eb41da81d 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -7,15 +7,12 @@ socket interface on 2.4/2.6/3.x kernels. This type of sockets is used for
 i) capture network traffic with utilities like tcpdump, ii) transmit network
 traffic, or any other that needs raw access to network interface.
 
-You can find the latest version of this document at:
-    http://wiki.ipxwarzone.com/index.php5?title=Linux_packet_mmap
-
 Howto can be found at:
-    http://wiki.gnu-log.net (packet_mmap)
+    https://sites.google.com/site/packetmmap/
 
 Please send your comments to
     Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es>
-    Johann Baudy <johann.baudy@gnu-log.net>
+    Johann Baudy
 
 -------------------------------------------------------------------------------
 + Why use PACKET_MMAP
@@ -51,17 +48,8 @@ From the user standpoint, you should use the higher level libpcap library, which
 is a de facto standard, portable across nearly all operating systems
 including Win32. 
 
-Said that, at time of this writing, official libpcap 0.8.1 is out and doesn't include
-support for PACKET_MMAP, and also probably the libpcap included in your distribution. 
-
-I'm aware of two implementations of PACKET_MMAP in libpcap:
-
-    http://wiki.ipxwarzone.com/		     (by Simon Patarin, based on libpcap 0.6.2)
-    http://public.lanl.gov/cpw/              (by Phil Wood, based on lastest libpcap)
-
-The rest of this document is intended for people who want to understand
-the low level details or want to improve libpcap by including PACKET_MMAP
-support.
+Packet MMAP support was integrated into libpcap around the time of version 1.3.0;
+TPACKET_V3 support was added in version 1.5.0
 
 --------------------------------------------------------------------------------
 + How to use mmap() directly to improve capture process
@@ -174,7 +162,7 @@ As capture, each frame contains two parts:
  /* bind socket to eth0 */
  bind(this->socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll));
 
- A complete tutorial is available at: http://wiki.gnu-log.net/
+ A complete tutorial is available at: https://sites.google.com/site/packetmmap/
 
 By default, the user should put data at :
  frame base + TPACKET_HDRLEN - sizeof(struct sockaddr_ll)
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 35c62f522754..5992602469d8 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -270,6 +270,18 @@ optmem_max
 Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
 of struct cmsghdr structures with appended data.
 
+fb_tunnels_only_for_init_net
+----------------------------
+
+Controls if fallback tunnels (like tunl0, gre0, gretap0, erspan0,
+sit0, ip6tnl0, ip6gre0) are automatically created when a new
+network namespace is created, if corresponding tunnel is present
+in initial network namespace.
+If set to 1, these devices are not automatically created, and
+user space is responsible for creating them if needed.
+
+Default : 0  (for compatibility reasons)
+
 2. /proc/sys/net/unix - Parameters for Unix domain sockets
 -------------------------------------------------------