diff options
167 files changed, 3761 insertions, 3132 deletions
diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index b519503be51a..4717bc24eada 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -4,19 +4,24 @@ This file provides information on what the device node for the Ethernet AVB interface contains. Required properties: -- compatible: "renesas,etheravb-r8a7790" if the device is a part of R8A7790 SoC. - "renesas,etheravb-r8a7791" if the device is a part of R8A7791 SoC. - "renesas,etheravb-r8a7792" if the device is a part of R8A7792 SoC. - "renesas,etheravb-r8a7793" if the device is a part of R8A7793 SoC. - "renesas,etheravb-r8a7794" if the device is a part of R8A7794 SoC. - "renesas,etheravb-r8a7795" if the device is a part of R8A7795 SoC. - "renesas,etheravb-r8a7796" if the device is a part of R8A7796 SoC. - "renesas,etheravb-rcar-gen2" for generic R-Car Gen 2 compatible interface. - "renesas,etheravb-rcar-gen3" for generic R-Car Gen 3 compatible interface. +- compatible: Must contain one or more of the following: + - "renesas,etheravb-r8a7743" for the R8A7743 SoC. + - "renesas,etheravb-r8a7790" for the R8A7790 SoC. + - "renesas,etheravb-r8a7791" for the R8A7791 SoC. + - "renesas,etheravb-r8a7792" for the R8A7792 SoC. + - "renesas,etheravb-r8a7793" for the R8A7793 SoC. + - "renesas,etheravb-r8a7794" for the R8A7794 SoC. + - "renesas,etheravb-rcar-gen2" as a fallback for the above + R-Car Gen2 and RZ/G1 devices. - When compatible with the generic version, nodes must list the - SoC-specific version corresponding to the platform first - followed by the generic version. + - "renesas,etheravb-r8a7795" for the R8A7795 SoC. + - "renesas,etheravb-r8a7796" for the R8A7796 SoC. + - "renesas,etheravb-rcar-gen3" as a fallback for the above + R-Car Gen3 devices. 
+ + When compatible with the generic version, nodes must list the + SoC-specific version corresponding to the platform first followed by + the generic version. - reg: offset and length of (1) the register block and (2) the stream buffer. - interrupts: A list of interrupt-specifiers, one for each entry in diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt b/Documentation/devicetree/bindings/net/xilinx_axienet.txt new file mode 100644 index 000000000000..38f9ec076743 --- /dev/null +++ b/Documentation/devicetree/bindings/net/xilinx_axienet.txt @@ -0,0 +1,55 @@ +XILINX AXI ETHERNET Device Tree Bindings +-------------------------------------------------------- + +Also called AXI 1G/2.5G Ethernet Subsystem, the xilinx axi ethernet IP core +provides connectivity to an external ethernet PHY supporting different +interfaces: MII, GMII, RGMII, SGMII, 1000BaseX. It also includes two +segments of memory for buffering TX and RX, as well as the capability of +offloading TX/RX checksum calculation off the processor. + +Management configuration is done through the AXI interface, while payload is +sent and received through means of an AXI DMA controller. This driver +includes the DMA driver code, so this driver is incompatible with AXI DMA +driver. + +For more details about mdio please refer phy.txt file in the same directory. + +Required properties: +- compatible : Must be one of "xlnx,axi-ethernet-1.00.a", + "xlnx,axi-ethernet-1.01.a", "xlnx,axi-ethernet-2.01.a" +- reg : Address and length of the IO space. +- interrupts : Should be a list of two interrupt, TX and RX. +- phy-handle : Should point to the external phy device. + See ethernet.txt file in the same directory. +- xlnx,rxmem : Set to allocated memory buffer for Rx/Tx in the hardware + +Optional properties: +- phy-mode : See ethernet.txt +- xlnx,phy-type : Deprecated, do not use, but still accepted in preference + to phy-mode. 
+- xlnx,txcsum : 0 or empty for disabling TX checksum offload, + 1 to enable partial TX checksum offload, + 2 to enable full TX checksum offload +- xlnx,rxcsum : Same values as xlnx,txcsum but for RX checksum offload + +Example: + axi_ethernet_eth: ethernet@40c00000 { + compatible = "xlnx,axi-ethernet-1.00.a"; + device_type = "network"; + interrupt-parent = <&microblaze_0_axi_intc>; + interrupts = <2 0>; + phy-mode = "mii"; + reg = <0x40c00000 0x40000>; + xlnx,rxcsum = <0x2>; + xlnx,rxmem = <0x800>; + xlnx,txcsum = <0x2>; + phy-handle = <&phy0>; + axi_ethernetlite_0_mdio: mdio { + #address-cells = <1>; + #size-cells = <0>; + phy0: phy@0 { + device_type = "ethernet-phy"; + reg = <1>; + }; + }; + }; diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 974ab47ae53a..f485d553e65c 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1291,8 +1291,7 @@ tag - INTEGER xfrm4_gc_thresh - INTEGER The threshold at which we will start garbage collecting for IPv4 destination cache entries. At twice this value the system will - refuse new allocations. The value must be set below the flowcache - limit (4096 * number of online cpus) to take effect. + refuse new allocations. igmp_link_local_mcast_reports - BOOLEAN Enable IGMP reports for link local multicast groups in the @@ -1778,8 +1777,7 @@ ratelimit - INTEGER xfrm6_gc_thresh - INTEGER The threshold at which we will start garbage collecting for IPv6 destination cache entries. At twice this value the system will - refuse new allocations. The value must be set below the flowcache - limit (4096 * number of online cpus) to take effect. + refuse new allocations. 
IPv6 Update by: diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index 906705e5f776..acf16c323e38 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -2374,7 +2374,7 @@ MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles"); /********** module entry **********/ -static struct pci_device_id amb_pci_tbl[] = { +static const struct pci_device_id amb_pci_tbl[] = { { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR), 0 }, { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD), 0 }, { 0, } diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index b042ec458544..ce47eb17901d 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -2292,7 +2292,7 @@ err_disable: } -static struct pci_device_id eni_pci_tbl[] = { +static const struct pci_device_id eni_pci_tbl[] = { { PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_FPGA), 0 /* FPGA */ }, { PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_ASIC), 1 /* ASIC */ }, { 0, } diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 22dcab952a24..6b6368a56526 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -2030,7 +2030,7 @@ static void firestream_remove_one(struct pci_dev *pdev) func_exit (); } -static struct pci_device_id firestream_pci_tbl[] = { +static const struct pci_device_id firestream_pci_tbl[] = { { PCI_VDEVICE(FUJITSU_ME, PCI_DEVICE_ID_FUJITSU_FS50), FS_IS50}, { PCI_VDEVICE(FUJITSU_ME, PCI_DEVICE_ID_FUJITSU_FS155), FS_IS155}, { 0, } diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index f0433adcd8fc..f8b7e86907cc 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -2757,7 +2757,7 @@ static void fore200e_pca_remove_one(struct pci_dev *pci_dev) } -static struct pci_device_id fore200e_pca_tbl[] = { +static const struct pci_device_id fore200e_pca_tbl[] = { { PCI_VENDOR_ID_FORE, PCI_DEVICE_ID_FORE_PCA200E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, (unsigned long) &fore200e_bus[0] }, { 0, } diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 
37ee21c5a5ca..8f6156d475d1 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -2851,7 +2851,7 @@ MODULE_PARM_DESC(irq_coalesce, "use interrupt coalescing (default 1)"); module_param(sdh, bool, 0); MODULE_PARM_DESC(sdh, "use SDH framing (default 0)"); -static struct pci_device_id he_pci_tbl[] = { +static const struct pci_device_id he_pci_tbl[] = { { PCI_VDEVICE(FORE, PCI_DEVICE_ID_FORE_HE), 0 }, { 0, } }; diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 0f18480b33b5..7e76b35f422c 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -2867,7 +2867,7 @@ MODULE_PARM_DESC(max_tx_size, "maximum size of TX AAL5 frames"); MODULE_PARM_DESC(max_rx_size, "maximum size of RX AAL5 frames"); MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles"); -static struct pci_device_id hrz_pci_tbl[] = { +static const struct pci_device_id hrz_pci_tbl[] = { { PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_HORIZON, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, { 0, } diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 60bacba03d17..b7a168c46692 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3725,7 +3725,7 @@ err_out_disable_pdev: return err; } -static struct pci_device_id idt77252_pci_tbl[] = +static const struct pci_device_id idt77252_pci_tbl[] = { { PCI_VDEVICE(IDT, PCI_DEVICE_ID_IDT_IDT77252), 0 }, { 0, } diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index a4fa6c82261e..fc72b763fdd7 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -3266,7 +3266,7 @@ static void ia_remove_one(struct pci_dev *pdev) kfree(iadev); } -static struct pci_device_id ia_pci_tbl[] = { +static const struct pci_device_id ia_pci_tbl[] = { { PCI_VENDOR_ID_IPHASE, 0x0008, PCI_ANY_ID, PCI_ANY_ID, }, { PCI_VENDOR_ID_IPHASE, 0x0009, PCI_ANY_ID, PCI_ANY_ID, }, { 0,} diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index 1a9bc51284b0..2351dad78ff5 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -2589,7 +2589,7 @@ static int 
lanai_init_one(struct pci_dev *pci, return result; } -static struct pci_device_id lanai_pci_tbl[] = { +static const struct pci_device_id lanai_pci_tbl[] = { { PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_LANAI2) }, { PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_LANAIHB) }, { 0, } /* terminal entry */ diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index d879f3bca107..9588d80f318e 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -253,7 +253,7 @@ static void nicstar_remove_one(struct pci_dev *pcidev) kfree(card); } -static struct pci_device_id nicstar_pci_tbl[] = { +static const struct pci_device_id nicstar_pci_tbl[] = { { PCI_VDEVICE(IDT, PCI_DEVICE_ID_IDT_IDT77201), 0 }, {0,} /* terminate list */ }; diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index c8f2ca6d8b29..585984ee7dbd 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -1476,7 +1476,7 @@ static void fpga_remove(struct pci_dev *dev) kfree(card); } -static struct pci_device_id fpga_pci_tbl[] = { +static const struct pci_device_id fpga_pci_tbl[] = { { 0x10ee, 0x0300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, { 0, } }; diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 07bdd51b3b9a..1ef67db03c8e 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -1642,7 +1642,7 @@ out_free: MODULE_LICENSE("GPL"); -static struct pci_device_id zatm_pci_tbl[] = { +static const struct pci_device_id zatm_pci_tbl[] = { { PCI_VDEVICE(ZEITNET, PCI_DEVICE_ID_ZEITNET_1221), ZATM_COPPER }, { PCI_VDEVICE(ZEITNET, PCI_DEVICE_ID_ZEITNET_1225), 0 }, { 0, } diff --git a/drivers/isdn/hardware/eicon/divacapi.h b/drivers/isdn/hardware/eicon/divacapi.h index a315a2914d70..c4868a0d82f4 100644 --- a/drivers/isdn/hardware/eicon/divacapi.h +++ b/drivers/isdn/hardware/eicon/divacapi.h @@ -26,15 +26,7 @@ /*#define DEBUG */ - - - - - - - - - +#include <linux/types.h> #define IMPLEMENT_DTMF 1 #define IMPLEMENT_LINE_INTERCONNECT2 1 @@ -82,8 +74,6 @@ #define CODEC_PERMANENT 0x02 #define ADV_VOICE 
0x03 #define MAX_CIP_TYPES 5 /* kind of CIP types for group optimization */ -#define C_IND_MASK_DWORDS ((MAX_APPL + 32) >> 5) - #define FAX_CONNECT_INFO_BUFFER_SIZE 256 #define NCPI_BUFFER_SIZE 256 @@ -265,8 +255,8 @@ struct _PLCI { word ncci_ring_list; byte inc_dis_ncci_table[MAX_CHANNELS_PER_PLCI]; t_std_internal_command internal_command_queue[MAX_INTERNAL_COMMAND_LEVELS]; - dword c_ind_mask_table[C_IND_MASK_DWORDS]; - dword group_optimization_mask_table[C_IND_MASK_DWORDS]; + DECLARE_BITMAP(c_ind_mask_table, MAX_APPL); + DECLARE_BITMAP(group_optimization_mask_table, MAX_APPL); byte RBuffer[200]; dword msg_in_queue[MSG_IN_QUEUE_SIZE/sizeof(dword)]; API_SAVE saved_msg; diff --git a/drivers/isdn/hardware/eicon/message.c b/drivers/isdn/hardware/eicon/message.c index 3b11422b1cce..eadd1ed1e014 100644 --- a/drivers/isdn/hardware/eicon/message.c +++ b/drivers/isdn/hardware/eicon/message.c @@ -23,9 +23,7 @@ * */ - - - +#include <linux/bitmap.h> #include "platform.h" #include "di_defs.h" @@ -35,19 +33,9 @@ #include "mdm_msg.h" #include "divasync.h" - - #define FILE_ "MESSAGE.C" #define dprintf - - - - - - - - /*------------------------------------------------------------------*/ /* This is options supported for all adapters that are server by */ /* XDI driver. 
Allo it is not necessary to ask it from every adapter*/ @@ -72,9 +60,6 @@ static dword diva_xdi_extended_features = 0; /*------------------------------------------------------------------*/ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci); -static void set_group_ind_mask(PLCI *plci); -static void clear_group_ind_mask_bit(PLCI *plci, word b); -static byte test_group_ind_mask_bit(PLCI *plci, word b); void AutomaticLaw(DIVA_CAPI_ADAPTER *); word CapiRelease(word); word CapiRegister(word); @@ -1087,106 +1072,6 @@ static void plci_remove(PLCI *plci) } /*------------------------------------------------------------------*/ -/* Application Group function helpers */ -/*------------------------------------------------------------------*/ - -static void set_group_ind_mask(PLCI *plci) -{ - word i; - - for (i = 0; i < C_IND_MASK_DWORDS; i++) - plci->group_optimization_mask_table[i] = 0xffffffffL; -} - -static void clear_group_ind_mask_bit(PLCI *plci, word b) -{ - plci->group_optimization_mask_table[b >> 5] &= ~(1L << (b & 0x1f)); -} - -static byte test_group_ind_mask_bit(PLCI *plci, word b) -{ - return ((plci->group_optimization_mask_table[b >> 5] & (1L << (b & 0x1f))) != 0); -} - -/*------------------------------------------------------------------*/ -/* c_ind_mask operations for arbitrary MAX_APPL */ -/*------------------------------------------------------------------*/ - -static void clear_c_ind_mask(PLCI *plci) -{ - word i; - - for (i = 0; i < C_IND_MASK_DWORDS; i++) - plci->c_ind_mask_table[i] = 0; -} - -static byte c_ind_mask_empty(PLCI *plci) -{ - word i; - - i = 0; - while ((i < C_IND_MASK_DWORDS) && (plci->c_ind_mask_table[i] == 0)) - i++; - return (i == C_IND_MASK_DWORDS); -} - -static void set_c_ind_mask_bit(PLCI *plci, word b) -{ - plci->c_ind_mask_table[b >> 5] |= (1L << (b & 0x1f)); -} - -static void clear_c_ind_mask_bit(PLCI *plci, word b) -{ - plci->c_ind_mask_table[b >> 5] &= ~(1L << (b & 0x1f)); -} - -static byte test_c_ind_mask_bit(PLCI 
*plci, word b) -{ - return ((plci->c_ind_mask_table[b >> 5] & (1L << (b & 0x1f))) != 0); -} - -static void dump_c_ind_mask(PLCI *plci) -{ - word i, j, k; - dword d; - char *p; - char buf[40]; - - for (i = 0; i < C_IND_MASK_DWORDS; i += 4) - { - p = buf + 36; - *p = '\0'; - for (j = 0; j < 4; j++) - { - if (i + j < C_IND_MASK_DWORDS) - { - d = plci->c_ind_mask_table[i + j]; - for (k = 0; k < 8; k++) - { - *(--p) = hex_asc_lo(d); - d >>= 4; - } - } - else if (i != 0) - { - for (k = 0; k < 8; k++) - *(--p) = ' '; - } - *(--p) = ' '; - } - dbug(1, dprintf("c_ind_mask =%s", (char *) p)); - } -} - - - - - -#define dump_plcis(a) - - - -/*------------------------------------------------------------------*/ /* translation function for each message */ /*------------------------------------------------------------------*/ @@ -1457,13 +1342,13 @@ static byte connect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a, return 1; } else if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT) { - clear_c_ind_mask_bit(plci, (word)(appl->Id - 1)); - dump_c_ind_mask(plci); + __clear_bit(appl->Id - 1, plci->c_ind_mask_table); + dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table)); Reject = GET_WORD(parms[0].info); dbug(1, dprintf("Reject=0x%x", Reject)); if (Reject) { - if (c_ind_mask_empty(plci)) + if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) { if ((Reject & 0xff00) == 0x3400) { @@ -1553,11 +1438,8 @@ static byte connect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a, sig_req(plci, CALL_RES, 0); } - for (i = 0; i < max_appl; i++) { - if (test_c_ind_mask_bit(plci, i)) { - sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED); - } - } + for_each_set_bit(i, plci->c_ind_mask_table, max_appl) + sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED); } } return 1; @@ -1584,13 +1466,10 @@ static byte disconnect_req(dword Id, word Number, DIVA_CAPI_ADAPTER *a, { if (plci->State == INC_CON_PENDING || plci->State == 
INC_CON_ALERT) { - clear_c_ind_mask_bit(plci, (word)(appl->Id - 1)); + __clear_bit(appl->Id - 1, plci->c_ind_mask_table); plci->appl = appl; - for (i = 0; i < max_appl; i++) - { - if (test_c_ind_mask_bit(plci, i)) - sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0); - } + for_each_set_bit(i, plci->c_ind_mask_table, max_appl) + sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0); plci->State = OUTG_DIS_PENDING; } if (plci->Sig.Id && plci->appl) @@ -1634,7 +1513,7 @@ static byte disconnect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a, { /* clear ind mask bit, just in case of collsion of */ /* DISCONNECT_IND and CONNECT_RES */ - clear_c_ind_mask_bit(plci, (word)(appl->Id - 1)); + __clear_bit(appl->Id - 1, plci->c_ind_mask_table); ncci_free_receive_buffers(plci, 0); if (plci_remove_check(plci)) { @@ -1642,7 +1521,7 @@ static byte disconnect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a, } if (plci->State == INC_DIS_PENDING || plci->State == SUSPENDING) { - if (c_ind_mask_empty(plci)) { + if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) { if (plci->State != SUSPENDING) plci->State = IDLE; dbug(1, dprintf("chs=%d", plci->channels)); if (!plci->channels) { @@ -3351,13 +3230,11 @@ static byte select_b_req(dword Id, word Number, DIVA_CAPI_ADAPTER *a, } plci->State = INC_CON_CONNECTED_ALERT; plci->appl = appl; - clear_c_ind_mask_bit(plci, (word)(appl->Id - 1)); - dump_c_ind_mask(plci); - for (i = 0; i < max_appl; i++) /* disconnect the other appls */ - { /* its quasi a connect */ - if (test_c_ind_mask_bit(plci, i)) - sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED); - } + __clear_bit(appl->Id - 1, plci->c_ind_mask_table); + dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table)); + /* disconnect the other appls its quasi a connect */ + for_each_set_bit(i, plci->c_ind_mask_table, max_appl) + sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED); } api_save_msg(msg, "s", &plci->saved_msg); @@ -5692,19 
+5569,17 @@ static void sig_ind(PLCI *plci) cip = find_cip(a, parms[4], parms[6]); cip_mask = 1L << cip; dbug(1, dprintf("cip=%d,cip_mask=%lx", cip, cip_mask)); - clear_c_ind_mask(plci); + bitmap_zero(plci->c_ind_mask_table, MAX_APPL); if (!remove_started && !a->adapter_disabled) { - set_c_ind_mask_bit(plci, MAX_APPL); group_optimization(a, plci); - for (i = 0; i < max_appl; i++) { + for_each_set_bit(i, plci->group_optimization_mask_table, max_appl) { if (application[i].Id && (a->CIP_Mask[i] & 1 || a->CIP_Mask[i] & cip_mask) - && CPN_filter_ok(parms[0], a, i) - && test_group_ind_mask_bit(plci, i)) { + && CPN_filter_ok(parms[0], a, i)) { dbug(1, dprintf("storedcip_mask[%d]=0x%lx", i, a->CIP_Mask[i])); - set_c_ind_mask_bit(plci, i); - dump_c_ind_mask(plci); + __set_bit(i, plci->c_ind_mask_table); + dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table)); plci->State = INC_CON_PENDING; plci->call_dir = (plci->call_dir & ~(CALL_DIR_OUT | CALL_DIR_ORIGINATE)) | CALL_DIR_IN | CALL_DIR_ANSWER; @@ -5750,10 +5625,9 @@ static void sig_ind(PLCI *plci) SendMultiIE(plci, Id, multi_pi_parms, PI, 0x210, true)); } } - clear_c_ind_mask_bit(plci, MAX_APPL); - dump_c_ind_mask(plci); + dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table)); } - if (c_ind_mask_empty(plci)) { + if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) { sig_req(plci, HANGUP, 0); send_req(plci); plci->State = IDLE; @@ -5994,13 +5868,13 @@ static void sig_ind(PLCI *plci) break; case RESUME: - clear_c_ind_mask_bit(plci, (word)(plci->appl->Id - 1)); + __clear_bit(plci->appl->Id - 1, plci->c_ind_mask_table); PUT_WORD(&resume_cau[4], GOOD); sendf(plci->appl, _FACILITY_I, Id, 0, "ws", (word)3, resume_cau); break; case SUSPEND: - clear_c_ind_mask(plci); + bitmap_zero(plci->c_ind_mask_table, MAX_APPL); if (plci->NL.Id && !plci->nl_remove_id) { mixer_remove(plci); @@ -6037,15 +5911,12 @@ static void sig_ind(PLCI *plci) if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT) { 
- for (i = 0; i < max_appl; i++) - { - if (test_c_ind_mask_bit(plci, i)) - sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0); - } + for_each_set_bit(i, plci->c_ind_mask_table, max_appl) + sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0); } else { - clear_c_ind_mask(plci); + bitmap_zero(plci->c_ind_mask_table, MAX_APPL); } if (!plci->appl) { @@ -6055,7 +5926,7 @@ static void sig_ind(PLCI *plci) a->listen_active--; } plci->State = INC_DIS_PENDING; - if (c_ind_mask_empty(plci)) + if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) { plci->State = IDLE; if (plci->NL.Id && !plci->nl_remove_id) @@ -6341,14 +6212,10 @@ static void SendInfo(PLCI *plci, dword Id, byte **parms, byte iesent) || Info_Number == DSP || Info_Number == UUI) { - for (j = 0; j < max_appl; j++) - { - if (test_c_ind_mask_bit(plci, j)) - { - dbug(1, dprintf("Ovl_Ind")); - iesent = true; - sendf(&application[j], _INFO_I, Id, 0, "wS", Info_Number, Info_Element); - } + for_each_set_bit(j, plci->c_ind_mask_table, max_appl) { + dbug(1, dprintf("Ovl_Ind")); + iesent = true; + sendf(&application[j], _INFO_I, Id, 0, "wS", Info_Number, Info_Element); } } } /* all other signalling states */ @@ -6416,14 +6283,10 @@ static byte SendMultiIE(PLCI *plci, dword Id, byte **parms, byte ie_type, } else if (!plci->appl && Info_Number) { /* overlap receiving broadcast */ - for (j = 0; j < max_appl; j++) - { - if (test_c_ind_mask_bit(plci, j)) - { - iesent = true; - dbug(1, dprintf("Mlt_Ovl_Ind")); - sendf(&application[j] , _INFO_I, Id, 0, "wS", Info_Number, Info_Element); - } + for_each_set_bit(j, plci->c_ind_mask_table, max_appl) { + iesent = true; + dbug(1, dprintf("Mlt_Ovl_Ind")); + sendf(&application[j] , _INFO_I, Id, 0, "wS", Info_Number, Info_Element); } } /* all other signalling states */ else if (Info_Number @@ -7270,7 +7133,6 @@ static word get_plci(DIVA_CAPI_ADAPTER *a) word i, j; PLCI *plci; - dump_plcis(a); for (i = 0; i < a->max_plci && a->plci[i].Id; i++); if (i == a->max_plci) { dbug(1, 
dprintf("get_plci: out of PLCIs")); @@ -7321,8 +7183,8 @@ static word get_plci(DIVA_CAPI_ADAPTER *a) plci->ncci_ring_list = 0; for (j = 0; j < MAX_CHANNELS_PER_PLCI; j++) plci->inc_dis_ncci_table[j] = 0; - clear_c_ind_mask(plci); - set_group_ind_mask(plci); + bitmap_zero(plci->c_ind_mask_table, MAX_APPL); + bitmap_fill(plci->group_optimization_mask_table, MAX_APPL); plci->fax_connect_info_length = 0; plci->nsf_control_bits = 0; plci->ncpi_state = 0x00; @@ -9373,10 +9235,10 @@ word CapiRelease(word Id) if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT) { - if (test_c_ind_mask_bit(plci, (word)(Id - 1))) + if (test_bit(Id - 1, plci->c_ind_mask_table)) { - clear_c_ind_mask_bit(plci, (word)(Id - 1)); - if (c_ind_mask_empty(plci)) + __clear_bit(Id - 1, plci->c_ind_mask_table); + if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) { sig_req(plci, HANGUP, 0); send_req(plci); @@ -9384,10 +9246,10 @@ word CapiRelease(word Id) } } } - if (test_c_ind_mask_bit(plci, (word)(Id - 1))) + if (test_bit(Id - 1, plci->c_ind_mask_table)) { - clear_c_ind_mask_bit(plci, (word)(Id - 1)); - if (c_ind_mask_empty(plci)) + __clear_bit(Id - 1, plci->c_ind_mask_table); + if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) { if (!plci->appl) { @@ -9452,7 +9314,7 @@ word CapiRelease(word Id) static word plci_remove_check(PLCI *plci) { if (!plci) return true; - if (!plci->NL.Id && c_ind_mask_empty(plci)) + if (!plci->NL.Id && bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) { if (plci->Sig.Id == 0xff) plci->Sig.Id = 0; @@ -14735,7 +14597,8 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci) word appl_number_group_type[MAX_APPL]; PLCI *auxplci; - set_group_ind_mask(plci); /* all APPLs within this inc. call are allowed to dial in */ + /* all APPLs within this inc. 
call are allowed to dial in */ + bitmap_fill(plci->group_optimization_mask_table, MAX_APPL); if (!a->group_optimization_enabled) { @@ -14771,13 +14634,12 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci) if (a->plci[k].Id) { auxplci = &a->plci[k]; - if (auxplci->appl == &application[i]) /* application has a busy PLCI */ - { + if (auxplci->appl == &application[i]) { + /* application has a busy PLCI */ busy = true; dbug(1, dprintf("Appl 0x%x is busy", i + 1)); - } - else if (test_c_ind_mask_bit(auxplci, i)) /* application has an incoming call pending */ - { + } else if (test_bit(i, plci->c_ind_mask_table)) { + /* application has an incoming call pending */ busy = true; dbug(1, dprintf("Appl 0x%x has inc. call pending", i + 1)); } @@ -14826,7 +14688,8 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci) if (appl_number_group_type[i] == appl_number_group_type[j]) { dbug(1, dprintf("Appl 0x%x is member of group 0x%x, no call", j + 1, appl_number_group_type[j])); - clear_group_ind_mask_bit(plci, j); /* disable call on other group members */ + /* disable call on other group members */ + __clear_bit(j, plci->group_optimization_mask_table); appl_number_group_type[j] = 0; /* remove disabled group member from group list */ } } @@ -14834,7 +14697,7 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci) } else /* application should not get a call */ { - clear_group_ind_mask_bit(plci, i); + __clear_bit(i, plci->group_optimization_mask_table); } } diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index cbb4f8566bbe..d09b2b46ab63 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -20,7 +20,7 @@ #include <linux/if_arcnet.h> #ifdef __KERNEL__ -#include <linux/irqreturn.h> +#include <linux/interrupt.h> /* * RECON_THRESHOLD is the maximum number of RECON messages to receive diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index 
01cab9548785..eb7f76753c9c 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -109,7 +109,7 @@ static struct attribute *com20020_state_attrs[] = { NULL, }; -static struct attribute_group com20020_state_group = { +static const struct attribute_group com20020_state_group = { .name = NULL, .attrs = com20020_state_attrs, }; diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 770623a0cc01..040b493f60ae 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -759,7 +759,7 @@ static struct attribute *per_bond_attrs[] = { NULL, }; -static struct attribute_group bonding_group = { +static const struct attribute_group bonding_group = { .name = "bonding", .attrs = per_bond_attrs, }; diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 0e0df0ba288c..f37ce0e1b603 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -1232,7 +1232,7 @@ static struct attribute *at91_sysfs_attrs[] = { NULL, }; -static struct attribute_group at91_sysfs_attr_group = { +static const struct attribute_group at91_sysfs_attr_group = { .attrs = at91_sysfs_attrs, }; diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c index 2ba1a81500c1..12a53c8e8e1d 100644 --- a/drivers/net/can/janz-ican3.c +++ b/drivers/net/can/janz-ican3.c @@ -1875,7 +1875,7 @@ static struct attribute *ican3_sysfs_attrs[] = { NULL, }; -static struct attribute_group ican3_sysfs_attr_group = { +static const struct attribute_group ican3_sysfs_attr_group = { .attrs = ican3_sysfs_attrs, }; diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 5bcdd33101b0..7fa19d4a8e13 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -810,31 +810,40 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, mutex_unlock(&chip->reg_lock); } -static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, - struct 
ethtool_eee *e) +static int mv88e6xxx_energy_detect_read(struct mv88e6xxx_chip *chip, int port, + struct ethtool_eee *eee) { - struct mv88e6xxx_chip *chip = ds->priv; - u16 reg; int err; - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE)) + if (!chip->info->ops->phy_energy_detect_read) return -EOPNOTSUPP; - mutex_lock(&chip->reg_lock); - - err = mv88e6xxx_phy_read(chip, port, 16, &reg); + /* assign eee->eee_enabled and eee->tx_lpi_enabled */ + err = chip->info->ops->phy_energy_detect_read(chip, port, eee); if (err) - goto out; + return err; - e->eee_enabled = !!(reg & 0x0200); - e->tx_lpi_enabled = !!(reg & 0x0100); + /* assign eee->eee_active */ + return mv88e6xxx_port_status_eee(chip, port, eee); +} - err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &reg); - if (err) - goto out; +static int mv88e6xxx_energy_detect_write(struct mv88e6xxx_chip *chip, int port, + struct ethtool_eee *eee) +{ + if (!chip->info->ops->phy_energy_detect_write) + return -EOPNOTSUPP; - e->eee_active = !!(reg & MV88E6352_PORT_STS_EEE); -out: + return chip->info->ops->phy_energy_detect_write(chip, port, eee); +} + +static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, + struct ethtool_eee *e) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err; + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_energy_detect_read(chip, port, e); mutex_unlock(&chip->reg_lock); return err; @@ -844,26 +853,10 @@ static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, struct phy_device *phydev, struct ethtool_eee *e) { struct mv88e6xxx_chip *chip = ds->priv; - u16 reg; int err; - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE)) - return -EOPNOTSUPP; - mutex_lock(&chip->reg_lock); - - err = mv88e6xxx_phy_read(chip, port, 16, &reg); - if (err) - goto out; - - reg &= ~0x0300; - if (e->eee_enabled) - reg |= 0x0200; - if (e->tx_lpi_enabled) - reg |= 0x0100; - - err = mv88e6xxx_phy_write(chip, port, 16, reg); -out: + err = mv88e6xxx_energy_detect_write(chip, port, e); mutex_unlock(&chip->reg_lock); return 
err; @@ -926,6 +919,22 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, dev_err(ds->dev, "p%d: failed to update state\n", port); } +static int mv88e6xxx_pot_setup(struct mv88e6xxx_chip *chip) +{ + if (chip->info->ops->pot_clear) + return chip->info->ops->pot_clear(chip); + + return 0; +} + +static int mv88e6xxx_rsvd2cpu_setup(struct mv88e6xxx_chip *chip) +{ + if (chip->info->ops->mgmt_rsvd2cpu) + return chip->info->ops->mgmt_rsvd2cpu(chip); + + return 0; +} + static int mv88e6xxx_atu_setup(struct mv88e6xxx_chip *chip) { int err; @@ -2116,7 +2125,7 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) goto unlock; /* Setup Switch Global 2 Registers */ - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) { + if (chip->info->global2_addr) { err = mv88e6xxx_g2_setup(chip); if (err) goto unlock; @@ -2142,16 +2151,13 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) if (err) goto unlock; - /* Some generations have the configuration of sending reserved - * management frames to the CPU in global2, others in - * global1. Hence it does not fit the two setup functions - * above. 
- */ - if (chip->info->ops->mgmt_rsvd2cpu) { - err = chip->info->ops->mgmt_rsvd2cpu(chip); - if (err) - goto unlock; - } + err = mv88e6xxx_pot_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_rsvd2cpu_setup(chip); + if (err) + goto unlock; unlock: mutex_unlock(&chip->reg_lock); @@ -2385,7 +2391,8 @@ static const struct mv88e6xxx_ops mv88e6085_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .ppu_enable = mv88e6185_g1_ppu_enable, .ppu_disable = mv88e6185_g1_ppu_disable, .reset = mv88e6185_g1_reset, @@ -2408,7 +2415,7 @@ static const struct mv88e6xxx_ops mv88e6095_ops = { .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, .stats_get_stats = mv88e6095_stats_get_stats, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu, .ppu_enable = mv88e6185_g1_ppu_enable, .ppu_disable = mv88e6185_g1_ppu_disable, .reset = mv88e6185_g1_reset, @@ -2441,7 +2448,8 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2467,7 +2475,8 @@ static const struct mv88e6xxx_ops mv88e6123_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, 
.vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2496,7 +2505,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu, .ppu_enable = mv88e6185_g1_ppu_enable, .ppu_disable = mv88e6185_g1_ppu_disable, .reset = mv88e6185_g1_reset, @@ -2512,6 +2521,8 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -2533,6 +2544,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2563,7 +2575,8 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2587,7 +2600,8 @@ static const struct mv88e6xxx_ops mv88e6165_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - 
.mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2619,7 +2633,8 @@ static const struct mv88e6xxx_ops mv88e6171_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2633,6 +2648,8 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -2653,7 +2670,8 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2686,7 +2704,8 @@ static const struct mv88e6xxx_ops mv88e6175_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, 
.vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2700,6 +2719,8 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -2720,7 +2741,8 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2746,7 +2768,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu, .ppu_enable = mv88e6185_g1_ppu_enable, .ppu_disable = mv88e6185_g1_ppu_disable, .reset = mv88e6185_g1_reset, @@ -2762,6 +2784,8 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6390_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6390_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -2782,6 +2806,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { .set_egress_port = 
mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, @@ -2796,6 +2821,8 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6390_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6390_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -2816,6 +2843,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, @@ -2830,6 +2858,8 @@ static const struct mv88e6xxx_ops mv88e6191_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6390_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6390_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -2850,6 +2880,7 @@ static const struct mv88e6xxx_ops mv88e6191_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, @@ -2864,6 +2895,8 @@ static const struct mv88e6xxx_ops 
mv88e6240_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -2884,7 +2917,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -2899,6 +2933,8 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6390_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6390_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -2920,6 +2956,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, @@ -2934,6 +2971,8 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = 
mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_speed = mv88e6185_port_set_speed, @@ -2952,20 +2991,23 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .stats_get_stats = mv88e6320_stats_get_stats, .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, }; static const struct mv88e6xxx_ops mv88e6321_ops = { - /* MV88E6XXX_FAMILY_6321 */ + /* MV88E6XXX_FAMILY_6320 */ .irl_init_all = mv88e6352_g2_irl_init_all, .get_eeprom = mv88e6xxx_g2_get_eeprom16, .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_speed = mv88e6185_port_set_speed, @@ -2997,6 +3039,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -3018,6 +3062,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = 
mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -3049,7 +3094,8 @@ static const struct mv88e6xxx_ops mv88e6350_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -3081,7 +3127,8 @@ static const struct mv88e6xxx_ops mv88e6351_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -3095,6 +3142,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6352_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6352_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -3115,7 +3164,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .set_cpu_port = mv88e6095_g1_set_cpu_port, .set_egress_port = mv88e6095_g1_set_egress_port, .watchdog_ops = &mv88e6097_watchdog_ops, - .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, + .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, @@ -3130,6 +3180,8 @@ static const struct mv88e6xxx_ops 
mv88e6390_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6390_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6390_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -3153,6 +3205,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, @@ -3167,6 +3220,8 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_energy_detect_read = mv88e6390_phy_energy_detect_read, + .phy_energy_detect_write = mv88e6390_phy_energy_detect_write, .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -3190,6 +3245,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .set_egress_port = mv88e6390_g1_set_egress_port, .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, + .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, @@ -3206,12 +3262,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 8, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6097, .ops = &mv88e6085_ops, 
}, @@ -3224,11 +3282,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 8, .atu_move_port_mask = 0xf, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6095, .ops = &mv88e6095_ops, }, @@ -3241,12 +3300,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 8, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6097, .ops = &mv88e6097_ops, }, @@ -3259,12 +3320,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6165, .ops = &mv88e6123_ops, }, @@ -3277,11 +3340,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, .atu_move_port_mask = 0xf, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6185, .ops = &mv88e6131_ops, }, @@ -3294,11 +3358,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 3750, .atu_move_port_mask = 0x1f, + .g2_irqs = 10, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6341, .ops = &mv88e6141_ops, }, @@ -3311,12 +3377,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, 
.global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6165, .ops = &mv88e6161_ops, }, @@ -3329,12 +3397,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6165, .ops = &mv88e6165_ops, }, @@ -3347,12 +3417,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6351, .ops = &mv88e6171_ops, }, @@ -3365,12 +3437,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6352, .ops = &mv88e6172_ops, }, @@ -3383,12 +3457,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6351, .ops = &mv88e6175_ops, }, @@ -3401,12 +3477,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, 
.age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6352, .ops = &mv88e6176_ops, }, @@ -3419,11 +3497,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 8, .atu_move_port_mask = 0xf, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6185, .ops = &mv88e6185_ops, }, @@ -3436,12 +3515,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, + .global2_addr = 0x1c, .tag_protocol = DSA_TAG_PROTO_DSA, .age_time_coeff = 3750, .g1_irqs = 9, + .g2_irqs = 14, .pvt = true, + .multi_chip = true, .atu_move_port_mask = 0x1f, - .flags = MV88E6XXX_FLAGS_FAMILY_6390, .ops = &mv88e6190_ops, }, @@ -3454,12 +3535,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 3750, .g1_irqs = 9, + .g2_irqs = 14, .atu_move_port_mask = 0x1f, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6390, .ops = &mv88e6190x_ops, }, @@ -3472,12 +3555,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 3750, .g1_irqs = 9, + .g2_irqs = 14, .atu_move_port_mask = 0x1f, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6390, .ops = &mv88e6191_ops, }, @@ -3490,12 +3575,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = 
true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6352, .ops = &mv88e6240_ops, }, @@ -3508,12 +3595,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 3750, .g1_irqs = 9, + .g2_irqs = 14, .atu_move_port_mask = 0x1f, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6390, .ops = &mv88e6290_ops, }, @@ -3526,12 +3615,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 8, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6320, .ops = &mv88e6320_ops, }, @@ -3544,11 +3634,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 8, .atu_move_port_mask = 0xf, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6320, .ops = &mv88e6321_ops, }, @@ -3561,11 +3652,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 3750, .atu_move_port_mask = 0x1f, + .g2_irqs = 10, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6341, .ops = &mv88e6341_ops, }, @@ -3578,12 +3671,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6351, .ops = 
&mv88e6350_ops, }, @@ -3596,12 +3691,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6351, .ops = &mv88e6351_ops, }, @@ -3614,12 +3711,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .g2_irqs = 10, .atu_move_port_mask = 0xf, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6352, .ops = &mv88e6352_ops, }, [MV88E6390] = { @@ -3631,12 +3730,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 3750, .g1_irqs = 9, + .g2_irqs = 14, .atu_move_port_mask = 0x1f, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6390, .ops = &mv88e6390_ops, }, [MV88E6390X] = { @@ -3648,12 +3749,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, + .global2_addr = 0x1c, .age_time_coeff = 3750, .g1_irqs = 9, + .g2_irqs = 14, .atu_move_port_mask = 0x1f, .pvt = true, + .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, - .flags = MV88E6XXX_FLAGS_FAMILY_6390, .ops = &mv88e6390x_ops, }, }; @@ -3723,7 +3826,7 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, { if (sw_addr == 0) chip->smi_ops = &mv88e6xxx_smi_single_chip_ops; - else if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_MULTI_CHIP)) + else if (chip->info->multi_chip) chip->smi_ops = &mv88e6xxx_smi_multi_chip_ops; else return -EINVAL; @@ -3971,7 +4074,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) if 
(err) goto out; - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT)) { + if (chip->info->g2_irqs > 0) { err = mv88e6xxx_g2_irq_setup(chip); if (err) goto out_g1_irq; @@ -3991,7 +4094,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) out_mdio: mv88e6xxx_mdios_unregister(chip); out_g2_irq: - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT) && chip->irq > 0) + if (chip->info->g2_irqs > 0 && chip->irq > 0) mv88e6xxx_g2_irq_free(chip); out_g1_irq: if (chip->irq > 0) { @@ -4013,7 +4116,7 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev) mv88e6xxx_mdios_unregister(chip); if (chip->irq > 0) { - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT)) + if (chip->info->g2_irqs > 0) mv88e6xxx_g2_irq_free(chip); mv88e6xxx_g1_irq_free(chip); } diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 086444016352..9111e1316250 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -97,133 +97,6 @@ enum mv88e6xxx_family { MV88E6XXX_FAMILY_6390, /* 6190 6190X 6191 6290 6390 6390X */ }; -enum mv88e6xxx_cap { - /* Energy Efficient Ethernet. - */ - MV88E6XXX_CAP_EEE, - - /* Multi-chip Addressing Mode. - * Some chips respond to only 2 registers of its own SMI device address - * when it is non-zero, and use indirect access to internal registers. - */ - MV88E6XXX_CAP_SMI_CMD, /* (0x00) SMI Command */ - MV88E6XXX_CAP_SMI_DATA, /* (0x01) SMI Data */ - - /* Switch Global (1) Registers. - */ - MV88E6XXX_CAP_G1_ATU_FID, /* (0x01) ATU FID Register */ - MV88E6XXX_CAP_G1_VTU_FID, /* (0x02) VTU FID Register */ - - /* Switch Global 2 Registers. - * The device contains a second set of global 16-bit registers. - */ - MV88E6XXX_CAP_GLOBAL2, - MV88E6XXX_CAP_G2_INT, /* (0x00) Interrupt Status */ - MV88E6XXX_CAP_G2_MGMT_EN_2X, /* (0x02) MGMT Enable Register 2x */ - MV88E6XXX_CAP_G2_MGMT_EN_0X, /* (0x03) MGMT Enable Register 0x */ - MV88E6XXX_CAP_G2_POT, /* (0x0f) Priority Override Table */ - - /* Per VLAN Spanning Tree Unit (STU). 
- * The Port State database, if present, is accessed through VTU - * operations and dedicated SID registers. See MV88E6352_G1_VTU_SID. - */ - MV88E6XXX_CAP_STU, - - /* VLAN Table Unit. - * The VTU is used to program 802.1Q VLANs. See MV88E6XXX_G1_VTU_OP. - */ - MV88E6XXX_CAP_VTU, -}; - -/* Bitmask of capabilities */ -#define MV88E6XXX_FLAG_EEE BIT_ULL(MV88E6XXX_CAP_EEE) - -#define MV88E6XXX_FLAG_SMI_CMD BIT_ULL(MV88E6XXX_CAP_SMI_CMD) -#define MV88E6XXX_FLAG_SMI_DATA BIT_ULL(MV88E6XXX_CAP_SMI_DATA) - -#define MV88E6XXX_FLAG_G1_VTU_FID BIT_ULL(MV88E6XXX_CAP_G1_VTU_FID) - -#define MV88E6XXX_FLAG_GLOBAL2 BIT_ULL(MV88E6XXX_CAP_GLOBAL2) -#define MV88E6XXX_FLAG_G2_INT BIT_ULL(MV88E6XXX_CAP_G2_INT) -#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X) -#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X) -#define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT) - -/* Multi-chip Addressing Mode */ -#define MV88E6XXX_FLAGS_MULTI_CHIP \ - (MV88E6XXX_FLAG_SMI_CMD | \ - MV88E6XXX_FLAG_SMI_DATA) - -#define MV88E6XXX_FLAGS_FAMILY_6095 \ - (MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6097 \ - (MV88E6XXX_FLAG_G1_VTU_FID | \ - MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_INT | \ - MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ - MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6165 \ - (MV88E6XXX_FLAG_G1_VTU_FID | \ - MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_INT | \ - MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ - MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6185 \ - (MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_INT | \ - MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6320 \ - (MV88E6XXX_FLAG_EEE | \ - MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ - 
MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6341 \ - (MV88E6XXX_FLAG_EEE | \ - MV88E6XXX_FLAG_G1_VTU_FID | \ - MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_INT | \ - MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6351 \ - (MV88E6XXX_FLAG_G1_VTU_FID | \ - MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_INT | \ - MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ - MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6352 \ - (MV88E6XXX_FLAG_EEE | \ - MV88E6XXX_FLAG_G1_VTU_FID | \ - MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_INT | \ - MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ - MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - -#define MV88E6XXX_FLAGS_FAMILY_6390 \ - (MV88E6XXX_FLAG_EEE | \ - MV88E6XXX_FLAG_GLOBAL2 | \ - MV88E6XXX_FLAG_G2_INT | \ - MV88E6XXX_FLAGS_MULTI_CHIP) - struct mv88e6xxx_ops; struct mv88e6xxx_info { @@ -235,11 +108,18 @@ struct mv88e6xxx_info { unsigned int max_vid; unsigned int port_base_addr; unsigned int global1_addr; + unsigned int global2_addr; unsigned int age_time_coeff; unsigned int g1_irqs; + unsigned int g2_irqs; bool pvt; + + /* Multi-chip Addressing Mode. + * Some chips respond to only 2 registers of its own SMI device address + * when it is non-zero, and use indirect access to internal registers. + */ + bool multi_chip; enum dsa_tag_protocol tag_protocol; - unsigned long long flags; /* Mask for FromPort and ToPort value of PortVec used in ATU Move * operation. 0 means that the ATU Move operation is not supported. 
@@ -359,6 +239,15 @@ struct mv88e6xxx_ops { struct mii_bus *bus, int addr, int reg, u16 val); + /* Copper Energy Detect operations */ + int (*phy_energy_detect_read)(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee); + int (*phy_energy_detect_write)(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee); + + /* Priority Override Table operations */ + int (*pot_clear)(struct mv88e6xxx_chip *chip); + /* PHY Polling Unit (PPU) operations */ int (*ppu_enable)(struct mv88e6xxx_chip *chip); int (*ppu_disable)(struct mv88e6xxx_chip *chip); @@ -449,7 +338,6 @@ struct mv88e6xxx_ops { int (*set_egress_port)(struct mv88e6xxx_chip *chip, int port); const struct mv88e6xxx_irq_ops *watchdog_ops; - /* Can be either in g1 or g2, so don't use a prefix */ int (*mgmt_rsvd2cpu)(struct mv88e6xxx_chip *chip); /* Power on/off a SERDES interface */ @@ -482,12 +370,6 @@ struct mv88e6xxx_hw_stat { int type; }; -static inline bool mv88e6xxx_has(struct mv88e6xxx_chip *chip, - unsigned long flags) -{ - return (chip->info->flags & flags) == flags; -} - static inline bool mv88e6xxx_has_pvt(struct mv88e6xxx_chip *chip) { return chip->info->pvt; diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index 158d0f499874..16f556261022 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -22,48 +22,99 @@ static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) { - return mv88e6xxx_read(chip, MV88E6XXX_G2, reg, val); + return mv88e6xxx_read(chip, chip->info->global2_addr, reg, val); } static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val) { - return mv88e6xxx_write(chip, MV88E6XXX_G2, reg, val); + return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val); } static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update) { - return mv88e6xxx_update(chip, MV88E6XXX_G2, reg, update); + return mv88e6xxx_update(chip, chip->info->global2_addr, 
reg, update); } static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) { - return mv88e6xxx_wait(chip, MV88E6XXX_G2, reg, mask); + return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask); +} + +/* Offset 0x00: Interrupt Source Register */ + +static int mv88e6xxx_g2_int_source(struct mv88e6xxx_chip *chip, u16 *src) +{ + /* Read (and clear most of) the Interrupt Source bits */ + return mv88e6xxx_g2_read(chip, MV88E6XXX_G2_INT_SRC, src); +} + +/* Offset 0x01: Interrupt Mask Register */ + +static int mv88e6xxx_g2_int_mask(struct mv88e6xxx_chip *chip, u16 mask) +{ + return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_INT_MASK, mask); } /* Offset 0x02: Management Enable 2x */ + +static int mv88e6xxx_g2_mgmt_enable_2x(struct mv88e6xxx_chip *chip, u16 en2x) +{ + return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_2X, en2x); +} + /* Offset 0x03: Management Enable 0x */ -int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) +static int mv88e6xxx_g2_mgmt_enable_0x(struct mv88e6xxx_chip *chip, u16 en0x) +{ + return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_0X, en0x); +} + +/* Offset 0x05: Switch Management Register */ + +static int mv88e6xxx_g2_switch_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip, + bool enable) +{ + u16 val; + int err; + + err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_SWITCH_MGMT, &val); + if (err) + return err; + + if (enable) + val |= MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU; + else + val &= ~MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU; + + return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SWITCH_MGMT, val); +} + +int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) { int err; /* Consider the frames with reserved multicast destination - * addresses matching 01:80:c2:00:00:2x as MGMT. + * addresses matching 01:80:c2:00:00:0x as MGMT. 
*/ - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) { - err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_2X, 0xffff); - if (err) - return err; - } + err = mv88e6xxx_g2_mgmt_enable_0x(chip, 0xffff); + if (err) + return err; + + return mv88e6xxx_g2_switch_mgmt_rsvd2cpu(chip, true); +} + +int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) +{ + int err; /* Consider the frames with reserved multicast destination - * addresses matching 01:80:c2:00:00:0x as MGMT. + * addresses matching 01:80:c2:00:00:2x as MGMT. */ - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) - return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_0X, - 0xffff); + err = mv88e6xxx_g2_mgmt_enable_2x(chip, 0xffff); + if (err) + return err; - return 0; + return mv88e6185_g2_mgmt_rsvd2cpu(chip); } /* Offset 0x06: Device Mapping Table register */ @@ -260,7 +311,7 @@ static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer, return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_PRIO_OVERRIDE, val); } -static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip) +int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip) { int i, err; @@ -933,7 +984,7 @@ static irqreturn_t mv88e6xxx_g2_irq_thread_fn(int irq, void *dev_id) u16 reg; mutex_lock(&chip->reg_lock); - err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_INT_SOURCE, &reg); + err = mv88e6xxx_g2_int_source(chip, &reg); mutex_unlock(&chip->reg_lock); if (err) goto out; @@ -959,8 +1010,11 @@ static void mv88e6xxx_g2_irq_bus_lock(struct irq_data *d) static void mv88e6xxx_g2_irq_bus_sync_unlock(struct irq_data *d) { struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d); + int err; - mv88e6xxx_g2_write(chip, MV88E6XXX_G2_INT_MASK, ~chip->g2_irq.masked); + err = mv88e6xxx_g2_int_mask(chip, ~chip->g2_irq.masked); + if (err) + dev_err(chip->dev, "failed to mask interrupts\n"); mutex_unlock(&chip->reg_lock); } @@ -1063,9 +1117,6 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) * port at the highest priority. 
*/ reg = MV88E6XXX_G2_SWITCH_MGMT_FORCE_FLOW_CTL_PRI | (0x7 << 4); - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) || - mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) - reg |= MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU | 0x7; err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SWITCH_MGMT, reg); if (err) return err; @@ -1080,12 +1131,5 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) if (err) return err; - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) { - /* Clear the priority override table. */ - err = mv88e6xxx_g2_clear_pot(chip); - if (err) - return err; - } - return 0; } diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index 317ffd8f323d..669f59017b12 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -17,14 +17,27 @@ #include "chip.h" -#define MV88E6XXX_G2 0x1c - /* Offset 0x00: Interrupt Source Register */ -#define MV88E6XXX_G2_INT_SOURCE 0x00 +#define MV88E6XXX_G2_INT_SRC 0x00 +#define MV88E6XXX_G2_INT_SRC_WDOG 0x8000 +#define MV88E6XXX_G2_INT_SRC_JAM_LIMIT 0x4000 +#define MV88E6XXX_G2_INT_SRC_DUPLEX_MISMATCH 0x2000 +#define MV88E6XXX_G2_INT_SRC_WAKE_EVENT 0x1000 +#define MV88E6352_G2_INT_SRC_SERDES 0x0800 +#define MV88E6352_G2_INT_SRC_PHY 0x001f +#define MV88E6390_G2_INT_SRC_PHY 0x07fe + #define MV88E6XXX_G2_INT_SOURCE_WATCHDOG 15 /* Offset 0x01: Interrupt Mask Register */ -#define MV88E6XXX_G2_INT_MASK 0x01 +#define MV88E6XXX_G2_INT_MASK 0x01 +#define MV88E6XXX_G2_INT_MASK_WDOG 0x8000 +#define MV88E6XXX_G2_INT_MASK_JAM_LIMIT 0x4000 +#define MV88E6XXX_G2_INT_MASK_DUPLEX_MISMATCH 0x2000 +#define MV88E6XXX_G2_INT_MASK_WAKE_EVENT 0x1000 +#define MV88E6352_G2_INT_MASK_SERDES 0x0800 +#define MV88E6352_G2_INT_MASK_PHY 0x001f +#define MV88E6390_G2_INT_MASK_PHY 0x07fe /* Offset 0x02: MGMT Enable Register 2x */ #define MV88E6XXX_G2_MGMT_EN_2X 0x02 @@ -245,7 +258,11 @@ int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip); int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip); 
int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip); void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip); -int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip); + +int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip); +int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip); + +int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip); extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops; extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops; @@ -254,7 +271,7 @@ extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops; static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) { - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) { + if (chip->info->global2_addr) { dev_err(chip->dev, "this chip requires CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 enabled\n"); return -EOPNOTSUPP; } @@ -347,7 +364,17 @@ static inline void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip) { } -static inline int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) +static inline int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip) { return -EOPNOTSUPP; } diff --git a/drivers/net/dsa/mv88e6xxx/phy.c b/drivers/net/dsa/mv88e6xxx/phy.c index 3500ac0ea848..317ae89cfa68 100644 --- a/drivers/net/dsa/mv88e6xxx/phy.c +++ b/drivers/net/dsa/mv88e6xxx/phy.c @@ -13,7 +13,6 @@ #include <linux/mdio.h> #include <linux/module.h> -#include <net/dsa.h> #include "chip.h" #include "phy.h" @@ -247,3 +246,99 @@ int mv88e6xxx_phy_setup(struct mv88e6xxx_chip *chip) { return mv88e6xxx_phy_ppu_enable(chip); } + +/* Page 0, Register 16: Copper Specific Control Register 1 */ + +int mv88e6352_phy_energy_detect_read(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee) +{ + u16 val; + int err; + + err = mv88e6xxx_phy_read(chip, phy, 
MV88E6XXX_PHY_CSCTL1, &val); + if (err) + return err; + + val &= MV88E6352_PHY_CSCTL1_ENERGY_DETECT_MASK; + + eee->eee_enabled = false; + eee->tx_lpi_enabled = false; + + switch (val) { + case MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP: + eee->tx_lpi_enabled = true; + /* fall through... */ + case MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV: + eee->eee_enabled = true; + } + + return 0; +} + +int mv88e6352_phy_energy_detect_write(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee) +{ + u16 val; + int err; + + err = mv88e6xxx_phy_read(chip, phy, MV88E6XXX_PHY_CSCTL1, &val); + if (err) + return err; + + val &= ~MV88E6352_PHY_CSCTL1_ENERGY_DETECT_MASK; + + if (eee->eee_enabled) + val |= MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV; + if (eee->tx_lpi_enabled) + val |= MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP; + + return mv88e6xxx_phy_write(chip, phy, MV88E6XXX_PHY_CSCTL1, val); +} + +int mv88e6390_phy_energy_detect_read(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee) +{ + u16 val; + int err; + + err = mv88e6xxx_phy_read(chip, phy, MV88E6XXX_PHY_CSCTL1, &val); + if (err) + return err; + + val &= MV88E6390_PHY_CSCTL1_ENERGY_DETECT_MASK; + + eee->eee_enabled = false; + eee->tx_lpi_enabled = false; + + switch (val) { + case MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_AUTO: + case MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_SW: + eee->tx_lpi_enabled = true; + /* fall through... 
*/ + case MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_AUTO: + case MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_SW: + eee->eee_enabled = true; + } + + return 0; +} + +int mv88e6390_phy_energy_detect_write(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee) +{ + u16 val; + int err; + + err = mv88e6xxx_phy_read(chip, phy, MV88E6XXX_PHY_CSCTL1, &val); + if (err) + return err; + + val &= ~MV88E6390_PHY_CSCTL1_ENERGY_DETECT_MASK; + + if (eee->eee_enabled) + val |= MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_AUTO; + if (eee->tx_lpi_enabled) + val |= MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_AUTO; + + return mv88e6xxx_phy_write(chip, phy, MV88E6XXX_PHY_CSCTL1, val); +} diff --git a/drivers/net/dsa/mv88e6xxx/phy.h b/drivers/net/dsa/mv88e6xxx/phy.h index 556b74a0502a..988802799ad6 100644 --- a/drivers/net/dsa/mv88e6xxx/phy.h +++ b/drivers/net/dsa/mv88e6xxx/phy.h @@ -17,6 +17,19 @@ #define MV88E6XXX_PHY_PAGE 0x16 #define MV88E6XXX_PHY_PAGE_COPPER 0x00 +/* Page 0, Register 16: Copper Specific Control Register 1 */ +#define MV88E6XXX_PHY_CSCTL1 16 +#define MV88E6352_PHY_CSCTL1_ENERGY_DETECT_MASK 0x0300 +#define MV88E6352_PHY_CSCTL1_ENERGY_DETECT_OFF_MASK 0x0100 /* 0x */ +#define MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV 0x0200 +#define MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP 0x0300 +#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_MASK 0x0380 +#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_OFF_MASK 0x0180 /* 0xx */ +#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_AUTO 0x0200 +#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_SW 0x0280 +#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_AUTO 0x0300 +#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_SW 0x0380 + /* PHY Registers accesses implementations */ int mv88e6165_phy_read(struct mv88e6xxx_chip *chip, struct mii_bus *bus, int addr, int reg, u16 *val); @@ -40,4 +53,13 @@ void mv88e6xxx_phy_init(struct mv88e6xxx_chip *chip); void mv88e6xxx_phy_destroy(struct mv88e6xxx_chip *chip); int 
mv88e6xxx_phy_setup(struct mv88e6xxx_chip *chip); +int mv88e6352_phy_energy_detect_read(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee); +int mv88e6352_phy_energy_detect_write(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee); +int mv88e6390_phy_energy_detect_read(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee); +int mv88e6390_phy_energy_detect_write(struct mv88e6xxx_chip *chip, int phy, + struct ethtool_eee *eee); + #endif /*_MV88E6XXX_PHY_H */ diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index a7801f6668a5..2837a9128557 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -35,6 +35,23 @@ int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, return mv88e6xxx_write(chip, addr, reg, val); } +/* Offset 0x00: Port Status Register */ + +int mv88e6xxx_port_status_eee(struct mv88e6xxx_chip *chip, int port, + struct ethtool_eee *eee) +{ + u16 val; + int err; + + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &val); + if (err) + return err; + + eee->eee_active = !!(val & MV88E6352_PORT_STS_EEE); + + return 0; +} + /* Offset 0x01: MAC (or PCS or Physical) Control Register * * Link, Duplex and Flow Control have one force bit, one value bit. 
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index 8f3991bf1851..6fcab309cd85 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -216,9 +216,6 @@ /* Offset 0x13: OutFiltered Counter */ #define MV88E6XXX_PORT_OUT_FILTERED 0x13 -/* Offset 0x16: LED Control */ -#define MV88E6XXX_PORT_LED_CONTROL 0x16 - /* Offset 0x18: IEEE Priority Mapping Table */ #define MV88E6390_PORT_IEEE_PRIO_MAP_TABLE 0x18 #define MV88E6390_PORT_IEEE_PRIO_MAP_TABLE_UPDATE 0x8000 @@ -244,6 +241,9 @@ int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, u16 val); +int mv88e6xxx_port_status_eee(struct mv88e6xxx_chip *chip, int port, + struct ethtool_eee *eee); + int mv88e6352_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, phy_interface_t mode); int mv88e6390_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index d0c165d2086e..d0a1f9ce3168 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -345,7 +345,7 @@ static void dummy_setup(struct net_device *dev) dev->flags &= ~IFF_MULTICAST; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; dev->features |= NETIF_F_SG | NETIF_F_FRAGLIST; - dev->features |= NETIF_F_ALL_TSO | NETIF_F_UFO; + dev->features |= NETIF_F_ALL_TSO; dev->features |= NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX; dev->features |= NETIF_F_GSO_ENCAP_ALL; dev->hw_features |= dev->features; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 0938294f640a..e9282c924621 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -129,6 +129,7 @@ #include <net/dcbnl.h> #include <linux/completion.h> #include <linux/cpumask.h> +#include <linux/interrupt.h> #define XGBE_DRV_NAME "amd-xgbe" #define XGBE_DRV_VERSION "1.0.3" diff --git 
a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 26d25749c3e4..6df2cad61647 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -68,7 +68,7 @@ #define GEM_MAX_TX_LEN ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1))) #define GEM_MTU_MIN_SIZE ETH_MIN_MTU -#define MACB_NETIF_LSO (NETIF_F_TSO | NETIF_F_UFO) +#define MACB_NETIF_LSO NETIF_F_TSO #define MACB_WOL_HAS_MAGIC_PACKET (0x1 << 0) #define MACB_WOL_ENABLED (0x1 << 1) diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index 9906fda76087..248a8fc45069 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -128,7 +128,7 @@ static void macb_remove(struct pci_dev *pdev) clk_unregister(plat_data->hclk); } -static struct pci_device_id dev_id_table[] = { +static const struct pci_device_id dev_id_table[] = { { PCI_DEVICE(CDNS_VENDOR_ID, CDNS_DEVICE_ID), }, { 0, } }; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index ebd353bc78ff..09e287597c74 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -105,6 +105,7 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = { "tx_total_sent", "tx_total_fwd", "tx_err_pko", + "tx_err_pki", "tx_err_link", "tx_err_drop", @@ -826,6 +827,8 @@ lio_get_ethtool_stats(struct net_device *netdev, data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_total_fwd); /*per_core_stats[j].link_stats[i].fromhost.fw_err_pko */ data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_pko); + /*per_core_stats[j].link_stats[i].fromhost.fw_err_pki */ + data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_pki); /*per_core_stats[j].link_stats[i].fromhost.fw_err_link */ data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_link); 
/*per_core_stats[cvmx_get_core_num()].link_stats[idx].fromhost. @@ -1568,6 +1571,7 @@ octnet_nic_stats_callback(struct octeon_device *oct_dev, tstats->fw_total_sent = rsp_tstats->fw_total_sent; tstats->fw_total_fwd = rsp_tstats->fw_total_fwd; tstats->fw_err_pko = rsp_tstats->fw_err_pko; + tstats->fw_err_pki = rsp_tstats->fw_err_pki; tstats->fw_err_link = rsp_tstats->fw_err_link; tstats->fw_err_drop = rsp_tstats->fw_err_drop; tstats->fw_tso = rsp_tstats->fw_tso; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 51583ae4b1eb..1d8fefa9ce64 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -2544,8 +2544,8 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev, { struct octeon_droq_ops droq_ops; struct net_device *netdev; - static int cpu_id; - static int cpu_id_modulus; + int cpu_id; + int cpu_id_modulus; struct octeon_droq *droq; struct napi_struct *napi; int q, q_no, retval = 0; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 9b247102eb92..935ff299cdd9 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -1663,10 +1663,10 @@ static int setup_io_queues(struct octeon_device *octeon_dev, int ifidx) { struct octeon_droq_ops droq_ops; struct net_device *netdev; - static int cpu_id_modulus; + int cpu_id_modulus; struct octeon_droq *droq; struct napi_struct *napi; - static int cpu_id; + int cpu_id; int num_tx_descs; struct lio *lio; int retval = 0; diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 231dd7fbfb80..53aaf417e722 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -814,6 +814,7 @@ struct nic_tx_stats { u64 fw_tso; 
/* number of tso requests */ u64 fw_tso_fwd; /* number of packets segmented in tso */ u64 fw_tx_vxlan; + u64 fw_err_pki; }; struct oct_link_stats { diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 623e28ca736e..f10014f7ae88 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -876,11 +876,11 @@ int octeon_setup_instr_queues(struct octeon_device *oct) oct->num_iqs = 0; - oct->instr_queue[0] = vmalloc_node(sizeof(*oct->instr_queue[0]), + oct->instr_queue[0] = vzalloc_node(sizeof(*oct->instr_queue[0]), numa_node); if (!oct->instr_queue[0]) oct->instr_queue[0] = - vmalloc(sizeof(struct octeon_instr_queue)); + vzalloc(sizeof(struct octeon_instr_queue)); if (!oct->instr_queue[0]) return 1; memset(oct->instr_queue[0], 0, sizeof(struct octeon_instr_queue)); @@ -923,9 +923,9 @@ int octeon_setup_output_queues(struct octeon_device *oct) desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_CONF(oct, cn23xx_vf)); } oct->num_oqs = 0; - oct->droq[0] = vmalloc_node(sizeof(*oct->droq[0]), numa_node); + oct->droq[0] = vzalloc_node(sizeof(*oct->droq[0]), numa_node); if (!oct->droq[0]) - oct->droq[0] = vmalloc(sizeof(*oct->droq[0])); + oct->droq[0] = vzalloc(sizeof(*oct->droq[0])); if (!oct->droq[0]) return 1; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index 2e190deb2233..f7b5d68eb4cf 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -145,6 +145,8 @@ octeon_droq_destroy_ring_buffers(struct octeon_device *oct, for (i = 0; i < droq->max_count; i++) { pg_info = &droq->recv_buf_list[i].pg_info; + if (!pg_info) + continue; if (pg_info->dma) lio_unmap_ring(oct->pci_dev, @@ -275,12 +277,12 @@ int octeon_init_droq(struct octeon_device *oct, droq->max_count); droq->recv_buf_list = (struct octeon_recv_buffer *) 
- vmalloc_node(droq->max_count * + vzalloc_node(droq->max_count * OCT_DROQ_RECVBUF_SIZE, numa_node); if (!droq->recv_buf_list) droq->recv_buf_list = (struct octeon_recv_buffer *) - vmalloc(droq->max_count * + vzalloc(droq->max_count * OCT_DROQ_RECVBUF_SIZE); if (!droq->recv_buf_list) { dev_err(&oct->pci_dev->dev, "Output queue recv buf list alloc failed\n"); diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 0bc6a4ffce30..6a015362c340 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -793,7 +793,9 @@ static struct attribute *cxgb3_attrs[] = { NULL }; -static struct attribute_group cxgb3_attr_group = {.attrs = cxgb3_attrs }; +static const struct attribute_group cxgb3_attr_group = { + .attrs = cxgb3_attrs, +}; static ssize_t tm_attr_show(struct device *d, char *buf, int sched) @@ -880,7 +882,9 @@ static struct attribute *offload_attrs[] = { NULL }; -static struct attribute_group offload_attr_group = {.attrs = offload_attrs }; +static const struct attribute_group offload_attr_group = { + .attrs = offload_attrs, +}; /* * Sends an sk_buff to an offload queue driver diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index ef4be781fd05..1978abbc6ceb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -338,10 +338,12 @@ struct adapter_params { unsigned int sf_nsec; /* # of flash sectors */ unsigned int sf_fw_start; /* start of FW image in flash */ - unsigned int fw_vers; - unsigned int bs_vers; /* bootstrap version */ - unsigned int tp_vers; - unsigned int er_vers; /* expansion ROM version */ + unsigned int fw_vers; /* firmware version */ + unsigned int bs_vers; /* bootstrap version */ + unsigned int tp_vers; /* TP microcode version */ + unsigned int er_vers; /* expansion ROM version */ + unsigned int scfg_vers; /* Serial Configuration 
version */ + unsigned int vpd_vers; /* VPD Version */ u8 api_vers[7]; unsigned short mtus[NMTUS]; @@ -1407,6 +1409,10 @@ int t4_get_fw_version(struct adapter *adapter, u32 *vers); int t4_get_bs_version(struct adapter *adapter, u32 *vers); int t4_get_tp_version(struct adapter *adapter, u32 *vers); int t4_get_exprom_version(struct adapter *adapter, u32 *vers); +int t4_get_scfg_version(struct adapter *adapter, u32 *vers); +int t4_get_vpd_version(struct adapter *adapter, u32 *vers); +int t4_get_version_info(struct adapter *adapter); +void t4_dump_version_info(struct adapter *adapter); int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info, const u8 *fw_data, unsigned int fw_size, struct fw_hdr *card_fw, enum dev_state state, int *reset); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index e403fa18f1b1..fdf220aa08d6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3610,11 +3610,8 @@ static int adap_init0(struct adapter *adap) * later reporting and B. to warn if the currently loaded firmware * is excessively mismatched relative to the driver.) */ - t4_get_fw_version(adap, &adap->params.fw_vers); - t4_get_bs_version(adap, &adap->params.bs_vers); - t4_get_tp_version(adap, &adap->params.tp_vers); - t4_get_exprom_version(adap, &adap->params.er_vers); + t4_get_version_info(adap); ret = t4_check_fw_version(adap); /* If firmware is too old (not supported by driver) force an update. 
*/ if (ret) @@ -4560,56 +4557,8 @@ static void cxgb4_check_pcie_caps(struct adapter *adap) /* Dump basic information about the adapter */ static void print_adapter_info(struct adapter *adapter) { - /* Device information */ - dev_info(adapter->pdev_dev, "Chelsio %s rev %d\n", - adapter->params.vpd.id, - CHELSIO_CHIP_RELEASE(adapter->params.chip)); - dev_info(adapter->pdev_dev, "S/N: %s, P/N: %s\n", - adapter->params.vpd.sn, adapter->params.vpd.pn); - - /* Firmware Version */ - if (!adapter->params.fw_vers) - dev_warn(adapter->pdev_dev, "No firmware loaded\n"); - else - dev_info(adapter->pdev_dev, "Firmware version: %u.%u.%u.%u\n", - FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers), - FW_HDR_FW_VER_MINOR_G(adapter->params.fw_vers), - FW_HDR_FW_VER_MICRO_G(adapter->params.fw_vers), - FW_HDR_FW_VER_BUILD_G(adapter->params.fw_vers)); - - /* Bootstrap Firmware Version. (Some adapters don't have Bootstrap - * Firmware, so dev_info() is more appropriate here.) - */ - if (!adapter->params.bs_vers) - dev_info(adapter->pdev_dev, "No bootstrap loaded\n"); - else - dev_info(adapter->pdev_dev, "Bootstrap version: %u.%u.%u.%u\n", - FW_HDR_FW_VER_MAJOR_G(adapter->params.bs_vers), - FW_HDR_FW_VER_MINOR_G(adapter->params.bs_vers), - FW_HDR_FW_VER_MICRO_G(adapter->params.bs_vers), - FW_HDR_FW_VER_BUILD_G(adapter->params.bs_vers)); - - /* TP Microcode Version */ - if (!adapter->params.tp_vers) - dev_warn(adapter->pdev_dev, "No TP Microcode loaded\n"); - else - dev_info(adapter->pdev_dev, - "TP Microcode version: %u.%u.%u.%u\n", - FW_HDR_FW_VER_MAJOR_G(adapter->params.tp_vers), - FW_HDR_FW_VER_MINOR_G(adapter->params.tp_vers), - FW_HDR_FW_VER_MICRO_G(adapter->params.tp_vers), - FW_HDR_FW_VER_BUILD_G(adapter->params.tp_vers)); - - /* Expansion ROM version */ - if (!adapter->params.er_vers) - dev_info(adapter->pdev_dev, "No Expansion ROM loaded\n"); - else - dev_info(adapter->pdev_dev, - "Expansion ROM version: %u.%u.%u.%u\n", - FW_HDR_FW_VER_MAJOR_G(adapter->params.er_vers), - 
FW_HDR_FW_VER_MINOR_G(adapter->params.er_vers), - FW_HDR_FW_VER_MICRO_G(adapter->params.er_vers), - FW_HDR_FW_VER_BUILD_G(adapter->params.er_vers)); + /* Hardware/Firmware/etc. Version/Revision IDs */ + t4_dump_version_info(adapter); /* Software/Hardware configuration */ dev_info(adapter->pdev_dev, "Configuration: %sNIC %s, %s capable\n", diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 82bf7aac6cdb..db41b3e99b81 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -913,7 +913,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0xd010, 0xd03c, 0xdfc0, 0xdfe0, 0xe000, 0xea7c, - 0xf000, 0x11190, + 0xf000, 0x11110, + 0x11118, 0x11190, 0x19040, 0x1906c, 0x19078, 0x19080, 0x1908c, 0x190e4, @@ -1439,8 +1440,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x1ff00, 0x1ff84, 0x1ffc0, 0x1ffc8, 0x30000, 0x30030, - 0x30038, 0x30038, - 0x30040, 0x30040, 0x30100, 0x30144, 0x30190, 0x301a0, 0x301a8, 0x301b8, @@ -1551,8 +1550,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x33c3c, 0x33c50, 0x33cf0, 0x33cfc, 0x34000, 0x34030, - 0x34038, 0x34038, - 0x34040, 0x34040, 0x34100, 0x34144, 0x34190, 0x341a0, 0x341a8, 0x341b8, @@ -1663,8 +1660,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x37c3c, 0x37c50, 0x37cf0, 0x37cfc, 0x38000, 0x38030, - 0x38038, 0x38038, - 0x38040, 0x38040, 0x38100, 0x38144, 0x38190, 0x381a0, 0x381a8, 0x381b8, @@ -1775,8 +1770,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x3bc3c, 0x3bc50, 0x3bcf0, 0x3bcfc, 0x3c000, 0x3c030, - 0x3c038, 0x3c038, - 0x3c040, 0x3c040, 0x3c100, 0x3c144, 0x3c190, 0x3c1a0, 0x3c1a8, 0x3c1b8, @@ -2040,12 +2033,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x1190, 0x1194, 0x11a0, 0x11a4, 0x11b0, 0x11b4, - 0x11fc, 0x1258, - 0x1280, 0x12d4, - 0x12d9, 0x12d9, - 0x12de, 0x12de, - 0x12e3, 
0x12e3, - 0x12e8, 0x133c, + 0x11fc, 0x1274, + 0x1280, 0x133c, 0x1800, 0x18fc, 0x3000, 0x302c, 0x3060, 0x30b0, @@ -2076,6 +2065,9 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x5ea0, 0x5eb0, 0x5ec0, 0x5ec0, 0x5ec8, 0x5ed0, + 0x5ee0, 0x5ee0, + 0x5ef0, 0x5ef0, + 0x5f00, 0x5f00, 0x6000, 0x6020, 0x6028, 0x6040, 0x6058, 0x609c, @@ -2133,6 +2125,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0xd300, 0xd31c, 0xdfc0, 0xdfe0, 0xe000, 0xf008, + 0xf010, 0xf018, + 0xf020, 0xf028, 0x11000, 0x11014, 0x11048, 0x1106c, 0x11074, 0x11088, @@ -2256,13 +2250,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x1ff00, 0x1ff84, 0x1ffc0, 0x1ffc8, 0x30000, 0x30030, - 0x30038, 0x30038, - 0x30040, 0x30040, - 0x30048, 0x30048, - 0x30050, 0x30050, - 0x3005c, 0x30060, - 0x30068, 0x30068, - 0x30070, 0x30070, 0x30100, 0x30168, 0x30190, 0x301a0, 0x301a8, 0x301b8, @@ -2325,13 +2312,12 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x326a8, 0x326a8, 0x326ec, 0x326ec, 0x32a00, 0x32abc, - 0x32b00, 0x32b38, + 0x32b00, 0x32b18, + 0x32b20, 0x32b38, 0x32b40, 0x32b58, 0x32b60, 0x32b78, 0x32c00, 0x32c00, 0x32c08, 0x32c3c, - 0x32e00, 0x32e2c, - 0x32f00, 0x32f2c, 0x33000, 0x3302c, 0x33034, 0x33050, 0x33058, 0x33058, @@ -2396,13 +2382,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x33c38, 0x33c50, 0x33cf0, 0x33cfc, 0x34000, 0x34030, - 0x34038, 0x34038, - 0x34040, 0x34040, - 0x34048, 0x34048, - 0x34050, 0x34050, - 0x3405c, 0x34060, - 0x34068, 0x34068, - 0x34070, 0x34070, 0x34100, 0x34168, 0x34190, 0x341a0, 0x341a8, 0x341b8, @@ -2465,13 +2444,12 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x366a8, 0x366a8, 0x366ec, 0x366ec, 0x36a00, 0x36abc, - 0x36b00, 0x36b38, + 0x36b00, 0x36b18, + 0x36b20, 0x36b38, 0x36b40, 0x36b58, 0x36b60, 0x36b78, 0x36c00, 0x36c00, 0x36c08, 0x36c3c, - 0x36e00, 0x36e2c, - 0x36f00, 0x36f2c, 0x37000, 0x3702c, 0x37034, 0x37050, 0x37058, 0x37058, @@ 
-2545,8 +2523,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x40280, 0x40280, 0x40304, 0x40304, 0x40330, 0x4033c, - 0x41304, 0x413b8, - 0x413c0, 0x413c8, + 0x41304, 0x413c8, 0x413d0, 0x413dc, 0x413f0, 0x413f0, 0x41400, 0x4140c, @@ -3100,6 +3077,179 @@ int t4_get_exprom_version(struct adapter *adap, u32 *vers) } /** + * t4_get_vpd_version - return the VPD version + * @adapter: the adapter + * @vers: where to place the version + * + * Reads the VPD via the Firmware interface (thus this can only be called + * once we're ready to issue Firmware commands). The format of the + * VPD version is adapter specific. Returns 0 on success, an error on + * failure. + * + * Note that early versions of the Firmware didn't include the ability + * to retrieve the VPD version, so we zero-out the return-value parameter + * in that case to avoid leaving it with garbage in it. + * + * Also note that the Firmware will return its cached copy of the VPD + * Revision ID, not the actual Revision ID as written in the Serial + * EEPROM. This is only an issue if a new VPD has been written and the + * Firmware/Chip haven't yet gone through a RESET sequence. So it's best + * to defer calling this routine till after a FW_RESET_CMD has been issued + * if the Host Driver will be performing a full adapter initialization. + */ +int t4_get_vpd_version(struct adapter *adapter, u32 *vers) +{ + u32 vpdrev_param; + int ret; + + vpdrev_param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_VPDREV)); + ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0, + 1, &vpdrev_param, vers); + if (ret) + *vers = 0; + return ret; +} + +/** + * t4_get_scfg_version - return the Serial Configuration version + * @adapter: the adapter + * @vers: where to place the version + * + * Reads the Serial Configuration Version via the Firmware interface + * (thus this can only be called once we're ready to issue Firmware + * commands). 
The format of the Serial Configuration version is + * adapter specific. Returns 0 on success, an error on failure. + * + * Note that early versions of the Firmware didn't include the ability + * to retrieve the Serial Configuration version, so we zero-out the + * return-value parameter in that case to avoid leaving it with + * garbage in it. + * + * Also note that the Firmware will return its cached copy of the Serial + * Initialization Revision ID, not the actual Revision ID as written in + * the Serial EEPROM. This is only an issue if a new VPD has been written + * and the Firmware/Chip haven't yet gone through a RESET sequence. So + * it's best to defer calling this routine till after a FW_RESET_CMD has + * been issued if the Host Driver will be performing a full adapter + * initialization. + */ +int t4_get_scfg_version(struct adapter *adapter, u32 *vers) +{ + u32 scfgrev_param; + int ret; + + scfgrev_param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_SCFGREV)); + ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0, + 1, &scfgrev_param, vers); + if (ret) + *vers = 0; + return ret; +} + +/** + * t4_get_version_info - extract various chip/firmware version information + * @adapter: the adapter + * + * Reads various chip/firmware version numbers and stores them into the + * adapter Adapter Parameters structure. If any of the efforts fails + * the first failure will be returned, but all of the version numbers + * will be read. 
+ */ +int t4_get_version_info(struct adapter *adapter) +{ + int ret = 0; + + #define FIRST_RET(__getvinfo) \ + do { \ + int __ret = __getvinfo; \ + if (__ret && !ret) \ + ret = __ret; \ + } while (0) + + FIRST_RET(t4_get_fw_version(adapter, &adapter->params.fw_vers)); + FIRST_RET(t4_get_bs_version(adapter, &adapter->params.bs_vers)); + FIRST_RET(t4_get_tp_version(adapter, &adapter->params.tp_vers)); + FIRST_RET(t4_get_exprom_version(adapter, &adapter->params.er_vers)); + FIRST_RET(t4_get_scfg_version(adapter, &adapter->params.scfg_vers)); + FIRST_RET(t4_get_vpd_version(adapter, &adapter->params.vpd_vers)); + + #undef FIRST_RET + return ret; +} + +/** + * t4_dump_version_info - dump all of the adapter configuration IDs + * @adapter: the adapter + * + * Dumps all of the various bits of adapter configuration version/revision + * IDs information. This is typically called at some point after + * t4_get_version_info() has been called. + */ +void t4_dump_version_info(struct adapter *adapter) +{ + /* Device information */ + dev_info(adapter->pdev_dev, "Chelsio %s rev %d\n", + adapter->params.vpd.id, + CHELSIO_CHIP_RELEASE(adapter->params.chip)); + dev_info(adapter->pdev_dev, "S/N: %s, P/N: %s\n", + adapter->params.vpd.sn, adapter->params.vpd.pn); + + /* Firmware Version */ + if (!adapter->params.fw_vers) + dev_warn(adapter->pdev_dev, "No firmware loaded\n"); + else + dev_info(adapter->pdev_dev, "Firmware version: %u.%u.%u.%u\n", + FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers), + FW_HDR_FW_VER_MINOR_G(adapter->params.fw_vers), + FW_HDR_FW_VER_MICRO_G(adapter->params.fw_vers), + FW_HDR_FW_VER_BUILD_G(adapter->params.fw_vers)); + + /* Bootstrap Firmware Version. (Some adapters don't have Bootstrap + * Firmware, so dev_info() is more appropriate here.) 
+ */ + if (!adapter->params.bs_vers) + dev_info(adapter->pdev_dev, "No bootstrap loaded\n"); + else + dev_info(adapter->pdev_dev, "Bootstrap version: %u.%u.%u.%u\n", + FW_HDR_FW_VER_MAJOR_G(adapter->params.bs_vers), + FW_HDR_FW_VER_MINOR_G(adapter->params.bs_vers), + FW_HDR_FW_VER_MICRO_G(adapter->params.bs_vers), + FW_HDR_FW_VER_BUILD_G(adapter->params.bs_vers)); + + /* TP Microcode Version */ + if (!adapter->params.tp_vers) + dev_warn(adapter->pdev_dev, "No TP Microcode loaded\n"); + else + dev_info(adapter->pdev_dev, + "TP Microcode version: %u.%u.%u.%u\n", + FW_HDR_FW_VER_MAJOR_G(adapter->params.tp_vers), + FW_HDR_FW_VER_MINOR_G(adapter->params.tp_vers), + FW_HDR_FW_VER_MICRO_G(adapter->params.tp_vers), + FW_HDR_FW_VER_BUILD_G(adapter->params.tp_vers)); + + /* Expansion ROM version */ + if (!adapter->params.er_vers) + dev_info(adapter->pdev_dev, "No Expansion ROM loaded\n"); + else + dev_info(adapter->pdev_dev, + "Expansion ROM version: %u.%u.%u.%u\n", + FW_HDR_FW_VER_MAJOR_G(adapter->params.er_vers), + FW_HDR_FW_VER_MINOR_G(adapter->params.er_vers), + FW_HDR_FW_VER_MICRO_G(adapter->params.er_vers), + FW_HDR_FW_VER_BUILD_G(adapter->params.er_vers)); + + /* Serial Configuration version */ + dev_info(adapter->pdev_dev, "Serial Configuration version: %#x\n", + adapter->params.scfg_vers); + + /* VPD Version */ + dev_info(adapter->pdev_dev, "VPD version: %#x\n", + adapter->params.vpd_vers); +} + +/** * t4_check_fw_version - check if the FW is supported with this driver * @adap: the adapter * diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 0ebed64d62d3..ad825fbc21a5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1124,6 +1124,8 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_MAXIRD_ADAPTER = 0x14, /* max supported adap IRD */ FW_PARAMS_PARAM_DEV_ULPTX_MEMWRITE_DSGL = 0x17, FW_PARAMS_PARAM_DEV_FWCACHE = 0x18, + 
FW_PARAMS_PARAM_DEV_SCFGREV = 0x1A, + FW_PARAMS_PARAM_DEV_VPDREV = 0x1B, FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR = 0x1C, FW_PARAMS_PARAM_DEV_MPSBGMAP = 0x1E, }; diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 17e566a8b345..84394b43c0a1 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1303,7 +1303,6 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) 0x00, 'L', 'i', 'n', 'u', 'x' }; static int last_irq; - static int multiport_cnt; /* For four-port boards w/one EEPROM */ int i, irq; unsigned short sum; unsigned char *ee_data; @@ -1557,7 +1556,6 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } else if (ee_data[0] == 0xff && ee_data[1] == 0xff && ee_data[2] == 0) { sa_offset = 2; /* Grrr, damn Matrox boards. */ - multiport_cnt = 4; } #ifdef CONFIG_MIPS_COBALT if ((pdev->bus->number == 0) && diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c index 4ee042c034a1..1b79a6defd56 100644 --- a/drivers/net/ethernet/ec_bhf.c +++ b/drivers/net/ethernet/ec_bhf.c @@ -73,7 +73,7 @@ #define ETHERCAT_MASTER_ID 0x14 -static struct pci_device_id ids[] = { +static const struct pci_device_id ids[] = { { PCI_DEVICE(0x15ec, 0x5000), }, { 0, } }; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index a8db27e86a11..78cb20c67aa6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -595,7 +595,7 @@ static void hns_nic_self_test(struct net_device *ndev, set_bit(NIC_STATE_TESTING, &priv->state); if (if_running) - (void)dev_close(ndev); + dev_close(ndev); for (i = 0; i < SELF_TEST_TPYE_NUM; i++) { if (!st_param[i][1]) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index b45fdc98033d..f1bfae0c41d0 
100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -1018,8 +1018,12 @@ static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx) struct ixgbe_q_vector *q_vector = adapter->q_vector[v_idx]; struct ixgbe_ring *ring; - ixgbe_for_each_ring(ring, q_vector->tx) - adapter->tx_ring[ring->queue_index] = NULL; + ixgbe_for_each_ring(ring, q_vector->tx) { + if (ring_is_xdp(ring)) + adapter->xdp_ring[ring->queue_index] = NULL; + else + adapter->tx_ring[ring->queue_index] = NULL; + } ixgbe_for_each_ring(ring, q_vector->rx) adapter->rx_ring[ring->queue_index] = NULL; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index f1dbdf26d8e1..0f867dcda65f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2214,7 +2214,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, struct ixgbe_ring *rx_ring, struct xdp_buff *xdp) { - int result = IXGBE_XDP_PASS; + int err, result = IXGBE_XDP_PASS; struct bpf_prog *xdp_prog; u32 act; @@ -2231,6 +2231,13 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, case XDP_TX: result = ixgbe_xmit_xdp_ring(adapter, xdp); break; + case XDP_REDIRECT: + err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog); + if (!err) + result = IXGBE_XDP_TX; + else + result = IXGBE_XDP_CONSUMED; + break; default: bpf_warn_invalid_xdp_action(act); /* fallthrough */ @@ -2408,6 +2415,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, */ wmb(); writel(ring->next_to_use, ring->tail); + + xdp_do_flush_map(); } u64_stats_update_begin(&rx_ring->syncp); @@ -5810,6 +5819,9 @@ void ixgbe_down(struct ixgbe_adapter *adapter) usleep_range(10000, 20000); + /* synchronize_sched() needed for pending XDP buffers to drain */ + if (adapter->xdp_ring[0]) + synchronize_sched(); netif_tx_stop_all_queues(netdev); /* call carrier off first to avoid 
false dev_watchdog timeouts */ @@ -9823,6 +9835,53 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp) } } +static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_ring *ring; + int err; + + if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) + return -EINVAL; + + /* During program transitions its possible adapter->xdp_prog is assigned + * but ring has not been configured yet. In this case simply abort xmit. + */ + ring = adapter->xdp_prog ? adapter->xdp_ring[smp_processor_id()] : NULL; + if (unlikely(!ring)) + return -EINVAL; + + err = ixgbe_xmit_xdp_ring(adapter, xdp); + if (err != IXGBE_XDP_TX) + return -ENOMEM; + + return 0; +} + +static void ixgbe_xdp_flush(struct net_device *dev) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_ring *ring; + + /* Its possible the device went down between xdp xmit and flush so + * we need to ensure device is still up. + */ + if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) + return; + + ring = adapter->xdp_prog ? adapter->xdp_ring[smp_processor_id()] : NULL; + if (unlikely(!ring)) + return; + + /* Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. 
+ */ + wmb(); + writel(ring->next_to_use, ring->tail); + + return; +} + static const struct net_device_ops ixgbe_netdev_ops = { .ndo_open = ixgbe_open, .ndo_stop = ixgbe_close, @@ -9869,6 +9928,8 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_udp_tunnel_del = ixgbe_del_udp_tunnel_port, .ndo_features_check = ixgbe_features_check, .ndo_xdp = ixgbe_xdp, + .ndo_xdp_xmit = ixgbe_xdp_xmit, + .ndo_xdp_flush = ixgbe_xdp_flush, }; /** diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index b3d0c2e6347a..7e95cf547ff1 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1027,7 +1027,6 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget) unsigned int done[MTK_MAX_DEVS]; unsigned int bytes[MTK_MAX_DEVS]; u32 cpu, dma; - static int condition; int total = 0, i; memset(done, 0, sizeof(done)); @@ -1051,10 +1050,8 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget) mac = 1; skb = tx_buf->skb; - if (!skb) { - condition = 1; + if (!skb) break; - } if (skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC) { bytes[mac] += skb->len; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index 9807ef814e42..f6963b0b4a55 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -57,6 +57,9 @@ enum mlxsw_afk_element { MLXSW_AFK_ELEMENT_VID, MLXSW_AFK_ELEMENT_PCP, MLXSW_AFK_ELEMENT_TCP_FLAGS, + MLXSW_AFK_ELEMENT_IP_TTL_, + MLXSW_AFK_ELEMENT_IP_ECN, + MLXSW_AFK_ELEMENT_IP_DSCP, MLXSW_AFK_ELEMENT_MAX, }; @@ -104,6 +107,9 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12), MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3), MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9), + MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x14, 0, 8), + 
MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x14, 9, 2), + MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x14, 11, 6), MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32), MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8), diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 1bd34d9a7b9e..c6c508941d23 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3679,15 +3679,17 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP, MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP, MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP, - MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4, + MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP, MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF, MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP, - MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS, MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP, MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE, MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME, MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP, MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT, + MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, + MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, }; /* reg_htgt_trap_group @@ -3952,10 +3954,12 @@ MLXSW_ITEM32(reg, rgcr, pcp_rw, 0x18, 16, 2); */ MLXSW_ITEM32(reg, rgcr, activity_dis, 0x20, 0, 8); -static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en) +static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en, + bool ipv6_en) { MLXSW_REG_ZERO(rgcr, payload); mlxsw_reg_rgcr_ipv4_en_set(payload, ipv4_en); + mlxsw_reg_rgcr_ipv6_en_set(payload, ipv6_en); } /* RITR - Router Interface Table Register @@ -4203,10 +4207,12 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, MLXSW_REG_ZERO(ritr, payload); mlxsw_reg_ritr_enable_set(payload, enable); mlxsw_reg_ritr_ipv4_set(payload, 1); + mlxsw_reg_ritr_ipv6_set(payload, 1); mlxsw_reg_ritr_type_set(payload, type); mlxsw_reg_ritr_op_set(payload, op); mlxsw_reg_ritr_rif_set(payload, rif); 
mlxsw_reg_ritr_ipv4_fe_set(payload, 1); + mlxsw_reg_ritr_ipv6_fe_set(payload, 1); mlxsw_reg_ritr_lb_en_set(payload, 1); mlxsw_reg_ritr_virtual_router_set(payload, vr_id); mlxsw_reg_ritr_mtu_set(payload, mtu); @@ -4718,6 +4724,7 @@ MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8); * Access: Index */ MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32); +MLXSW_ITEM_BUF(reg, ralue, dip6, 0x0C, 16); enum mlxsw_reg_ralue_entry_type { MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1, @@ -4851,6 +4858,16 @@ static inline void mlxsw_reg_ralue_pack4(char *payload, mlxsw_reg_ralue_dip4_set(payload, dip); } +static inline void mlxsw_reg_ralue_pack6(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + enum mlxsw_reg_ralue_op op, + u16 virtual_router, u8 prefix_len, + const void *dip) +{ + mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); + mlxsw_reg_ralue_dip6_memcpy_to(payload, dip); +} + static inline void mlxsw_reg_ralue_act_remote_pack(char *payload, enum mlxsw_reg_ralue_trap_action trap_action, @@ -4954,6 +4971,7 @@ MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16); * Access: Index */ MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32); +MLXSW_ITEM_BUF(reg, rauht, dip6, 0x10, 16); enum mlxsw_reg_rauht_trap_action { MLXSW_REG_RAUHT_TRAP_ACTION_NOP, @@ -5018,6 +5036,15 @@ static inline void mlxsw_reg_rauht_pack4(char *payload, mlxsw_reg_rauht_dip4_set(payload, dip); } +static inline void mlxsw_reg_rauht_pack6(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac, const char *dip) +{ + mlxsw_reg_rauht_pack(payload, op, rif, mac); + mlxsw_reg_rauht_type_set(payload, MLXSW_REG_RAUHT_TYPE_IPV6); + mlxsw_reg_rauht_dip6_memcpy_to(payload, dip); +} + /* RALEU - Router Algorithmic LPM ECMP Update Register * --------------------------------------------------- * The register enables updating the ECMP section in the action for multiple @@ -5216,6 +5243,30 @@ MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, MLXSW_ITEM32_INDEXED(reg, 
rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0, 32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false); +#define MLXSW_REG_RAUHTD_IPV6_ENT_LEN 0x20 + +/* reg_rauhtd_ipv6_ent_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1, + MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv6_ent_rif + * Router interface. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv6_ent_dip + * Destination IPv6 address. + * Access: RO + */ +MLXSW_ITEM_BUF_INDEXED(reg, rauhtd, ipv6_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, + 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x10); + static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, int ent_index, u16 *p_rif, u32 *p_dip) @@ -5224,6 +5275,14 @@ static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, *p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index); } +static inline void mlxsw_reg_rauhtd_ent_ipv6_unpack(char *payload, + int rec_index, u16 *p_rif, + char *p_dip) +{ + *p_rif = mlxsw_reg_rauhtd_ipv6_ent_rif_get(payload, rec_index); + mlxsw_reg_rauhtd_ipv6_ent_dip_memcpy_from(payload, rec_index, p_dip); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 60bf8f27cc00..88b668ba0d8a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -58,6 +58,7 @@ #include <net/tc_act/tc_mirred.h> #include <net/netevent.h> #include <net/tc_act/tc_sample.h> +#include <net/addrconf.h> #include "spectrum.h" #include "pci.h" @@ -3333,15 +3334,47 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false), MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false), MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false), + MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE, TRAP_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD, + false), /* L3 traps */ - MLXSW_SP_RXL_NO_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_NO_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_NO_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(OSPF, TRAP_TO_CPU, OSPF, false), - MLXSW_SP_RXL_NO_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false), - MLXSW_SP_RXL_NO_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false), - MLXSW_SP_RXL_NO_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, ARP_MISS, false), - MLXSW_SP_RXL_NO_MARK(BGP_IPV4, TRAP_TO_CPU, BGP_IPV4, false), + MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false), + MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, 
ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(IPV4_OSPF, TRAP_TO_CPU, OSPF, false), + MLXSW_SP_RXL_MARK(IPV6_OSPF, TRAP_TO_CPU, OSPF, false), + MLXSW_SP_RXL_MARK(IPV6_DHCP, TRAP_TO_CPU, DHCP, false), + MLXSW_SP_RXL_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false), + MLXSW_SP_RXL_MARK(IPV4_BGP, TRAP_TO_CPU, BGP, false), + MLXSW_SP_RXL_MARK(IPV6_BGP, TRAP_TO_CPU, BGP, false), + MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false), + MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, HOST_MISS, false), + MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false), + MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false), /* PKT Sample trap */ MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU, false, SP_IP2ME, DISCARD), @@ -3376,15 +3409,17 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) burst_size = 7; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD: rate = 16 * 1024; burst_size = 10; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP: - case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case 
MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: rate = 1024; burst_size = 7; break; @@ -3433,21 +3468,23 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) priority = 5; tc = 5; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP: priority = 4; tc = 4; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD: priority = 3; tc = 3; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: priority = 2; tc = 2; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: priority = 1; @@ -4357,6 +4394,10 @@ static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { .priority = 10, /* Must be called before FIB notifier block */ }; +static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = { + .notifier_call = mlxsw_sp_inet6addr_event, +}; + static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { .notifier_call = mlxsw_sp_router_netevent_event, }; @@ -4377,6 +4418,7 @@ static int __init mlxsw_sp_module_init(void) register_netdevice_notifier(&mlxsw_sp_netdevice_nb); register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); + register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); register_netevent_notifier(&mlxsw_sp_router_netevent_nb); err = mlxsw_core_driver_register(&mlxsw_sp_driver); @@ -4393,6 +4435,7 @@ err_pci_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp_driver); err_core_driver_register: unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); return err; @@ -4403,6 +4446,7 @@ static 
void __exit mlxsw_sp_module_exit(void) mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver); mlxsw_core_driver_unregister(&mlxsw_sp_driver); unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 5ef98d4d0ab6..e848f06e34e6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -384,6 +384,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, int mlxsw_sp_netdevice_router_port_event(struct net_device *dev); int mlxsw_sp_inetaddr_event(struct notifier_block *unused, unsigned long event, void *ptr); +int mlxsw_sp_inet6addr_event(struct notifier_block *unused, + unsigned long event, void *ptr); int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); void diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h index 85d5001a5818..fb8031828454 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h @@ -70,6 +70,9 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = { MLXSW_AFK_ELEMENT_INST_U32(SRC_IP4, 0x00, 0, 32), + MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 4, 2), + MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 24, 8), + MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x08, 0, 6), MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x08, 8, 9), /* TCP_CONTROL+TCP_ECN */ }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 
61a10f166f97..bc5173f1b5c1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -984,6 +984,9 @@ static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { MLXSW_AFK_ELEMENT_VID, MLXSW_AFK_ELEMENT_PCP, MLXSW_AFK_ELEMENT_TCP_FLAGS, + MLXSW_AFK_ELEMENT_IP_TTL_, + MLXSW_AFK_ELEMENT_IP_ECN, + MLXSW_AFK_ELEMENT_IP_DSCP, }; static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 6afbe9ec64e2..bbd238e50f05 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -109,7 +109,6 @@ static const int mlxsw_sp_sfgc_uc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { [MLXSW_REG_SFGC_TYPE_BROADCAST] = 1, - [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1, [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP] = 1, [MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL] = 1, [MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST] = 1, @@ -117,6 +116,7 @@ static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { static const int mlxsw_sp_sfgc_mc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4] = 1, + [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1, }; static const int *mlxsw_sp_packet_type_sfgc_types[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 21bb2bf62d3e..400ad4081660 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -212,11 +212,46 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, return 0; } +static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + 
struct tc_cls_flower_offload *f, + u16 n_proto) +{ + struct flow_dissector_key_ip *key, *mask; + + if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) + return 0; + + if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) { + dev_err(mlxsw_sp->bus_info->dev, "IP keys supported only for IPv4/6\n"); + return -EINVAL; + } + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->key); + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->mask); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_TTL_, + key->ttl, mask->ttl); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_ECN, + key->tos & 0x3, mask->tos & 0x3); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP, + key->tos >> 6, mask->tos >> 6); + + return 0; +} + static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, struct mlxsw_sp_acl_rule_info *rulei, struct tc_cls_flower_offload *f) { + u16 n_proto_mask = 0; + u16 n_proto_key = 0; u16 addr_type = 0; u8 ip_proto = 0; int err; @@ -229,6 +264,7 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_PORTS) | BIT(FLOW_DISSECTOR_KEY_TCP) | + BIT(FLOW_DISSECTOR_KEY_IP) | BIT(FLOW_DISSECTOR_KEY_VLAN))) { dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n"); return -EOPNOTSUPP; @@ -253,8 +289,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_BASIC, f->mask); - u16 n_proto_key = ntohs(key->n_proto); - u16 n_proto_mask = ntohs(mask->n_proto); + n_proto_key = ntohs(key->n_proto); + n_proto_mask = ntohs(mask->n_proto); if (n_proto_key == ETH_P_ALL) { n_proto_key = 0; @@ -324,6 +360,10 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, if (err) return err; + err = mlxsw_sp_flower_parse_ip(mlxsw_sp, rulei, f, n_proto_key & n_proto_mask); + if (err) + return err; + return 
mlxsw_sp_flower_parse_actions(mlxsw_sp, dev, rulei, f->exts); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 383fef5a8e24..e6d629f40f93 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -49,6 +49,9 @@ #include <net/ip_fib.h> #include <net/fib_rules.h> #include <net/l3mdev.h> +#include <net/addrconf.h> +#include <net/ndisc.h> +#include <net/ipv6.h> #include "spectrum.h" #include "core.h" @@ -304,7 +307,7 @@ static struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); -#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE) +#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1) struct mlxsw_sp_prefix_usage { DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT); @@ -384,23 +387,23 @@ struct mlxsw_sp_fib_node { struct mlxsw_sp_fib_key key; }; -struct mlxsw_sp_fib_entry_params { - u32 tb_id; - u32 prio; - u8 tos; - u8 type; -}; - struct mlxsw_sp_fib_entry { struct list_head list; struct mlxsw_sp_fib_node *fib_node; enum mlxsw_sp_fib_entry_type type; struct list_head nexthop_group_node; struct mlxsw_sp_nexthop_group *nh_group; - struct mlxsw_sp_fib_entry_params params; bool offloaded; }; +struct mlxsw_sp_fib4_entry { + struct mlxsw_sp_fib_entry common; + u32 tb_id; + u32 prio; + u8 tos; + u8 type; +}; + enum mlxsw_sp_l3proto { MLXSW_SP_L3_PROTO_IPV4, MLXSW_SP_L3_PROTO_IPV6, @@ -428,6 +431,7 @@ struct mlxsw_sp_vr { u32 tb_id; /* kernel fib table id */ unsigned int rif_count; struct mlxsw_sp_fib *fib4; + struct mlxsw_sp_fib *fib6; }; static const struct rhashtable_params mlxsw_sp_fib_ht_params; @@ -625,7 +629,7 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp) static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) { - return !!vr->fib4; + return !!vr->fib4 || !!vr->fib6; } static struct mlxsw_sp_vr 
*mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) @@ -694,7 +698,7 @@ static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, case MLXSW_SP_L3_PROTO_IPV4: return vr->fib4; case MLXSW_SP_L3_PROTO_IPV6: - BUG_ON(1); + return vr->fib6; } return NULL; } @@ -703,6 +707,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, u32 tb_id) { struct mlxsw_sp_vr *vr; + int err; vr = mlxsw_sp_vr_find_unused(mlxsw_sp); if (!vr) @@ -710,12 +715,24 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(vr->fib4)) return ERR_CAST(vr->fib4); + vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(vr->fib6)) { + err = PTR_ERR(vr->fib6); + goto err_fib6_create; + } vr->tb_id = tb_id; return vr; + +err_fib6_create: + mlxsw_sp_fib_destroy(vr->fib4); + vr->fib4 = NULL; + return ERR_PTR(err); } static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) { + mlxsw_sp_fib_destroy(vr->fib6); + vr->fib6 = NULL; mlxsw_sp_fib_destroy(vr->fib4); vr->fib4 = NULL; } @@ -773,7 +790,8 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr) { - if (!vr->rif_count && list_empty(&vr->fib4->node_list)) + if (!vr->rif_count && list_empty(&vr->fib4->node_list) && + list_empty(&vr->fib6->node_list)) mlxsw_sp_vr_destroy(vr); } @@ -929,8 +947,11 @@ mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) static void mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp) { - unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + unsigned long interval; + interval = min_t(unsigned long, + NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME), + NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME)); mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval); } @@ -965,6 +986,36 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct 
mlxsw_sp *mlxsw_sp, neigh_release(n); } +static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + struct net_device *dev; + struct neighbour *n; + struct in6_addr dip; + u16 rif; + + mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif, + (char *) &dip); + + if (!mlxsw_sp->router->rifs[rif]) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n"); + return; + } + + dev = mlxsw_sp->router->rifs[rif]->dev; + n = neigh_lookup(&nd_tbl, &dip, dev); + if (!n) { + netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n", + &dip); + return; + } + + netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip); + neigh_event_send(n, NULL); + neigh_release(n); +} + static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, char *rauhtd_pl, int rec_index) @@ -988,6 +1039,15 @@ static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, } +static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + /* One record contains one entry. 
*/ + mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, + rec_index); +} + static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, char *rauhtd_pl, int rec_index) { @@ -997,7 +1057,8 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, rec_index); break; case MLXSW_REG_RAUHTD_TYPE_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl, + rec_index); break; } } @@ -1022,22 +1083,20 @@ static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl) return false; } -static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +static int +__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + enum mlxsw_reg_rauhtd_type type) { - char *rauhtd_pl; - u8 num_rec; - int i, err; - - rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); - if (!rauhtd_pl) - return -ENOMEM; + int i, num_rec; + int err; /* Make sure the neighbour's netdev isn't removed in the * process. */ rtnl_lock(); do { - mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4); + mlxsw_reg_rauhtd_pack(rauhtd_pl, type); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), rauhtd_pl); if (err) { @@ -1051,6 +1110,27 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); rtnl_unlock(); + return err; +} + +static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +{ + enum mlxsw_reg_rauhtd_type type; + char *rauhtd_pl; + int err; + + rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); + if (!rauhtd_pl) + return -ENOMEM; + + type = MLXSW_REG_RAUHTD_TYPE_IPV4; + err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type); + if (err) + goto out; + + type = MLXSW_REG_RAUHTD_TYPE_IPV6; + err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type); +out: kfree(rauhtd_pl); return err; } @@ -1147,6 +1227,32 @@ mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp, } 
static void +mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + enum mlxsw_reg_rauht_op op) +{ + struct neighbour *n = neigh_entry->key.n; + char rauht_pl[MLXSW_REG_RAUHT_LEN]; + const char *dip = n->primary_key; + + mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha, + dip); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); +} + +static bool mlxsw_sp_neigh_ipv6_ignore(struct neighbour *n) +{ + /* Packets with a link-local destination address are trapped + * after LPM lookup and never reach the neighbour table, so + * there is no need to program such neighbours to the device. + */ + if (ipv6_addr_type((struct in6_addr *) &n->primary_key) & + IPV6_ADDR_LINKLOCAL) + return true; + return false; +} + +static void mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, bool adding) @@ -1154,11 +1260,17 @@ mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, if (!adding && !neigh_entry->connected) return; neigh_entry->connected = adding; - if (neigh_entry->key.n->tbl == &arp_tbl) + if (neigh_entry->key.n->tbl == &arp_tbl) { mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry, mlxsw_sp_rauht_op(adding)); - else + } else if (neigh_entry->key.n->tbl == &nd_tbl) { + if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry->key.n)) + return; + mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry, + mlxsw_sp_rauht_op(adding)); + } else { WARN_ON_ONCE(1); + } } struct mlxsw_sp_neigh_event_work { @@ -1227,7 +1339,7 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, p = ptr; /* We don't care about changes in the default table. 
*/ - if (!p->dev || p->tbl != &arp_tbl) + if (!p->dev || (p->tbl != &arp_tbl && p->tbl != &nd_tbl)) return NOTIFY_DONE; /* We are in atomic context and can't take RTNL mutex, @@ -1246,7 +1358,7 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, case NETEVENT_NEIGH_UPDATE: n = ptr; - if (n->tbl != &arp_tbl) + if (n->tbl != &arp_tbl && n->tbl != &nd_tbl) return NOTIFY_DONE; mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev); @@ -1340,6 +1452,7 @@ struct mlxsw_sp_nexthop { */ struct rhash_head ht_node; struct mlxsw_sp_nexthop_key key; + unsigned char gw_addr[sizeof(struct in6_addr)]; struct mlxsw_sp_rif *rif; u8 should_offload:1, /* set indicates this neigh is connected and * should be put to KVD linear area of this group. @@ -1360,6 +1473,7 @@ struct mlxsw_sp_nexthop_group_key { struct mlxsw_sp_nexthop_group { struct rhash_head ht_node; struct list_head fib_list; /* list of fib entries that use this group */ + struct neigh_table *neigh_tbl; struct mlxsw_sp_nexthop_group_key key; u8 adj_index_valid:1, gateway:1; /* routes using the group use a gateway */ @@ -1677,7 +1791,6 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry; - struct fib_nh *fib_nh = nh->key.fib_nh; struct neighbour *n; u8 nud_state, dead; int err; @@ -1690,9 +1803,10 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, * The reference is taken either in neigh_lookup() or * in neigh_create() in case n is not found. 
*/ - n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev); + n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev); if (!n) { - n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev); + n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr, + nh->rif->dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); @@ -1754,10 +1868,10 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, neigh_release(n); } -static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp, - struct mlxsw_sp_nexthop *nh, - struct fib_nh *fib_nh) +static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + struct fib_nh *fib_nh) { struct net_device *dev = fib_nh->nh_dev; struct in_device *in_dev; @@ -1766,6 +1880,7 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, nh->nh_grp = nh_grp; nh->key.fib_nh = fib_nh; + memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw)); err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh); if (err) return err; @@ -1795,16 +1910,16 @@ err_nexthop_neigh_init: return err; } -static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); mlxsw_sp_nexthop_rif_fini(nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); } -static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, - unsigned long event, struct fib_nh *fib_nh) +static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, + unsigned long event, struct fib_nh *fib_nh) { struct mlxsw_sp_nexthop_key key; struct mlxsw_sp_nexthop *nh; @@ -1849,7 +1964,7 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_nexthop_group * -mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +mlxsw_sp_nexthop4_group_create(struct 
mlxsw_sp *mlxsw_sp, struct fib_info *fi) { struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_nexthop *nh; @@ -1864,6 +1979,8 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) if (!nh_grp) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->neigh_tbl = &arp_tbl; + nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK; nh_grp->count = fi->fib_nhs; nh_grp->key.fi = fi; @@ -1871,9 +1988,9 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; fib_nh = &fi->fib_nh[i]; - err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh); + err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh); if (err) - goto err_nexthop_init; + goto err_nexthop4_init; } err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); if (err) @@ -1882,10 +1999,10 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) return nh_grp; err_nexthop_group_insert: -err_nexthop_init: +err_nexthop4_init: for (i--; i >= 0; i--) { nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); } fib_info_put(nh_grp->key.fi); kfree(nh_grp); @@ -1893,8 +2010,8 @@ err_nexthop_init: } static void -mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp) +mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) { struct mlxsw_sp_nexthop *nh; int i; @@ -1902,7 +2019,7 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); } mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); WARN_ON_ONCE(nh_grp->adj_index_valid); @@ -1910,9 +2027,9 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, kfree(nh_grp); } -static int 
mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - struct fib_info *fi) +static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + struct fib_info *fi) { struct mlxsw_sp_nexthop_group_key key; struct mlxsw_sp_nexthop_group *nh_grp; @@ -1920,7 +2037,7 @@ static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, key.fi = fi; nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key); if (!nh_grp) { - nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi); + nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi); if (IS_ERR(nh_grp)) return PTR_ERR(nh_grp); } @@ -1929,15 +2046,25 @@ static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, return 0; } -static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; list_del(&fib_entry->nexthop_group_node); if (!list_empty(&nh_grp->fib_list)) return; - mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp); + mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp); +} + +static bool +mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib4_entry *fib4_entry; + + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + return !fib4_entry->tos; } static bool @@ -1945,8 +2072,14 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group; - if (fib_entry->params.tos) - return false; + switch (fib_entry->fib_node->fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + if (!mlxsw_sp_fib4_entry_should_offload(fib_entry)) + return false; + break; + case MLXSW_SP_L3_PROTO_IPV6: + break; + } switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: @@ -2009,13 +2142,37 @@ 
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, } } -static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static void +mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl, + const struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { - char ralue_pl[MLXSW_REG_RALUE_LEN]; struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; + enum mlxsw_reg_ralxx_protocol proto; + u32 *p_dip; + + proto = (enum mlxsw_reg_ralxx_protocol) fib->proto; + + switch (fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + p_dip = (u32 *) fib_entry->fib_node->key.addr; + mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + *p_dip); + break; + case MLXSW_SP_L3_PROTO_IPV6: + mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + fib_entry->fib_node->key.addr); + break; + } +} + +static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; enum mlxsw_reg_ralue_trap_action trap_action; u16 trap_id = 0; u32 adjacency_index = 0; @@ -2034,24 +2191,19 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, adjacency_index, ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, + struct 
mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif; - struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; enum mlxsw_reg_ralue_trap_action trap_action; char ralue_pl[MLXSW_REG_RALUE_LEN]; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; u16 trap_id = 0; u16 rif_index = 0; @@ -2063,42 +2215,34 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif_index); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { - struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; char ralue_pl[MLXSW_REG_RALUE_LEN]; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: - return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op); + return 
mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: - return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: - return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op); } return -EINVAL; } @@ -2107,16 +2251,10 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { - int err = -EINVAL; + int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); - switch (fib_entry->fib_node->fib->proto) { - case MLXSW_SP_L3_PROTO_IPV4: - err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op); - break; - case MLXSW_SP_L3_PROTO_IPV6: - return err; - } mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err); + return err; } @@ -2166,72 +2304,80 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, } } -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, const struct fib_entry_notifier_info *fen_info) { + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_entry *fib_entry; int err; - fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL); - if (!fib_entry) { - err = -ENOMEM; - goto err_fib_entry_alloc; - } + fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL); + if (!fib4_entry) + return ERR_PTR(-ENOMEM); + fib_entry = &fib4_entry->common; err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry); if (err) goto err_fib4_entry_type_set; - err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi); + err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi); if (err) - goto err_nexthop_group_get; + goto err_nexthop4_group_get; - fib_entry->params.prio = fen_info->fi->fib_priority; - fib_entry->params.tb_id = fen_info->tb_id; - fib_entry->params.type = 
fen_info->type; - fib_entry->params.tos = fen_info->tos; + fib4_entry->prio = fen_info->fi->fib_priority; + fib4_entry->tb_id = fen_info->tb_id; + fib4_entry->type = fen_info->type; + fib4_entry->tos = fen_info->tos; fib_entry->fib_node = fib_node; - return fib_entry; + return fib4_entry; -err_nexthop_group_get: +err_nexthop4_group_get: err_fib4_entry_type_set: - kfree(fib_entry); -err_fib_entry_alloc: + kfree(fib4_entry); return ERR_PTR(err); } static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) + struct mlxsw_sp_fib4_entry *fib4_entry) { - mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); - kfree(fib_entry); + mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); + kfree(fib4_entry); } static struct mlxsw_sp_fib_node * -mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info); +mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len); -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; - fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info); - if (IS_ERR(fib_node)) + vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id); + if (!vr) return NULL; + fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4); - list_for_each_entry(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id == fen_info->tb_id && - fib_entry->params.tos == fen_info->tos && - fib_entry->params.type == fen_info->type && - fib_entry->nh_group->key.fi == fen_info->fi) { - return fib_entry; + fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len); + if (!fib_node) + return NULL; + + 
list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { + if (fib4_entry->tb_id == fen_info->tb_id && + fib4_entry->tos == fen_info->tos && + fib4_entry->type == fen_info->type && + fib4_entry->common.nh_group->key.fi == fen_info->fi) { + return fib4_entry; } } @@ -2388,28 +2534,25 @@ static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_fib_node * -mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info) +mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr, + size_t addr_len, unsigned char prefix_len, + enum mlxsw_sp_l3proto proto) { struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_fib *fib; struct mlxsw_sp_vr *vr; int err; - vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id); + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id); if (IS_ERR(vr)) return ERR_CAST(vr); - fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4); + fib = mlxsw_sp_vr_fib(vr, proto); - fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst, - sizeof(fen_info->dst), - fen_info->dst_len); + fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len); if (fib_node) return fib_node; - fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst, - sizeof(fen_info->dst), - fen_info->dst_len); + fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len); if (!fib_node) { err = -ENOMEM; goto err_fib_node_create; @@ -2428,8 +2571,8 @@ err_fib_node_create: return ERR_PTR(err); } -static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_node *fib_node) +static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) { struct mlxsw_sp_vr *vr = fib_node->fib->vr; @@ -2440,95 +2583,100 @@ static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_vr_put(vr); } -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct 
mlxsw_sp_fib_entry_params *params) + const struct mlxsw_sp_fib4_entry *new4_entry) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; - list_for_each_entry(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id > params->tb_id) + list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { + if (fib4_entry->tb_id > new4_entry->tb_id) continue; - if (fib_entry->params.tb_id != params->tb_id) + if (fib4_entry->tb_id != new4_entry->tb_id) break; - if (fib_entry->params.tos > params->tos) + if (fib4_entry->tos > new4_entry->tos) continue; - if (fib_entry->params.prio >= params->prio || - fib_entry->params.tos < params->tos) - return fib_entry; + if (fib4_entry->prio >= new4_entry->prio || + fib4_entry->tos < new4_entry->tos) + return fib4_entry; } return NULL; } -static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry, - struct mlxsw_sp_fib_entry *new_entry) +static int +mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry, + struct mlxsw_sp_fib4_entry *new4_entry) { struct mlxsw_sp_fib_node *fib_node; - if (WARN_ON(!fib_entry)) + if (WARN_ON(!fib4_entry)) return -EINVAL; - fib_node = fib_entry->fib_node; - list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id != new_entry->params.tb_id || - fib_entry->params.tos != new_entry->params.tos || - fib_entry->params.prio != new_entry->params.prio) + fib_node = fib4_entry->common.fib_node; + list_for_each_entry_from(fib4_entry, &fib_node->entry_list, + common.list) { + if (fib4_entry->tb_id != new4_entry->tb_id || + fib4_entry->tos != new4_entry->tos || + fib4_entry->prio != new4_entry->prio) break; } - list_add_tail(&new_entry->list, &fib_entry->list); + list_add_tail(&new4_entry->common.list, &fib4_entry->common.list); return 0; } static int -mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *new_entry, 
+mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry, bool replace, bool append) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node; + struct mlxsw_sp_fib4_entry *fib4_entry; - fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params); + fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry); if (append) - return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry); - if (replace && WARN_ON(!fib_entry)) + return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry); + if (replace && WARN_ON(!fib4_entry)) return -EINVAL; /* Insert new entry before replaced one, so that we can later * remove the second. */ - if (fib_entry) { - list_add_tail(&new_entry->list, &fib_entry->list); + if (fib4_entry) { + list_add_tail(&new4_entry->common.list, + &fib4_entry->common.list); } else { - struct mlxsw_sp_fib_entry *last; + struct mlxsw_sp_fib4_entry *last; - list_for_each_entry(last, &fib_node->entry_list, list) { - if (new_entry->params.tb_id > last->params.tb_id) + list_for_each_entry(last, &fib_node->entry_list, common.list) { + if (new4_entry->tb_id > last->tb_id) break; - fib_entry = last; + fib4_entry = last; } - if (fib_entry) - list_add(&new_entry->list, &fib_entry->list); + if (fib4_entry) + list_add(&new4_entry->common.list, + &fib4_entry->common.list); else - list_add(&new_entry->list, &fib_node->entry_list); + list_add(&new4_entry->common.list, + &fib_node->entry_list); } return 0; } static void -mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry) { - list_del(&fib_entry->list); + list_del(&fib4_entry->common.list); } -static int -mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *fib_entry) +static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { + 
struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) return 0; @@ -2545,11 +2693,11 @@ mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); } -static void -mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) return; @@ -2567,54 +2715,50 @@ mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, + struct mlxsw_sp_fib4_entry *fib4_entry, bool replace, bool append) { - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; int err; - err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace, - append); + err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append); if (err) return err; - err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry); + err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common); if (err) - goto err_fib4_node_entry_add; + goto err_fib_node_entry_add; return 0; -err_fib4_node_entry_add: - mlxsw_sp_fib4_node_list_remove(fib_entry); +err_fib_node_entry_add: + mlxsw_sp_fib4_node_list_remove(fib4_entry); return err; } static void mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) + struct mlxsw_sp_fib4_entry *fib4_entry) { - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - - mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry); - mlxsw_sp_fib4_node_list_remove(fib_entry); + mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_fib4_node_list_remove(fib4_entry); } static void 
mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, + struct mlxsw_sp_fib4_entry *fib4_entry, bool replace) { - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - struct mlxsw_sp_fib_entry *replaced; + struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; + struct mlxsw_sp_fib4_entry *replaced; if (!replace) return; /* We inserted the new entry before replaced one */ - replaced = list_next_entry(fib_entry, list); + replaced = list_next_entry(fib4_entry, common.list); mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced); mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static int @@ -2622,76 +2766,80 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, bool replace, bool append) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; int err; if (mlxsw_sp->router->aborted) return 0; - fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info); + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id, + &fen_info->dst, sizeof(fen_info->dst), + fen_info->dst_len, + MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(fib_node)) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n"); return PTR_ERR(fib_node); } - fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info); - if (IS_ERR(fib_entry)) { + fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info); + if (IS_ERR(fib4_entry)) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n"); - err = PTR_ERR(fib_entry); + err = PTR_ERR(fib4_entry); goto err_fib4_entry_create; } - err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace, + err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace, append); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); goto 
err_fib4_node_entry_link; } - mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace); + mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace); return 0; err_fib4_node_entry_link: - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); err_fib4_entry_create: - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); return err; } static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; if (mlxsw_sp->router->aborted) return; - fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); - if (WARN_ON(!fib_entry)) + fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); + if (WARN_ON(!fib4_entry)) return; - fib_node = fib_entry->fib_node; + fib_node = fib4_entry->common.fib_node; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } -static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) +static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_ralxx_protocol proto, + u8 tree_id) { char ralta_pl[MLXSW_REG_RALTA_LEN]; char ralst_pl[MLXSW_REG_RALST_LEN]; int i, err; - mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4, - MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); if (err) return err; - mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); if (err) return err; @@ -2704,17 
+2852,14 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp_vr_is_used(vr)) continue; - mlxsw_reg_raltb_pack(raltb_pl, vr->id, - MLXSW_REG_RALXX_PROTOCOL_IPV4, - MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); if (err) return err; - mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4, - MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0, - 0); + mlxsw_reg_ralue_pack(ralue_pl, proto, + MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); @@ -2725,17 +2870,33 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) +{ + enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4; + int err; + + err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, + MLXSW_SP_LPM_TREE_MIN); + if (err) + return err; + + proto = MLXSW_REG_RALXX_PROTOCOL_IPV6; + return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, + MLXSW_SP_LPM_TREE_MIN + 1); +} + static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_fib_entry *fib_entry, *tmp; + struct mlxsw_sp_fib4_entry *fib4_entry, *tmp; - list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) { - bool do_break = &tmp->list == &fib_node->entry_list; + list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list, + common.list) { + bool do_break = &tmp->common.list == &fib_node->entry_list; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); /* Break when entry list is empty and 
node was freed. * Otherwise, we'll access freed memory in the next * iteration. @@ -2784,10 +2945,17 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp_vr_is_used(vr)) continue; mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); + + /* If virtual router was only used for IPv4, then it's no + * longer used. + */ + if (!mlxsw_sp_vr_is_used(vr)) + continue; + mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6); } } -static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) { int err; @@ -2832,7 +3000,7 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info, replace, append); if (err) - mlxsw_sp_router_fib4_abort(mlxsw_sp); + mlxsw_sp_router_fib_abort(mlxsw_sp); fib_info_put(fib_work->fen_info.fi); break; case FIB_EVENT_ENTRY_DEL: @@ -2843,13 +3011,13 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) case FIB_EVENT_RULE_DEL: rule = fib_work->fr_info.rule; if (!fib4_rule_default(rule) && !rule->l3mdev) - mlxsw_sp_router_fib4_abort(mlxsw_sp); + mlxsw_sp_router_fib_abort(mlxsw_sp); fib_rule_put(rule); break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: - mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event, - fib_work->fnh_info.fib_nh); + mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event, + fib_work->fnh_info.fib_nh); fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); break; } @@ -2941,17 +3109,30 @@ static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); } -static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, - const struct in_device *in_dev, - unsigned long event) +static bool +mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, + unsigned long event) { + struct inet6_dev *inet6_dev; + bool addr_list_empty = true; + struct in_device *idev; + 
switch (event) { case NETDEV_UP: if (!rif) return true; return false; case NETDEV_DOWN: - if (rif && !in_dev->ifa_list && + idev = __in_dev_get_rtnl(dev); + if (idev && idev->ifa_list) + addr_list_empty = false; + + inet6_dev = __in6_dev_get(dev); + if (addr_list_empty && inet6_dev && + !list_empty(&inet6_dev->addr_list)) + addr_list_empty = false; + + if (rif && addr_list_empty && !netif_is_l3_slave(rif->dev)) return true; /* It is possible we already removed the RIF ourselves @@ -3349,7 +3530,7 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused, goto out; rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event)) + if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; err = __mlxsw_sp_inetaddr_event(dev, event); @@ -3357,6 +3538,61 @@ out: return notifier_from_errno(err); } +struct mlxsw_sp_inet6addr_event_work { + struct work_struct work; + struct net_device *dev; + unsigned long event; +}; + +static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) +{ + struct mlxsw_sp_inet6addr_event_work *inet6addr_work = + container_of(work, struct mlxsw_sp_inet6addr_event_work, work); + struct net_device *dev = inet6addr_work->dev; + unsigned long event = inet6addr_work->event; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + + rtnl_lock(); + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + goto out; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, dev, event)) + goto out; + + __mlxsw_sp_inetaddr_event(dev, event); +out: + rtnl_unlock(); + dev_put(dev); + kfree(inet6addr_work); +} + +/* Called with rcu_read_lock() */ +int mlxsw_sp_inet6addr_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr; + struct mlxsw_sp_inet6addr_event_work *inet6addr_work; + struct net_device *dev = if6->idev->dev; + + if (!mlxsw_sp_port_dev_lower_find_rcu(dev)) + return NOTIFY_DONE; + + 
inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC); + if (!inet6addr_work) + return NOTIFY_BAD; + + INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work); + inet6addr_work->dev = dev; + inet6addr_work->event = event; + dev_hold(dev); + mlxsw_core_schedule_work(&inet6addr_work->work); + + return NOTIFY_DONE; +} + static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, const char *mac, int mtu) { @@ -3558,6 +3794,11 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif) if (err) return err; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_mc_flood_set; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), true); if (err) @@ -3566,6 +3807,9 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif) return 0; err_fid_bc_flood_set: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_mc_flood_set: mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); return err; } @@ -3577,6 +3821,8 @@ static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif) mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); } @@ -3607,6 +3853,11 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif) if (err) return err; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_mc_flood_set; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), true); if (err) @@ -3615,6 +3866,9 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif) return 0; err_fid_bc_flood_set: + 
mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_mc_flood_set: mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); return err; } @@ -3626,6 +3880,8 @@ static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); } @@ -3697,7 +3953,7 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) return -EIO; max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); - mlxsw_reg_rgcr_pack(rgcr_pl, true); + mlxsw_reg_rgcr_pack(rgcr_pl, true, true); mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); if (err) @@ -3709,7 +3965,7 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { char rgcr_pl[MLXSW_REG_RGCR_LEN]; - mlxsw_reg_rgcr_pack(rgcr_pl, false); + mlxsw_reg_rgcr_pack(rgcr_pl, false, false); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 12b5ed58f3eb..61652396bf75 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -61,11 +61,32 @@ enum { MLXSW_TRAP_ID_MTUERROR = 0x52, MLXSW_TRAP_ID_TTLERROR = 0x53, MLXSW_TRAP_ID_LBERROR = 0x54, - MLXSW_TRAP_ID_OSPF = 0x55, + MLXSW_TRAP_ID_IPV4_OSPF = 0x55, MLXSW_TRAP_ID_IP2ME = 0x5F, + MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60, + MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61, + MLXSW_TRAP_ID_IPV6_LINK_LOCAL_SRC = 0x62, + MLXSW_TRAP_ID_IPV6_ALL_NODES_LINK = 0x63, + MLXSW_TRAP_ID_IPV6_OSPF = 0x64, + MLXSW_TRAP_ID_IPV6_MLDV12_LISTENER_QUERY = 0x65, + MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_REPORT = 0x66, + 
MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_DONE = 0x67, + MLXSW_TRAP_ID_IPV6_MLDV2_LISTENER_REPORT = 0x68, + MLXSW_TRAP_ID_IPV6_DHCP = 0x69, + MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F, MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70, - MLXSW_TRAP_ID_BGP_IPV4 = 0x88, + MLXSW_TRAP_ID_IPV4_BGP = 0x88, + MLXSW_TRAP_ID_IPV6_BGP = 0x89, + MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A, + MLXSW_TRAP_ID_L3_IPV6_ROUTER_ADVERTISMENT = 0x8B, + MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_SOLICITATION = 0x8C, + MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_ADVERTISMENT = 0x8D, + MLXSW_TRAP_ID_L3_IPV6_REDIRECTION = 0x8E, MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90, + MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91, + MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92, + MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, + MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, MLXSW_TRAP_ID_ACL0 = 0x1C0, MLXSW_TRAP_ID_MAX = 0x1FF diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index fd2ec36c6fa1..462eda926b1c 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -42,8 +42,6 @@ * aggregated as a single large packet * napi: This parameter used to enable/disable NAPI (polling Rx) * Possible values '1' for enable and '0' for disable. Default is '1' - * ufo: This parameter used to enable/disable UDP Fragmentation Offload(UFO) - * Possible values '1' for enable and '0' for disable. Default is '0' * vlan_tag_strip: This can be used to enable or disable vlan stripping. * Possible values '1' for enable , '0' for disable. 
* Default is '2' - which means disable in promisc mode @@ -453,7 +451,6 @@ S2IO_PARM_INT(lro_max_pkts, 0xFFFF); S2IO_PARM_INT(indicate_max_pkts, 0); S2IO_PARM_INT(napi, 1); -S2IO_PARM_INT(ufo, 0); S2IO_PARM_INT(vlan_tag_strip, NO_STRIP_IN_PROMISC); static unsigned int tx_fifo_len[MAX_TX_FIFOS] = @@ -4128,32 +4125,6 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) } frg_len = skb_headlen(skb); - if (offload_type == SKB_GSO_UDP) { - int ufo_size; - - ufo_size = s2io_udp_mss(skb); - ufo_size &= ~7; - txdp->Control_1 |= TXD_UFO_EN; - txdp->Control_1 |= TXD_UFO_MSS(ufo_size); - txdp->Control_1 |= TXD_BUFFER0_SIZE(8); -#ifdef __BIG_ENDIAN - /* both variants do cpu_to_be64(be32_to_cpu(...)) */ - fifo->ufo_in_band_v[put_off] = - (__force u64)skb_shinfo(skb)->ip6_frag_id; -#else - fifo->ufo_in_band_v[put_off] = - (__force u64)skb_shinfo(skb)->ip6_frag_id << 32; -#endif - txdp->Host_Control = (unsigned long)fifo->ufo_in_band_v; - txdp->Buffer_Pointer = pci_map_single(sp->pdev, - fifo->ufo_in_band_v, - sizeof(u64), - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer)) - goto pci_map_failed; - txdp++; - } - txdp->Buffer_Pointer = pci_map_single(sp->pdev, skb->data, frg_len, PCI_DMA_TODEVICE); if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer)) @@ -4161,8 +4132,6 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Host_Control = (unsigned long)skb; txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len); - if (offload_type == SKB_GSO_UDP) - txdp->Control_1 |= TXD_UFO_EN; frg_cnt = skb_shinfo(skb)->nr_frags; /* For fragmented SKB. 
*/ @@ -4177,14 +4146,9 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) skb_frag_size(frag), DMA_TO_DEVICE); txdp->Control_1 = TXD_BUFFER0_SIZE(skb_frag_size(frag)); - if (offload_type == SKB_GSO_UDP) - txdp->Control_1 |= TXD_UFO_EN; } txdp->Control_1 |= TXD_GATHER_CODE_LAST; - if (offload_type == SKB_GSO_UDP) - frg_cnt++; /* as Txd0 was used for inband header */ - tx_fifo = mac_control->tx_FIFO_start[queue]; val64 = fifo->list_info[put_off].list_phy_addr; writeq(val64, &tx_fifo->TxDL_Pointer); @@ -7910,11 +7874,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) NETIF_F_RXCSUM | NETIF_F_LRO; dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; - if (sp->device_type & XFRAME_II_DEVICE) { - dev->hw_features |= NETIF_F_UFO; - if (ufo) - dev->features |= NETIF_F_UFO; - } if (sp->high_dma_flag == true) dev->features |= NETIF_F_HIGHDMA; dev->watchdog_timeo = WATCH_DOG_TIMEOUT; @@ -8147,10 +8106,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) DBG_PRINT(ERR_DBG, "%s: Large receive offload enabled\n", dev->name); - if (ufo) - DBG_PRINT(ERR_DBG, - "%s: UDP Fragmentation Offload(UFO) enabled\n", - dev->name); /* Initialize device name */ snprintf(sp->name, sizeof(sp->name), "%s Neterion %s", dev->name, sp->product_name); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c index 0844b7c75767..afa10a163da1 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c @@ -1285,7 +1285,7 @@ flash_temp: int qlcnic_dump_fw(struct qlcnic_adapter *adapter) { struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump; - static const struct qlcnic_dump_operations *fw_dump_ops; + const struct qlcnic_dump_operations *fw_dump_ops; struct qlcnic_83xx_dump_template_hdr *hdr_83xx; u32 entry_offset, dump, no_entries, buf_offset = 0; int i, k, ops_cnt, 
ops_index, dump_size = 0; diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index c905971c5f3a..d3f96a8f743b 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -746,59 +746,171 @@ static const char *efx_mcdi_phy_test_name(struct efx_nic *efx, return NULL; } -#define SFP_PAGE_SIZE 128 -#define SFP_NUM_PAGES 2 -static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx, - struct ethtool_eeprom *ee, u8 *data) +#define SFP_PAGE_SIZE 128 +#define SFF_DIAG_TYPE_OFFSET 92 +#define SFF_DIAG_ADDR_CHANGE BIT(2) +#define SFF_8079_NUM_PAGES 2 +#define SFF_8472_NUM_PAGES 4 +#define SFF_8436_NUM_PAGES 5 +#define SFF_DMT_LEVEL_OFFSET 94 + +/** efx_mcdi_phy_get_module_eeprom_page() - Get a single page of module eeprom + * @efx: NIC context + * @page: EEPROM page number + * @data: Destination data pointer + * @offset: Offset in page to copy from in to data + * @space: Space available in data + * + * Return: + * >=0 - amount of data copied + * <0 - error + */ +static int efx_mcdi_phy_get_module_eeprom_page(struct efx_nic *efx, + unsigned int page, + u8 *data, ssize_t offset, + ssize_t space) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX); MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PHY_MEDIA_INFO_IN_LEN); size_t outlen; - int rc; unsigned int payload_len; - unsigned int space_remaining = ee->len; - unsigned int page; - unsigned int page_off; unsigned int to_copy; - u8 *user_data = data; + int rc; - BUILD_BUG_ON(SFP_PAGE_SIZE * SFP_NUM_PAGES != ETH_MODULE_SFF_8079_LEN); + if (offset > SFP_PAGE_SIZE) + return -EINVAL; - page_off = ee->offset % SFP_PAGE_SIZE; - page = ee->offset / SFP_PAGE_SIZE; + to_copy = min(space, SFP_PAGE_SIZE - offset); - while (space_remaining && (page < SFP_NUM_PAGES)) { - MCDI_SET_DWORD(inbuf, GET_PHY_MEDIA_INFO_IN_PAGE, page); + MCDI_SET_DWORD(inbuf, GET_PHY_MEDIA_INFO_IN_PAGE, page); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_PHY_MEDIA_INFO, + inbuf, sizeof(inbuf), 
+ outbuf, sizeof(outbuf), + &outlen); - rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_MEDIA_INFO, - inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), - &outlen); - if (rc) - return rc; + if (rc) + return rc; + + if (outlen < (MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST + + SFP_PAGE_SIZE)) + return -EIO; + + payload_len = MCDI_DWORD(outbuf, GET_PHY_MEDIA_INFO_OUT_DATALEN); + if (payload_len != SFP_PAGE_SIZE) + return -EIO; - if (outlen < (MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST + - SFP_PAGE_SIZE)) - return -EIO; + memcpy(data, MCDI_PTR(outbuf, GET_PHY_MEDIA_INFO_OUT_DATA) + offset, + to_copy); - payload_len = MCDI_DWORD(outbuf, - GET_PHY_MEDIA_INFO_OUT_DATALEN); - if (payload_len != SFP_PAGE_SIZE) - return -EIO; + return to_copy; +} - /* Copy as much as we can into data */ - payload_len -= page_off; - to_copy = (space_remaining < payload_len) ? - space_remaining : payload_len; +static int efx_mcdi_phy_get_module_eeprom_byte(struct efx_nic *efx, + unsigned int page, + u8 byte) +{ + int rc; + u8 data; - memcpy(user_data, - MCDI_PTR(outbuf, GET_PHY_MEDIA_INFO_OUT_DATA) + page_off, - to_copy); + rc = efx_mcdi_phy_get_module_eeprom_page(efx, page, &data, byte, 1); + if (rc == 1) + return data; + + return rc; +} + +static int efx_mcdi_phy_diag_type(struct efx_nic *efx) +{ + /* Page zero of the EEPROM includes the diagnostic type at byte 92. */ + return efx_mcdi_phy_get_module_eeprom_byte(efx, 0, + SFF_DIAG_TYPE_OFFSET); +} - space_remaining -= to_copy; - user_data += to_copy; - page_off = 0; - page++; +static int efx_mcdi_phy_sff_8472_level(struct efx_nic *efx) +{ + /* Page zero of the EEPROM includes the DMT level at byte 94. */ + return efx_mcdi_phy_get_module_eeprom_byte(efx, 0, + SFF_DMT_LEVEL_OFFSET); +} + +static u32 efx_mcdi_phy_module_type(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_data = efx->phy_data; + + if (phy_data->media != MC_CMD_MEDIA_QSFP_PLUS) + return phy_data->media; + + /* A QSFP+ NIC may actually have an SFP+ module attached. 
+ * The ID is page 0, byte 0. + */ + switch (efx_mcdi_phy_get_module_eeprom_byte(efx, 0, 0)) { + case 0x3: + return MC_CMD_MEDIA_SFP_PLUS; + case 0xc: + case 0xd: + return MC_CMD_MEDIA_QSFP_PLUS; + default: + return 0; + } +} + +static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx, + struct ethtool_eeprom *ee, u8 *data) +{ + int rc; + ssize_t space_remaining = ee->len; + unsigned int page_off; + bool ignore_missing; + int num_pages; + int page; + + switch (efx_mcdi_phy_module_type(efx)) { + case MC_CMD_MEDIA_SFP_PLUS: + num_pages = efx_mcdi_phy_sff_8472_level(efx) > 0 ? + SFF_8472_NUM_PAGES : SFF_8079_NUM_PAGES; + page = 0; + ignore_missing = false; + break; + case MC_CMD_MEDIA_QSFP_PLUS: + num_pages = SFF_8436_NUM_PAGES; + page = -1; /* We obtain the lower page by asking for -1. */ + ignore_missing = true; /* Ignore missing pages after page 0. */ + break; + default: + return -EOPNOTSUPP; + } + + page_off = ee->offset % SFP_PAGE_SIZE; + page += ee->offset / SFP_PAGE_SIZE; + + while (space_remaining && (page < num_pages)) { + rc = efx_mcdi_phy_get_module_eeprom_page(efx, page, + data, page_off, + space_remaining); + + if (rc > 0) { + space_remaining -= rc; + data += rc; + page_off = 0; + page++; + } else if (rc == 0) { + space_remaining = 0; + } else if (ignore_missing && (page > 0)) { + int intended_size = SFP_PAGE_SIZE - page_off; + + space_remaining -= intended_size; + if (space_remaining < 0) { + space_remaining = 0; + } else { + memset(data, 0, intended_size); + data += intended_size; + page_off = 0; + page++; + rc = 0; + } + } else { + return rc; + } } return 0; @@ -807,16 +919,42 @@ static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx, static int efx_mcdi_phy_get_module_info(struct efx_nic *efx, struct ethtool_modinfo *modinfo) { - struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + int sff_8472_level; + int diag_type; - switch (phy_cfg->media) { + switch (efx_mcdi_phy_module_type(efx)) { case MC_CMD_MEDIA_SFP_PLUS: - modinfo->type = 
ETH_MODULE_SFF_8079; - modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; - return 0; + sff_8472_level = efx_mcdi_phy_sff_8472_level(efx); + + /* If we can't read the diagnostics level we have none. */ + if (sff_8472_level < 0) + return -EOPNOTSUPP; + + /* Check if this module requires the (unsupported) address + * change operation. + */ + diag_type = efx_mcdi_phy_diag_type(efx); + + if ((sff_8472_level == 0) || + (diag_type & SFF_DIAG_ADDR_CHANGE)) { + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + } + break; + + case MC_CMD_MEDIA_QSFP_PLUS: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + default: return -EOPNOTSUPP; } + + return 0; } static const struct efx_phy_operations efx_mcdi_phy_ops = { diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 8603e397097e..5b56c24b6ed2 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -248,7 +248,7 @@ static struct net_device *vsw_alloc_netdev(u8 hwaddr[], dev->ethtool_ops = &vsw_ethtool_ops; dev->watchdog_timeo = VSW_TX_TIMEOUT; - dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG; + dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG; dev->features = dev->hw_features; /* MTU range: 68 - 65535 */ diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 75b167e3fe98..0b95105f7060 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -312,7 +312,7 @@ static struct vnet *vnet_new(const u64 *local_mac, dev->watchdog_timeo = VNET_TX_TIMEOUT; dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE | - NETIF_F_IP_CSUM | NETIF_F_SG; + NETIF_F_HW_CSUM | NETIF_F_SG; dev->features = dev->hw_features; /* MTU range: 68 - 65535 */ diff --git a/drivers/net/ethernet/sun/sunvnet_common.c 
b/drivers/net/ethernet/sun/sunvnet_common.c index 9e86833249d4..ecf456c7b6d1 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -303,7 +303,7 @@ static struct sk_buff *alloc_and_align_skb(struct net_device *dev, return skb; } -static inline void vnet_fullcsum(struct sk_buff *skb) +static inline void vnet_fullcsum_ipv4(struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); int offset = skb_transport_offset(skb); @@ -335,6 +335,40 @@ static inline void vnet_fullcsum(struct sk_buff *skb) } } +#if IS_ENABLED(CONFIG_IPV6) +static inline void vnet_fullcsum_ipv6(struct sk_buff *skb) +{ + struct ipv6hdr *ip6h = ipv6_hdr(skb); + int offset = skb_transport_offset(skb); + + if (skb->protocol != htons(ETH_P_IPV6)) + return; + if (ip6h->nexthdr != IPPROTO_TCP && + ip6h->nexthdr != IPPROTO_UDP) + return; + skb->ip_summed = CHECKSUM_NONE; + skb->csum_level = 1; + skb->csum = 0; + if (ip6h->nexthdr == IPPROTO_TCP) { + struct tcphdr *ptcp = tcp_hdr(skb); + + ptcp->check = 0; + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + ptcp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + skb->len - offset, IPPROTO_TCP, + skb->csum); + } else if (ip6h->nexthdr == IPPROTO_UDP) { + struct udphdr *pudp = udp_hdr(skb); + + pudp->check = 0; + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + pudp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + skb->len - offset, IPPROTO_UDP, + skb->csum); + } +} +#endif + static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc) { struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); @@ -394,9 +428,14 @@ static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc) struct iphdr *iph = ip_hdr(skb); int ihl = iph->ihl * 4; - skb_reset_transport_header(skb); skb_set_transport_header(skb, ihl); - vnet_fullcsum(skb); + vnet_fullcsum_ipv4(skb); +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + 
skb_set_transport_header(skb, + sizeof(struct ipv6hdr)); + vnet_fullcsum_ipv6(skb); +#endif } } if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) { @@ -1115,24 +1154,47 @@ static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies) if (skb->ip_summed == CHECKSUM_PARTIAL) start = skb_checksum_start_offset(skb); if (start) { - struct iphdr *iph = ip_hdr(nskb); int offset = start + nskb->csum_offset; + /* copy the headers, no csum here */ if (skb_copy_bits(skb, 0, nskb->data, start)) { dev_kfree_skb(nskb); dev_kfree_skb(skb); return NULL; } + + /* copy the rest, with csum calculation */ *(__sum16 *)(skb->data + offset) = 0; csum = skb_copy_and_csum_bits(skb, start, nskb->data + start, skb->len - start, 0); - if (iph->protocol == IPPROTO_TCP || - iph->protocol == IPPROTO_UDP) { - csum = csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - start, - iph->protocol, csum); + + /* add in the header checksums */ + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph = ip_hdr(nskb); + + if (iph->protocol == IPPROTO_TCP || + iph->protocol == IPPROTO_UDP) { + csum = csum_tcpudp_magic(iph->saddr, + iph->daddr, + skb->len - start, + iph->protocol, + csum); + } + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = ipv6_hdr(nskb); + + if (ip6h->nexthdr == IPPROTO_TCP || + ip6h->nexthdr == IPPROTO_UDP) { + csum = csum_ipv6_magic(&ip6h->saddr, + &ip6h->daddr, + skb->len - start, + ip6h->nexthdr, + csum); + } } + + /* save the final result */ *(__sum16 *)(nskb->data + offset) = csum; nskb->ip_summed = CHECKSUM_NONE; @@ -1318,8 +1380,14 @@ int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, if (unlikely(!skb)) goto out_dropped; - if (skb->ip_summed == CHECKSUM_PARTIAL) - vnet_fullcsum(skb); + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->protocol == htons(ETH_P_IP)) + vnet_fullcsum_ipv4(skb); +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) + vnet_fullcsum_ipv6(skb); +#endif + } 
dr = &port->vio.drings[VIO_DRIVER_TX_RING]; i = skb_get_queue_mapping(skb); diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c index 3b91257683bc..e1b55b8fb8e0 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c @@ -17,6 +17,7 @@ #include <linux/netdevice.h> #include <linux/tcp.h> +#include <linux/interrupt.h> #include "dwc-xlgmac.h" #include "dwc-xlgmac-reg.h" diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index af27f7d1cbf3..5ef626331f85 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -389,7 +389,7 @@ struct axidma_bd { * @dma_err_tasklet: Tasklet structure to process Axi DMA errors * @tx_irq: Axidma TX IRQ number * @rx_irq: Axidma RX IRQ number - * @phy_type: Phy type to identify between MII/GMII/RGMII/SGMII/1000 Base-X + * @phy_mode: Phy type to identify between MII/GMII/RGMII/SGMII/1000 Base-X * @options: AxiEthernet option word * @last_link: Phy link state in which the PHY was negotiated earlier * @features: Stores the extended features supported by the axienet hw @@ -432,7 +432,7 @@ struct axienet_local { int tx_irq; int rx_irq; - u32 phy_type; + phy_interface_t phy_mode; u32 options; /* Current options word */ u32 last_link; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 33c595f4691d..e74e1e897864 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -531,11 +531,11 @@ static void axienet_adjust_link(struct net_device *ndev) link_state = phy->speed | (phy->duplex << 1) | phy->link; if (lp->last_link != link_state) { if ((phy->speed == SPEED_10) || (phy->speed == SPEED_100)) { - if (lp->phy_type == XAE_PHY_TYPE_1000BASE_X) + if (lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX) setspeed 
= 0; } else { if ((phy->speed == SPEED_1000) && - (lp->phy_type == XAE_PHY_TYPE_MII)) + (lp->phy_mode == PHY_INTERFACE_MODE_MII)) setspeed = 0; } @@ -935,15 +935,8 @@ static int axienet_open(struct net_device *ndev) return ret; if (lp->phy_node) { - if (lp->phy_type == XAE_PHY_TYPE_GMII) { - phydev = of_phy_connect(lp->ndev, lp->phy_node, - axienet_adjust_link, 0, - PHY_INTERFACE_MODE_GMII); - } else if (lp->phy_type == XAE_PHY_TYPE_RGMII_2_0) { - phydev = of_phy_connect(lp->ndev, lp->phy_node, - axienet_adjust_link, 0, - PHY_INTERFACE_MODE_RGMII_ID); - } + phydev = of_phy_connect(lp->ndev, lp->phy_node, + axienet_adjust_link, 0, lp->phy_mode); if (!phydev) dev_err(lp->dev, "of_phy_connect() failed\n"); @@ -1539,7 +1532,38 @@ static int axienet_probe(struct platform_device *pdev) * the device-tree and accordingly set flags. */ of_property_read_u32(pdev->dev.of_node, "xlnx,rxmem", &lp->rxmem); - of_property_read_u32(pdev->dev.of_node, "xlnx,phy-type", &lp->phy_type); + + /* Start with the proprietary, and broken phy_type */ + ret = of_property_read_u32(pdev->dev.of_node, "xlnx,phy-type", &value); + if (!ret) { + netdev_warn(ndev, "Please upgrade your device tree binary blob to use phy-mode"); + switch (value) { + case XAE_PHY_TYPE_MII: + lp->phy_mode = PHY_INTERFACE_MODE_MII; + break; + case XAE_PHY_TYPE_GMII: + lp->phy_mode = PHY_INTERFACE_MODE_GMII; + break; + case XAE_PHY_TYPE_RGMII_2_0: + lp->phy_mode = PHY_INTERFACE_MODE_RGMII_ID; + break; + case XAE_PHY_TYPE_SGMII: + lp->phy_mode = PHY_INTERFACE_MODE_SGMII; + break; + case XAE_PHY_TYPE_1000BASE_X: + lp->phy_mode = PHY_INTERFACE_MODE_1000BASEX; + break; + default: + ret = -EINVAL; + goto free_netdev; + } + } else { + lp->phy_mode = of_get_phy_mode(pdev->dev.of_node); + if (lp->phy_mode < 0) { + ret = -EINVAL; + goto free_netdev; + } + } /* Find the DMA node, map the DMA registers, and decode the DMA IRQs */ np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0); diff --git 
a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index d6c25580f8dd..afb65f753574 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -183,10 +183,12 @@ struct rndis_device { /* Interface */ struct rndis_message; struct netvsc_device; -int netvsc_device_add(struct hv_device *device, - const struct netvsc_device_info *info); +struct net_device_context; + +struct netvsc_device *netvsc_device_add(struct hv_device *device, + const struct netvsc_device_info *info); void netvsc_device_remove(struct hv_device *device); -int netvsc_send(struct hv_device *device, +int netvsc_send(struct net_device_context *ndc, struct hv_netvsc_packet *packet, struct rndis_message *rndis_msg, struct hv_page_buffer **page_buffer, @@ -200,10 +202,11 @@ int netvsc_recv_callback(struct net_device *net, const struct ndis_pkt_8021q_info *vlan); void netvsc_channel_cb(void *context); int netvsc_poll(struct napi_struct *napi, int budget); +bool rndis_filter_opened(const struct netvsc_device *nvdev); int rndis_filter_open(struct netvsc_device *nvdev); int rndis_filter_close(struct netvsc_device *nvdev); -int rndis_filter_device_add(struct hv_device *dev, - struct netvsc_device_info *info); +struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, + struct netvsc_device_info *info); void rndis_filter_update(struct netvsc_device *nvdev); void rndis_filter_device_remove(struct hv_device *dev, struct netvsc_device *nvdev); @@ -724,6 +727,7 @@ struct net_device_context { /* Per channel data */ struct netvsc_channel { struct vmbus_channel *channel; + struct netvsc_device *net_device; const struct vmpacket_descriptor *desc; struct napi_struct napi; struct multi_send_data msd; @@ -783,18 +787,6 @@ struct netvsc_device { struct rcu_head rcu; }; -static inline struct netvsc_device * -net_device_to_netvsc_device(struct net_device *ndev) -{ - return ((struct net_device_context *)netdev_priv(ndev))->nvdev; -} - -static inline struct netvsc_device * 
-hv_device_to_netvsc_device(struct hv_device *device) -{ - return net_device_to_netvsc_device(hv_get_drvdata(device)); -} - /* NdisInitialize message */ struct rndis_initialize_request { u32 req_id; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 0a9167dd72fb..0a9d9feedc3f 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -29,6 +29,8 @@ #include <linux/netdevice.h> #include <linux/if_ether.h> #include <linux/vmalloc.h> +#include <linux/rtnetlink.h> + #include <asm/sync_bitops.h> #include "hyperv_net.h" @@ -41,7 +43,7 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) { struct net_device_context *net_device_ctx = netdev_priv(ndev); struct hv_device *dev = net_device_ctx->device_ctx; - struct netvsc_device *nv_dev = net_device_ctx->nvdev; + struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev); struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt; memset(init_pkt, 0, sizeof(struct nvsp_message)); @@ -103,7 +105,8 @@ static void netvsc_destroy_buf(struct hv_device *device) { struct nvsp_message *revoke_packet; struct net_device *ndev = hv_get_drvdata(device); - struct netvsc_device *net_device = net_device_to_netvsc_device(ndev); + struct net_device_context *ndc = netdev_priv(ndev); + struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev); int ret; /* @@ -549,7 +552,8 @@ void netvsc_device_remove(struct hv_device *device) { struct net_device *ndev = hv_get_drvdata(device); struct net_device_context *net_device_ctx = netdev_priv(ndev); - struct netvsc_device *net_device = net_device_ctx->nvdev; + struct netvsc_device *net_device + = rtnl_dereference(net_device_ctx->nvdev); int i; netvsc_disconnect_vsp(device); @@ -819,13 +823,16 @@ static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send, msdp->count = 0; } -int netvsc_send(struct hv_device *device, +/* RCU already held by caller */ +int netvsc_send(struct net_device_context *ndev_ctx, struct hv_netvsc_packet 
*packet, struct rndis_message *rndis_msg, struct hv_page_buffer **pb, struct sk_buff *skb) { - struct netvsc_device *net_device = hv_device_to_netvsc_device(device); + struct netvsc_device *net_device + = rcu_dereference_rtnl(ndev_ctx->nvdev); + struct hv_device *device = ndev_ctx->device_ctx; int ret = 0; struct netvsc_channel *nvchan; u32 pktlen = packet->total_data_buflen, msd_len = 0; @@ -837,7 +844,7 @@ int netvsc_send(struct hv_device *device, bool xmit_more = (skb != NULL) ? skb->xmit_more : false; /* If device is rescinded, return error and packet will get dropped. */ - if (unlikely(net_device->destroy)) + if (unlikely(!net_device || net_device->destroy)) return -ENODEV; /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get @@ -1219,11 +1226,11 @@ int netvsc_poll(struct napi_struct *napi, int budget) { struct netvsc_channel *nvchan = container_of(napi, struct netvsc_channel, napi); + struct netvsc_device *net_device = nvchan->net_device; struct vmbus_channel *channel = nvchan->channel; struct hv_device *device = netvsc_channel_to_device(channel); u16 q_idx = channel->offermsg.offer.sub_channel_index; struct net_device *ndev = hv_get_drvdata(device); - struct netvsc_device *net_device = net_device_to_netvsc_device(ndev); int work_done = 0; /* If starting a new interval */ @@ -1271,8 +1278,8 @@ void netvsc_channel_cb(void *context) * netvsc_device_add - Callback when the device belonging to this * driver is added */ -int netvsc_device_add(struct hv_device *device, - const struct netvsc_device_info *device_info) +struct netvsc_device *netvsc_device_add(struct hv_device *device, + const struct netvsc_device_info *device_info) { int i, ret = 0; int ring_size = device_info->ring_size; @@ -1282,7 +1289,7 @@ int netvsc_device_add(struct hv_device *device, net_device = alloc_net_device(); if (!net_device) - return -ENOMEM; + return ERR_PTR(-ENOMEM); net_device->ring_size = ring_size; @@ -1302,6 +1309,7 @@ int netvsc_device_add(struct hv_device *device, 
struct netvsc_channel *nvchan = &net_device->chan_table[i]; nvchan->channel = device->channel; + nvchan->net_device = net_device; } /* Enable NAPI handler before init callbacks */ @@ -1338,7 +1346,7 @@ int netvsc_device_add(struct hv_device *device, goto close; } - return ret; + return net_device; close: netif_napi_del(&net_device->chan_table[0].napi); @@ -1349,6 +1357,5 @@ close: cleanup: free_netvsc_device(&net_device->rcu); - return ret; - + return ERR_PTR(ret); } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 63c98bbbc596..a164981c15f7 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -69,7 +69,7 @@ static void netvsc_set_multicast_list(struct net_device *net) static int netvsc_open(struct net_device *net) { struct net_device_context *ndev_ctx = netdev_priv(net); - struct netvsc_device *nvdev = ndev_ctx->nvdev; + struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev); struct rndis_device *rdev; int ret = 0; @@ -505,8 +505,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) /* timestamp packet in software */ skb_tx_timestamp(skb); - ret = netvsc_send(net_device_ctx->device_ctx, packet, - rndis_msg, &pb, skb); + + ret = netvsc_send(net_device_ctx, packet, rndis_msg, &pb, skb); if (likely(ret == 0)) return NETDEV_TX_OK; @@ -717,6 +717,7 @@ static int netvsc_set_queues(struct net_device *net, struct hv_device *dev, u32 num_chn) { struct netvsc_device_info device_info; + struct netvsc_device *net_device; int ret; memset(&device_info, 0, sizeof(device_info)); @@ -724,17 +725,16 @@ static int netvsc_set_queues(struct net_device *net, struct hv_device *dev, device_info.ring_size = ring_size; device_info.max_num_vrss_chns = num_chn; - ret = rndis_filter_device_add(dev, &device_info); - if (ret) - return ret; - ret = netif_set_real_num_tx_queues(net, num_chn); if (ret) return ret; ret = netif_set_real_num_rx_queues(net, num_chn); + if (ret) + return ret; - return 
ret; + net_device = rndis_filter_device_add(dev, &device_info); + return IS_ERR(net_device) ? PTR_ERR(net_device) : 0; } static int netvsc_set_channels(struct net_device *net, @@ -744,7 +744,7 @@ static int netvsc_set_channels(struct net_device *net, struct hv_device *dev = net_device_ctx->device_ctx; struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); unsigned int count = channels->combined_count; - bool was_running; + bool was_opened; int ret; /* We do not support separate count for rx, tx, or other */ @@ -764,12 +764,9 @@ static int netvsc_set_channels(struct net_device *net, if (count > nvdev->max_chn) return -EINVAL; - was_running = netif_running(net); - if (was_running) { - ret = netvsc_close(net); - if (ret) - return ret; - } + was_opened = rndis_filter_opened(nvdev); + if (was_opened) + rndis_filter_close(nvdev); rndis_filter_device_remove(dev, nvdev); @@ -779,10 +776,12 @@ static int netvsc_set_channels(struct net_device *net, else netvsc_set_queues(net, dev, nvdev->num_chn); - if (was_running) - ret = netvsc_open(net); + nvdev = rtnl_dereference(net_device_ctx->nvdev); + if (was_opened) + rndis_filter_open(nvdev); /* We may have missed link change notifications */ + net_device_ctx->last_reconfig = 0; schedule_delayed_work(&net_device_ctx->dwork, 0); return ret; @@ -848,19 +847,18 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) struct net_device_context *ndevctx = netdev_priv(ndev); struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); struct hv_device *hdev = ndevctx->device_ctx; + int orig_mtu = ndev->mtu; struct netvsc_device_info device_info; - bool was_running; + bool was_opened; int ret = 0; if (!nvdev || nvdev->destroy) return -ENODEV; - was_running = netif_running(ndev); - if (was_running) { - ret = netvsc_close(ndev); - if (ret) - return ret; - } + netif_device_detach(ndev); + was_opened = rndis_filter_opened(nvdev); + if (was_opened) + rndis_filter_close(nvdev); memset(&device_info, 0, 
sizeof(device_info)); device_info.ring_size = ring_size; @@ -869,18 +867,21 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) rndis_filter_device_remove(hdev, nvdev); - /* 'nvdev' has been freed in rndis_filter_device_remove() -> - * netvsc_device_remove () -> free_netvsc_device(). - * We mustn't access it before it's re-created in - * rndis_filter_device_add() -> netvsc_device_add(). - */ - ndev->mtu = mtu; - rndis_filter_device_add(hdev, &device_info); + nvdev = rndis_filter_device_add(hdev, &device_info); + if (IS_ERR(nvdev)) { + ret = PTR_ERR(nvdev); + + /* Attempt rollback to original MTU */ + ndev->mtu = orig_mtu; + rndis_filter_device_add(hdev, &device_info); + } + + if (was_opened) + rndis_filter_open(nvdev); - if (was_running) - ret = netvsc_open(ndev); + netif_device_attach(ndev); /* We may have missed link change notifications */ schedule_delayed_work(&ndevctx->dwork, 0); @@ -1363,7 +1364,7 @@ static struct net_device *get_netvsc_byref(struct net_device *vf_netdev) continue; /* not a netvsc device */ net_device_ctx = netdev_priv(dev); - if (net_device_ctx->nvdev == NULL) + if (!rtnl_dereference(net_device_ctx->nvdev)) continue; /* device is removed */ if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev) @@ -1528,8 +1529,10 @@ static int netvsc_probe(struct hv_device *dev, memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; device_info.num_chn = VRSS_CHANNEL_DEFAULT; - ret = rndis_filter_device_add(dev, &device_info); - if (ret != 0) { + + nvdev = rndis_filter_device_add(dev, &device_info); + if (IS_ERR(nvdev)) { + ret = PTR_ERR(nvdev); netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); free_netdev(net); hv_set_drvdata(dev, NULL); @@ -1543,10 +1546,11 @@ static int netvsc_probe(struct hv_device *dev, NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; net->vlan_features = net->features; - /* RCU not necessary here, device not registered */ - nvdev = net_device_ctx->nvdev; 
netif_set_real_num_tx_queues(net, nvdev->num_chn); netif_set_real_num_rx_queues(net, nvdev->num_chn); + rtnl_unlock(); + + netdev_lockdep_set_classes(net); /* MTU range: 68 - 1500 or 65521 */ net->min_mtu = NETVSC_MTU_MIN; @@ -1588,7 +1592,8 @@ static int netvsc_remove(struct hv_device *dev) * removed. Also blocks mtu and channel changes. */ rtnl_lock(); - rndis_filter_device_remove(dev, ndev_ctx->nvdev); + rndis_filter_device_remove(dev, + rtnl_dereference(ndev_ctx->nvdev)); rtnl_unlock(); unregister_netdev(net); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 85c00e1c52b6..e439886f72c1 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -84,6 +84,14 @@ static struct rndis_device *get_rndis_device(void) return device; } +static struct netvsc_device * +net_device_to_netvsc_device(struct net_device *ndev) +{ + struct net_device_context *net_device_ctx = netdev_priv(ndev); + + return rtnl_dereference(net_device_ctx->nvdev); +} + static struct rndis_request *get_rndis_request(struct rndis_device *dev, u32 msg_type, u32 msg_len) @@ -243,7 +251,7 @@ static int rndis_filter_send_request(struct rndis_device *dev, pb[0].len; } - ret = netvsc_send(net_device_ctx->device_ctx, packet, NULL, &pb, NULL); + ret = netvsc_send(net_device_ctx, packet, NULL, &pb, NULL); return ret; } @@ -472,7 +480,7 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid, if (oid == OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES) { struct net_device_context *ndevctx = netdev_priv(dev->ndev); - struct netvsc_device *nvdev = ndevctx->nvdev; + struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); struct ndis_offload *hwcaps; u32 nvsp_version = nvdev->nvsp_version; u8 ndis_rev; @@ -658,9 +666,9 @@ cleanup: static int rndis_filter_set_offload_params(struct net_device *ndev, + struct netvsc_device *nvdev, struct ndis_offload_params *req_offloads) { - struct netvsc_device *nvdev = 
net_device_to_netvsc_device(ndev); struct rndis_device *rdev = nvdev->extension; struct rndis_request *request; struct rndis_set_request *set; @@ -944,7 +952,7 @@ static void rndis_filter_halt_device(struct rndis_device *dev) struct rndis_request *request; struct rndis_halt_request *halt; struct net_device_context *net_device_ctx = netdev_priv(dev->ndev); - struct netvsc_device *nvdev = net_device_ctx->nvdev; + struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); /* Attempt to do a rndis device halt */ request = get_rndis_request(dev, RNDIS_MSG_HALT, @@ -1052,8 +1060,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) complete(&nvscdev->channel_init_wait); } -int rndis_filter_device_add(struct hv_device *dev, - struct netvsc_device_info *device_info) +struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, + struct netvsc_device_info *device_info) { struct net_device *net = hv_get_drvdata(dev); struct net_device_context *net_device_ctx = netdev_priv(net); @@ -1072,21 +1080,20 @@ int rndis_filter_device_add(struct hv_device *dev, rndis_device = get_rndis_device(); if (!rndis_device) - return -ENODEV; + return ERR_PTR(-ENODEV); /* * Let the inner driver handle this first to create the netvsc channel * NOTE! 
Once the channel is created, we may get a receive callback * (RndisFilterOnReceive()) before this call is completed */ - ret = netvsc_device_add(dev, device_info); - if (ret != 0) { + net_device = netvsc_device_add(dev, device_info); + if (IS_ERR(net_device)) { kfree(rndis_device); - return ret; + return net_device; } /* Initialize the rndis device */ - net_device = net_device_ctx->nvdev; net_device->max_chn = 1; net_device->num_chn = 1; @@ -1097,10 +1104,8 @@ int rndis_filter_device_add(struct hv_device *dev, /* Send the rndis initialization message */ ret = rndis_filter_init_device(rndis_device); - if (ret != 0) { - rndis_filter_device_remove(dev, net_device); - return ret; - } + if (ret != 0) + goto err_dev_remv; /* Get the MTU from the host */ size = sizeof(u32); @@ -1112,19 +1117,15 @@ int rndis_filter_device_add(struct hv_device *dev, /* Get the mac address */ ret = rndis_filter_query_device_mac(rndis_device); - if (ret != 0) { - rndis_filter_device_remove(dev, net_device); - return ret; - } + if (ret != 0) + goto err_dev_remv; memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN); /* Find HW offload capabilities */ ret = rndis_query_hwcaps(rndis_device, &hwcaps); - if (ret != 0) { - rndis_filter_device_remove(dev, net_device); - return ret; - } + if (ret != 0) + goto err_dev_remv; /* A value of zero means "no change"; now turn on what we want. */ memset(&offloads, 0, sizeof(struct ndis_offload_params)); @@ -1179,7 +1180,7 @@ int rndis_filter_device_add(struct hv_device *dev, netif_set_gso_max_size(net, gso_max_size); - ret = rndis_filter_set_offload_params(net, &offloads); + ret = rndis_filter_set_offload_params(net, net_device, &offloads); if (ret) goto err_dev_remv; @@ -1190,7 +1191,7 @@ int rndis_filter_device_add(struct hv_device *dev, rndis_device->link_state ? 
"down" : "up"); if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) - return 0; + return net_device; rndis_filter_query_link_speed(rndis_device); @@ -1223,7 +1224,7 @@ int rndis_filter_device_add(struct hv_device *dev, num_rss_qs = net_device->num_chn - 1; if (num_rss_qs == 0) - return 0; + return net_device; refcount_set(&net_device->sc_offered, num_rss_qs); vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); @@ -1260,11 +1261,11 @@ out: net_device->num_chn = 1; } - return 0; /* return 0 because primary channel can be used alone */ + return net_device; err_dev_remv: rndis_filter_device_remove(dev, net_device); - return ret; + return ERR_PTR(ret); } void rndis_filter_device_remove(struct hv_device *dev, @@ -1302,3 +1303,8 @@ int rndis_filter_close(struct netvsc_device *nvdev) return rndis_filter_close_device(nvdev->extension); } + +bool rndis_filter_opened(const struct netvsc_device *nvdev) +{ + return atomic_read(&nvdev->open_cnt) > 0; +} diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index a626c539fb17..326243fae7e2 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -66,6 +66,7 @@ #include <linux/spinlock.h> #include <linux/string.h> #include <linux/workqueue.h> +#include <linux/interrupt.h> #include <net/ieee802154_netdev.h> #include <net/mac802154.h> diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index f37e3c1fd4e7..fdde20735416 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -169,7 +169,7 @@ static void ipvlan_port_destroy(struct net_device *dev) #define IPVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ - NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \ + NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) diff --git 
a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c index 22f133ea8d7b..5dea2063dbc8 100644 --- a/drivers/net/ipvlan/ipvtap.c +++ b/drivers/net/ipvlan/ipvtap.c @@ -24,7 +24,7 @@ #include <linux/virtio_net.h> #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ - NETIF_F_TSO6 | NETIF_F_UFO) + NETIF_F_TSO6) static dev_t ipvtap_major; static struct cdev ipvtap_cdev; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 0f581ee74fe4..ca35c6ba7947 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -841,7 +841,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key; #define MACVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ - NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_LRO | \ + NETIF_F_GSO | NETIF_F_TSO | NETIF_F_LRO | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 91e7b19bbf86..c2d0ea2fb019 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -49,7 +49,7 @@ static struct class macvtap_class = { static struct cdev macvtap_cdev; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ - NETIF_F_TSO6 | NETIF_F_UFO) + NETIF_F_TSO6) static void macvtap_count_tx_dropped(struct tap_dev *tap) { diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 2df7b62c1a36..b6f9fa670168 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -399,8 +399,7 @@ error: } /* Put PHYs in RESET to save power */ - if (bus->reset_gpiod) - gpiod_set_value_cansleep(bus->reset_gpiod, 1); + gpiod_set_value_cansleep(bus->reset_gpiod, 1); device_del(&bus->dev); return err; @@ -425,8 +424,7 @@ void mdiobus_unregister(struct mii_bus *bus) } /* Put PHYs in RESET to save power */ - if (bus->reset_gpiod) - gpiod_set_value_cansleep(bus->reset_gpiod, 1); + 
gpiod_set_value_cansleep(bus->reset_gpiod, 1); device_del(&bus->dev); } diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 3570c7576993..ca267fd28ab8 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -943,9 +943,6 @@ static int set_offload(struct tap_queue *q, unsigned long arg) if (arg & TUN_F_TSO6) feature_mask |= NETIF_F_TSO6; } - - if (arg & TUN_F_UFO) - feature_mask |= NETIF_F_UFO; } /* tun/tap driver inverts the usage for TSO offloads, where @@ -956,7 +953,7 @@ static int set_offload(struct tap_queue *q, unsigned long arg) * When user space turns off TSO, we turn off GSO/LRO so that * user-space will not receive TSO frames. */ - if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_UFO)) + if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6)) features |= RX_OFFLOADS; else features &= ~RX_OFFLOADS; @@ -1078,7 +1075,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | - TUN_F_TSO_ECN | TUN_F_UFO)) + TUN_F_TSO_ECN)) return -EINVAL; rtnl_lock(); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3d4c24572ecd..a93392d7a340 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -199,7 +199,7 @@ struct tun_struct { struct net_device *dev; netdev_features_t set_features; #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ - NETIF_F_TSO6|NETIF_F_UFO) + NETIF_F_TSO6) int align; int vnet_hdr_sz; @@ -1921,11 +1921,6 @@ static int set_offload(struct tun_struct *tun, unsigned long arg) features |= NETIF_F_TSO6; arg &= ~(TUN_F_TSO4|TUN_F_TSO6); } - - if (arg & TUN_F_UFO) { - features |= NETIF_F_UFO; - arg &= ~TUN_F_UFO; - } } /* This gives the user a way to test for new features in future by diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 8f572b9f3625..811b18215cae 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -367,7 +367,7 @@ static struct attribute 
*cdc_ncm_sysfs_attrs[] = { NULL, }; -static struct attribute_group cdc_ncm_sysfs_attr_group = { +static const struct attribute_group cdc_ncm_sysfs_attr_group = { .name = "cdc_ncm", .attrs = cdc_ncm_sysfs_attrs, }; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 99a26a9efec1..99830167ea2f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2429,7 +2429,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { - dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO + dev->hw_features |= NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6; } /* Individual feature bits: what can host handle? */ @@ -2439,13 +2439,11 @@ static int virtnet_probe(struct virtio_device *vdev) dev->hw_features |= NETIF_F_TSO6; if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) dev->hw_features |= NETIF_F_TSO_ECN; - if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO)) - dev->hw_features |= NETIF_F_UFO; dev->features |= NETIF_F_GSO_ROBUST; if (gso) - dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO); + dev->features |= dev->hw_features & NETIF_F_ALL_TSO; /* (!csum && gso) case will be fixed by register_netdev() */ } if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index 84143a02adce..54201c02fdb8 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -7837,7 +7837,7 @@ static int writerids(struct net_device *dev, aironet_ioctl *comp) { struct airo_info *ai = dev->ml_priv; int ridcode; int enabled; - static int (* writer)(struct airo_info *, u16 rid, const void *, int, int); + int (*writer)(struct airo_info *, u16 rid, const void *, int, int); unsigned char *iobuf; /* Only super-user can write RIDs */ diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c index 
aaaca4d08e2b..ccbe74589eec 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c @@ -4324,7 +4324,7 @@ static struct attribute *ipw2100_sysfs_entries[] = { NULL, }; -static struct attribute_group ipw2100_attribute_group = { +static const struct attribute_group ipw2100_attribute_group = { .attrs = ipw2100_sysfs_entries, }; diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c index 9368abdf18e2..c311b1a994c1 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c @@ -11500,7 +11500,7 @@ static struct attribute *ipw_sysfs_entries[] = { NULL }; -static struct attribute_group ipw_attribute_group = { +static const struct attribute_group ipw_attribute_group = { .name = NULL, /* put in device directory */ .attrs = ipw_sysfs_entries, }; diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index 38bf403bb1e1..329f3a63dadd 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -3464,7 +3464,7 @@ static struct attribute *il3945_sysfs_entries[] = { NULL }; -static struct attribute_group il3945_attribute_group = { +static const struct attribute_group il3945_attribute_group = { .name = NULL, /* put in device directory */ .attrs = il3945_sysfs_entries, }; diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index 5b51fba75595..de9b6522c43f 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -4654,7 +4654,7 @@ static struct attribute *il_sysfs_entries[] = { NULL }; -static struct attribute_group il_attribute_group = { +static const struct attribute_group il_attribute_group = { .name = NULL, /* put in device directory */ .attrs = il_sysfs_entries, }; diff --git 
a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c index 55f238a2a310..c58393eab6a1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c @@ -478,7 +478,6 @@ u16 rtl92ee_rx_desc_buff_remained_cnt(struct ieee80211_hw *hw, u8 queue_index) struct rtl_priv *rtlpriv = rtl_priv(hw); u16 read_point = 0, write_point = 0, remind_cnt = 0; u32 tmp_4byte = 0; - static u16 last_read_point; static bool start_rx; tmp_4byte = rtl_read_dword(rtlpriv, REG_RXQ_TXBD_IDX); @@ -506,7 +505,6 @@ u16 rtl92ee_rx_desc_buff_remained_cnt(struct ieee80211_hw *hw, u8 queue_index) rtlpci->rx_ring[queue_index].next_rx_rp = write_point; - last_read_point = read_point; return remind_cnt; } @@ -917,7 +915,6 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, struct rtl_priv *rtlpriv = rtl_priv(hw); u16 cur_tx_rp = 0; u16 cur_tx_wp = 0; - static u16 last_txw_point; static bool over_run; u32 tmp = 0; u8 q_idx = *val; @@ -951,9 +948,6 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, rtl_write_word(rtlpriv, get_desc_addr_fr_q_idx(q_idx), ring->cur_tx_wp); - - if (q_idx == 1) - last_txw_point = cur_tx_wp; } if (ring->avl_desc < (max_tx_desc - 15)) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b69e7a5869ff..6353c7474dba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -318,6 +318,12 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); + +/* Map specifics */ +struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); +void __dev_map_insert_ctx(struct bpf_map *map, u32 index); +void __dev_map_flush(struct bpf_map *map); + #else static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -356,6 +362,20 @@ static inline int __bpf_prog_charge(struct user_struct *user, u32 pages) 
static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) { } + +static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, + u32 key) +{ + return NULL; +} + +static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index) +{ +} + +static inline void __dev_map_flush(struct bpf_map *map) +{ +} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 3d137c33d664..b1e1035ca24b 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -35,3 +35,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) +#ifdef CONFIG_NET +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +#endif diff --git a/include/linux/filter.h b/include/linux/filter.h index bfef1e5734f8..d19ed3c15e1e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -711,7 +711,21 @@ bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); + +/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the + * same cpu context. Further for best results no more than a single map + * for the do_redirect/do_flush pair should be used. This limitation is + * because we only track one map and force a flush when the map changes. + * This does not appear to be a real limitation for existing software. 
+ */ +int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb); +int xdp_do_redirect(struct net_device *dev, + struct xdp_buff *xdp, + struct bpf_prog *prog); +void xdp_do_flush_map(void); + void bpf_warn_invalid_xdp_action(u32 act); +void bpf_warn_invalid_xdp_redirect(u32 ifindex); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 1d4737cffc71..ebd273627334 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -36,7 +36,6 @@ enum { /**/NETIF_F_GSO_SHIFT, /* keep the order of SKB_GSO_* bits */ NETIF_F_TSO_BIT /* ... TCPv4 segmentation */ = NETIF_F_GSO_SHIFT, - NETIF_F_UFO_BIT, /* ... UDPv4 fragmentation */ NETIF_F_GSO_ROBUST_BIT, /* ... ->SKB_GSO_DODGY */ NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */ NETIF_F_TSO_MANGLEID_BIT, /* ... IPV4 ID mangling allowed */ @@ -118,7 +117,6 @@ enum { #define NETIF_F_TSO6 __NETIF_F(TSO6) #define NETIF_F_TSO_ECN __NETIF_F(TSO_ECN) #define NETIF_F_TSO __NETIF_F(TSO) -#define NETIF_F_UFO __NETIF_F(UFO) #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) @@ -172,7 +170,7 @@ enum { NETIF_F_FSO) /* List of features with software fallbacks. 
*/ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO | \ +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | \ NETIF_F_GSO_SCTP) /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 779b23595596..614642eb7eb7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -66,6 +66,7 @@ struct mpls_dev; /* UDP Tunnel offloads */ struct udp_tunnel_info; struct bpf_prog; +struct xdp_buff; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -1138,7 +1139,12 @@ struct xfrmdev_ops { * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); * This function is used to set or query state related to XDP on the * netdevice. See definition of enum xdp_netdev_command for details. - * + * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); + * This function is used to submit an XDP packet for transmit on a + * netdevice. + * void (*ndo_xdp_flush)(struct net_device *dev); + * This function is used to inform the driver to flush a particular + * XDP tx queue. Must be called on same CPU as xdp_xmit. 
*/ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1323,6 +1329,9 @@ struct net_device_ops { int needed_headroom); int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); + int (*ndo_xdp_xmit)(struct net_device *dev, + struct xdp_buff *xdp); + void (*ndo_xdp_flush)(struct net_device *dev); }; /** @@ -2423,8 +2432,8 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); int dev_open(struct net_device *dev); -int dev_close(struct net_device *dev); -int dev_close_many(struct list_head *head, bool unlink); +void dev_close(struct net_device *dev); +void dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); int dev_queue_xmit(struct sk_buff *skb); @@ -4089,7 +4098,6 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) /* check flags correspondence */ BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); - BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 99e866487e2f..913474dfc96c 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -273,69 +273,64 @@ struct sctp_init_chunk { /* Section 3.3.2.1. IPv4 Address Parameter (5) */ -typedef struct sctp_ipv4addr_param { +struct sctp_ipv4addr_param { struct sctp_paramhdr param_hdr; - struct in_addr addr; -} sctp_ipv4addr_param_t; + struct in_addr addr; +}; /* Section 3.3.2.1. 
IPv6 Address Parameter (6) */ -typedef struct sctp_ipv6addr_param { +struct sctp_ipv6addr_param { struct sctp_paramhdr param_hdr; struct in6_addr addr; -} sctp_ipv6addr_param_t; +}; /* Section 3.3.2.1 Cookie Preservative (9) */ -typedef struct sctp_cookie_preserve_param { +struct sctp_cookie_preserve_param { struct sctp_paramhdr param_hdr; - __be32 lifespan_increment; -} sctp_cookie_preserve_param_t; + __be32 lifespan_increment; +}; /* Section 3.3.2.1 Host Name Address (11) */ -typedef struct sctp_hostname_param { +struct sctp_hostname_param { struct sctp_paramhdr param_hdr; uint8_t hostname[0]; -} sctp_hostname_param_t; +}; /* Section 3.3.2.1 Supported Address Types (12) */ -typedef struct sctp_supported_addrs_param { +struct sctp_supported_addrs_param { struct sctp_paramhdr param_hdr; __be16 types[0]; -} sctp_supported_addrs_param_t; - -/* Appendix A. ECN Capable (32768) */ -typedef struct sctp_ecn_capable_param { - struct sctp_paramhdr param_hdr; -} sctp_ecn_capable_param_t; +}; /* ADDIP Section 3.2.6 Adaptation Layer Indication */ -typedef struct sctp_adaptation_ind_param { +struct sctp_adaptation_ind_param { struct sctp_paramhdr param_hdr; __be32 adaptation_ind; -} sctp_adaptation_ind_param_t; +}; /* ADDIP Section 4.2.7 Supported Extensions Parameter */ -typedef struct sctp_supported_ext_param { +struct sctp_supported_ext_param { struct sctp_paramhdr param_hdr; __u8 chunks[0]; -} sctp_supported_ext_param_t; +}; /* AUTH Section 3.1 Random */ -typedef struct sctp_random_param { +struct sctp_random_param { struct sctp_paramhdr param_hdr; __u8 random_val[0]; -} sctp_random_param_t; +}; /* AUTH Section 3.2 Chunk List */ -typedef struct sctp_chunks_param { +struct sctp_chunks_param { struct sctp_paramhdr param_hdr; __u8 chunks[0]; -} sctp_chunks_param_t; +}; /* AUTH Section 3.3 HMAC Algorithm */ -typedef struct sctp_hmac_algo_param { +struct sctp_hmac_algo_param { struct sctp_paramhdr param_hdr; __be16 hmac_ids[0]; -} sctp_hmac_algo_param_t; +}; /* RFC 2960. 
Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2): * The INIT ACK chunk is used to acknowledge the initiation of an SCTP diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index dbe29b6c9bd6..4093552be1de 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -463,39 +463,38 @@ enum { enum { SKB_GSO_TCPV4 = 1 << 0, - SKB_GSO_UDP = 1 << 1, /* This indicates the skb is from an untrusted source. */ - SKB_GSO_DODGY = 1 << 2, + SKB_GSO_DODGY = 1 << 1, /* This indicates the tcp segment has CWR set. */ - SKB_GSO_TCP_ECN = 1 << 3, + SKB_GSO_TCP_ECN = 1 << 2, - SKB_GSO_TCP_FIXEDID = 1 << 4, + SKB_GSO_TCP_FIXEDID = 1 << 3, - SKB_GSO_TCPV6 = 1 << 5, + SKB_GSO_TCPV6 = 1 << 4, - SKB_GSO_FCOE = 1 << 6, + SKB_GSO_FCOE = 1 << 5, - SKB_GSO_GRE = 1 << 7, + SKB_GSO_GRE = 1 << 6, - SKB_GSO_GRE_CSUM = 1 << 8, + SKB_GSO_GRE_CSUM = 1 << 7, - SKB_GSO_IPXIP4 = 1 << 9, + SKB_GSO_IPXIP4 = 1 << 8, - SKB_GSO_IPXIP6 = 1 << 10, + SKB_GSO_IPXIP6 = 1 << 9, - SKB_GSO_UDP_TUNNEL = 1 << 11, + SKB_GSO_UDP_TUNNEL = 1 << 10, - SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12, + SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, - SKB_GSO_PARTIAL = 1 << 13, + SKB_GSO_PARTIAL = 1 << 12, - SKB_GSO_TUNNEL_REMCSUM = 1 << 14, + SKB_GSO_TUNNEL_REMCSUM = 1 << 13, - SKB_GSO_SCTP = 1 << 15, + SKB_GSO_SCTP = 1 << 14, - SKB_GSO_ESP = 1 << 16, + SKB_GSO_ESP = 1 << 15, }; #if BITS_PER_LONG > 32 @@ -945,12 +944,6 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } -struct sk_buff *__alloc_skb_head(gfp_t priority, int node); -static inline struct sk_buff *alloc_skb_head(gfp_t priority) -{ - return __alloc_skb_head(priority, -1); -} - struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 5209b5ed2a64..32fb046f2173 100644 --- 
a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -18,9 +18,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, case VIRTIO_NET_HDR_GSO_TCPV6: gso_type = SKB_GSO_TCPV6; break; - case VIRTIO_NET_HDR_GSO_UDP: - gso_type = SKB_GSO_UDP; - break; default: return -EINVAL; } @@ -73,8 +70,6 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; - else if (sinfo->gso_type & SKB_GSO_UDP) - hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; else return -EINVAL; if (sinfo->gso_type & SKB_GSO_TCP_ECN) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 53b1a2cca421..afb37f835449 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -58,7 +58,6 @@ struct unix_sock { struct list_head link; atomic_long_t inflight; spinlock_t lock; - unsigned char recursion_level; unsigned long gc_flags; #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 diff --git a/include/net/dsa.h b/include/net/dsa.h index 58969b9a090c..88da272d20d0 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -256,11 +256,6 @@ static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p) return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p); } -static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p) -{ - return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; -} - static inline u8 dsa_upstream_port(struct dsa_switch *ds) { struct dsa_switch_tree *dst = ds->dst; diff --git a/include/net/flow.h b/include/net/flow.h index bae198b3039e..f3dc61b29bb5 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -218,40 +218,6 @@ static inline unsigned int flow_key_size(u16 family) return 0; } -#define FLOW_DIR_IN 0 -#define FLOW_DIR_OUT 1 -#define FLOW_DIR_FWD 2 - -struct net; -struct sock; -struct flow_cache_ops; - -struct flow_cache_object { - const struct flow_cache_ops *ops; -}; - -struct 
flow_cache_ops { - struct flow_cache_object *(*get)(struct flow_cache_object *); - int (*check)(struct flow_cache_object *); - void (*delete)(struct flow_cache_object *); -}; - -typedef struct flow_cache_object *(*flow_resolve_t)( - struct net *net, const struct flowi *key, u16 family, - u8 dir, struct flow_cache_object *oldobj, void *ctx); - -struct flow_cache_object *flow_cache_lookup(struct net *net, - const struct flowi *key, u16 family, - u8 dir, flow_resolve_t resolver, - void *ctx); -int flow_cache_init(struct net *net); -void flow_cache_fini(struct net *net); -void flow_cache_hp_init(void); - -void flow_cache_flush(struct net *net); -void flow_cache_flush_deferred(struct net *net); -extern atomic_t flow_cache_genid; - __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6) diff --git a/include/net/flowcache.h b/include/net/flowcache.h deleted file mode 100644 index 51eb971e8973..000000000000 --- a/include/net/flowcache.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _NET_FLOWCACHE_H -#define _NET_FLOWCACHE_H - -#include <linux/interrupt.h> -#include <linux/types.h> -#include <linux/timer.h> -#include <linux/notifier.h> - -struct flow_cache_percpu { - struct hlist_head *hash_table; - unsigned int hash_count; - u32 hash_rnd; - int hash_rnd_recalc; - struct tasklet_struct flush_tasklet; -}; - -struct flow_cache { - u32 hash_shift; - struct flow_cache_percpu __percpu *percpu; - struct hlist_node node; - unsigned int low_watermark; - unsigned int high_watermark; - struct timer_list rnd_timer; -}; -#endif /* _NET_FLOWCACHE_H */ diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index f2a215fc78e4..950ed182f62f 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -33,18 +33,12 @@ struct inetpeer_addr { }; struct inet_peer { - /* group together avl_left,avl_right,v4daddr to speedup lookups */ - struct inet_peer __rcu *avl_left, *avl_right; + struct rb_node 
rb_node; struct inetpeer_addr daddr; - __u32 avl_height; u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ unsigned long rate_last; - union { - struct list_head gc_list; - struct rcu_head gc_rcu; - }; /* * Once inet_peer is queued for deletion (refcnt == 0), following field * is not available: rid @@ -55,7 +49,6 @@ struct inet_peer { atomic_t rid; /* Frag reception counter */ }; struct rcu_head rcu; - struct inet_peer *gc_next; }; /* following fields might be frequently dirtied */ @@ -64,7 +57,7 @@ struct inet_peer { }; struct inet_peer_base { - struct inet_peer __rcu *root; + struct rb_root rb_root; seqlock_t lock; int total; }; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 199056933dcb..907d39a42f6b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -194,7 +194,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, struct rt6_info *rt = (struct rt6_info *)dst; return rt->rt6i_flags & RTF_ANYCAST || - (rt->rt6i_dst.plen != 128 && + (rt->rt6i_dst.plen < 127 && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 27bb9633c69d..611521646dd4 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -6,7 +6,6 @@ #include <linux/workqueue.h> #include <linux/xfrm.h> #include <net/dst_ops.h> -#include <net/flowcache.h> struct ctl_table_header; @@ -73,16 +72,6 @@ struct netns_xfrm { spinlock_t xfrm_state_lock; spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; - - /* flow cache part */ - struct flow_cache flow_cache_global; - atomic_t flow_cache_genid; - struct list_head flow_cache_gc_list; - atomic_t flow_cache_gc_count; - spinlock_t flow_cache_gc_lock; - struct work_struct flow_cache_gc_work; - struct work_struct flow_cache_flush_work; - struct mutex flow_flush_sem; }; #endif diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 5ab29af8ca8a..66cd7639b912 100644 --- 
a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1556,9 +1556,9 @@ struct sctp_association { * and authenticated chunk list. All that is part of the * cookie and these are just pointers to those locations */ - sctp_random_param_t *peer_random; - sctp_chunks_param_t *peer_chunks; - sctp_hmac_algo_param_t *peer_hmacs; + struct sctp_random_param *peer_random; + struct sctp_chunks_param *peer_chunks; + struct sctp_hmac_algo_param *peer_hmacs; } peer; /* State : A state variable indicating what state the diff --git a/include/net/tcp.h b/include/net/tcp.h index 70483296157f..4f056ea79df2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -139,6 +139,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #endif #define TCP_RTO_MAX ((unsigned)(120*HZ)) #define TCP_RTO_MIN ((unsigned)(HZ/5)) +#define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */ #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now * used as a fallback RTO for the @@ -150,8 +151,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes * for local resources. 
*/ -#define TCP_REO_TIMEOUT_MIN (2000) /* Min RACK reordering timeout in usec */ - #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */ #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ #define TCP_KEEPALIVE_INTVL (75*HZ) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c0916ab18d32..afb4929d7232 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -317,6 +317,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo); void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c); +void xfrm_policy_cache_flush(void); void km_state_notify(struct xfrm_state *x, const struct km_event *c); struct xfrm_tmpl; @@ -563,7 +564,6 @@ struct xfrm_policy { refcount_t refcnt; struct timer_list timer; - struct flow_cache_object flo; atomic_t genid; u32 priority; u32 index; @@ -978,7 +978,6 @@ struct xfrm_dst { struct rt6_info rt6; } u; struct dst_entry *route; - struct flow_cache_object flo; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; u32 xfrm_genid; @@ -1226,9 +1225,6 @@ static inline void xfrm_sk_free_policy(struct sock *sk) } } -void xfrm_garbage_collect(struct net *net); -void xfrm_garbage_collect_deferred(struct net *net); - #else static inline void xfrm_sk_free_policy(struct sock *sk) {} @@ -1263,9 +1259,6 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir, { return 1; } -static inline void xfrm_garbage_collect(struct net *net) -{ -} #endif static __inline__ diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 1b61357d3f57..7b1eb7b4be41 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -12,7 +12,8 @@ FN(ABORTED) \ FN(DROP) \ FN(PASS) \ - FN(TX) + FN(TX) \ + FN(REDIRECT) #define __XDP_ACT_TP_FN(x) \ TRACE_DEFINE_ENUM(XDP_##x); @@ -48,6 +49,34 @@ TRACE_EVENT(xdp_exception, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB)) ); 
+TRACE_EVENT(xdp_redirect, + + TP_PROTO(const struct net_device *from, + const struct net_device *to, + const struct bpf_prog *xdp, u32 act), + + TP_ARGS(from, to, xdp, act), + + TP_STRUCT__entry( + __string(name_from, from->name) + __string(name_to, to->name) + __array(u8, prog_tag, 8) + __field(u32, act) + ), + + TP_fast_assign( + BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag)); + memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); + __assign_str(name_from, from->name); + __assign_str(name_to, to->name); + __entry->act = act; + ), + + TP_printk("prog=%s from=%s to=%s action=%s", + __print_hex_str(__entry->prog_tag, 8), + __get_str(name_from), __get_str(name_to), + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB)) +); #endif /* _TRACE_XDP_H */ #include <trace/define_trace.h> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e99e3e6f8b37..1106a8c4cd36 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -104,6 +104,7 @@ enum bpf_map_type { BPF_MAP_TYPE_LPM_TRIE, BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH_OF_MAPS, + BPF_MAP_TYPE_DEVMAP, }; enum bpf_prog_type { @@ -347,6 +348,11 @@ union bpf_attr { * @flags: bit 0 - if set, redirect to ingress instead of egress * other bits - reserved * Return: TC_ACT_REDIRECT + * int bpf_redirect_map(key, map, flags) + * redirect to endpoint in map + * @key: index in map to lookup + * @map: fd of map to do lookup in + * @flags: -- * * u32 bpf_get_route_realm(skb) * retrieve a dst's tclassid @@ -591,7 +597,8 @@ union bpf_attr { FN(get_socket_uid), \ FN(set_hash), \ FN(setsockopt), \ - FN(skb_adjust_room), + FN(skb_adjust_room), \ + FN(redirect_map), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -717,6 +724,7 @@ enum xdp_action { XDP_DROP, XDP_PASS, XDP_TX, + XDP_REDIRECT, }; /* user accessible metadata for XDP packet hook diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 
e1e5e658f2db..48e92705be59 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -2,6 +2,9 @@ obj-y := core.o obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o +ifeq ($(CONFIG_NET),y) +obj-$(CONFIG_BPF_SYSCALL) += devmap.o +endif ifeq ($(CONFIG_PERF_EVENTS),y) obj-$(CONFIG_BPF_SYSCALL) += stackmap.o endif diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c new file mode 100644 index 000000000000..899364d097f5 --- /dev/null +++ b/kernel/bpf/devmap.c @@ -0,0 +1,431 @@ +/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +/* Devmaps primary use is as a backend map for XDP BPF helper call + * bpf_redirect_map(). Because XDP is mostly concerned with performance we + * spent some effort to ensure the datapath with redirect maps does not use + * any locking. This is a quick note on the details. + * + * We have three possible paths to get into the devmap control plane bpf + * syscalls, bpf programs, and driver side xmit/flush operations. A bpf syscall + * will invoke an update, delete, or lookup operation. To ensure updates and + * deletes appear atomic from the datapath side xchg() is used to modify the + * netdev_map array. Then because the datapath does a lookup into the netdev_map + * array (read-only) from an RCU critical section we use call_rcu() to wait for + * an rcu grace period before free'ing the old data structures. This ensures the + * datapath always has a valid copy. 
However, the datapath does a "flush" + * operation that pushes any pending packets in the driver outside the RCU + * critical section. Each bpf_dtab_netdev tracks these pending operations using + * an atomic per-cpu bitmap. The bpf_dtab_netdev object will not be destroyed + * until all bits are cleared indicating outstanding flush operations have + * completed. + * + * BPF syscalls may race with BPF program calls on any of the update, delete + * or lookup operations. As noted above the xchg() operation also keeps the + * netdev_map consistent in this case. From the devmap side BPF programs + * calling into these operations are the same as multiple user space threads + * making system calls. + * + * Finally, any of the above may race with a netdev_unregister notifier. The + * unregister notifier must search for net devices in the map structure that + * contain a reference to the net device and remove them. This is a two step + * process (a) dereference the bpf_dtab_netdev object in netdev_map and (b) + * check to see if the ifindex is the same as the net_device being removed. + * Unfortunately, the xchg() operations do not protect against this. To avoid + * potentially removing incorrect objects the dev_map_list_mutex protects + * conflicting netdev unregister and BPF syscall operations. Updates and + * deletes from a BPF program (done in rcu critical section) are blocked + * because of this mutex. 
+ */ +#include <linux/bpf.h> +#include <linux/jhash.h> +#include <linux/filter.h> +#include <linux/rculist_nulls.h> +#include "percpu_freelist.h" +#include "bpf_lru_list.h" +#include "map_in_map.h" + +struct bpf_dtab_netdev { + struct net_device *dev; + int key; + struct rcu_head rcu; + struct bpf_dtab *dtab; +}; + +struct bpf_dtab { + struct bpf_map map; + struct bpf_dtab_netdev **netdev_map; + unsigned long int __percpu *flush_needed; + struct list_head list; +}; + +static DEFINE_MUTEX(dev_map_list_mutex); +static LIST_HEAD(dev_map_list); + +static struct bpf_map *dev_map_alloc(union bpf_attr *attr) +{ + struct bpf_dtab *dtab; + u64 cost; + int err; + + /* check sanity of attributes */ + if (attr->max_entries == 0 || attr->key_size != 4 || + attr->value_size != 4 || attr->map_flags) + return ERR_PTR(-EINVAL); + + /* if value_size is bigger, the user space won't be able to + * access the elements. + */ + if (attr->value_size > KMALLOC_MAX_SIZE) + return ERR_PTR(-E2BIG); + + dtab = kzalloc(sizeof(*dtab), GFP_USER); + if (!dtab) + return ERR_PTR(-ENOMEM); + + /* mandatory map attributes */ + dtab->map.map_type = attr->map_type; + dtab->map.key_size = attr->key_size; + dtab->map.value_size = attr->value_size; + dtab->map.max_entries = attr->max_entries; + dtab->map.map_flags = attr->map_flags; + + err = -ENOMEM; + + /* make sure page count doesn't overflow */ + cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); + cost += BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long); + if (cost >= U32_MAX - PAGE_SIZE) + goto free_dtab; + + dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + + /* if map size is larger than memlock limit, reject it early */ + err = bpf_map_precharge_memlock(dtab->map.pages); + if (err) + goto free_dtab; + + /* A per cpu bitfield with a bit per possible net device */ + dtab->flush_needed = __alloc_percpu( + BITS_TO_LONGS(attr->max_entries) * + sizeof(unsigned long), + __alignof__(unsigned long)); + if 
(!dtab->flush_needed) + goto free_dtab; + + dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries * + sizeof(struct bpf_dtab_netdev *)); + if (!dtab->netdev_map) + goto free_dtab; + + mutex_lock(&dev_map_list_mutex); + list_add_tail(&dtab->list, &dev_map_list); + mutex_unlock(&dev_map_list_mutex); + return &dtab->map; + +free_dtab: + free_percpu(dtab->flush_needed); + kfree(dtab); + return ERR_PTR(err); +} + +static void dev_map_free(struct bpf_map *map) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + int i, cpu; + + /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, + * so the programs (can be more than one that used this map) were + * disconnected from events. Wait for outstanding critical sections in + * these programs to complete. The rcu critical section only guarantees + * no further reads against netdev_map. It does __not__ ensure pending + * flush operations (if any) are complete. + */ + synchronize_rcu(); + + /* To ensure all pending flush operations have completed wait for flush + * bitmap to indicate all flush_needed bits to be zero on _all_ cpus. + * Because the above synchronize_rcu() ensures the map is disconnected + * from the program we can assume no new bits will be set. + */ + for_each_online_cpu(cpu) { + unsigned long *bitmap = per_cpu_ptr(dtab->flush_needed, cpu); + + while (!bitmap_empty(bitmap, dtab->map.max_entries)) + cpu_relax(); + } + + /* Although we should no longer have datapath or bpf syscall operations + * at this point we can still race with netdev notifier, hence the + * lock. 
+ */ + mutex_lock(&dev_map_list_mutex); + for (i = 0; i < dtab->map.max_entries; i++) { + struct bpf_dtab_netdev *dev; + + dev = dtab->netdev_map[i]; + if (!dev) + continue; + + dev_put(dev->dev); + kfree(dev); + } + + /* At this point bpf program is detached and all pending operations + * _must_ be complete + */ + list_del(&dtab->list); + mutex_unlock(&dev_map_list_mutex); + free_percpu(dtab->flush_needed); + bpf_map_area_free(dtab->netdev_map); + kfree(dtab); +} + +static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + u32 index = key ? *(u32 *)key : U32_MAX; + u32 *next = (u32 *)next_key; + + if (index >= dtab->map.max_entries) { + *next = 0; + return 0; + } + + if (index == dtab->map.max_entries - 1) + return -ENOENT; + + *next = index + 1; + return 0; +} + +void __dev_map_insert_ctx(struct bpf_map *map, u32 key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed); + + __set_bit(key, bitmap); +} + +struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct bpf_dtab_netdev *dev; + + if (key >= map->max_entries) + return NULL; + + dev = READ_ONCE(dtab->netdev_map[key]); + return dev ? dev->dev : NULL; +} + +/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled + * from the driver before returning from its napi->poll() routine. The poll() + * routine is called either from busy_poll context or net_rx_action signaled + * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the + * net device can be torn down. On devmap tear down we ensure the ctx bitmap + * is zeroed before completing to ensure all flush operations have completed. 
+ */ +void __dev_map_flush(struct bpf_map *map) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed); + u32 bit; + + for_each_set_bit(bit, bitmap, map->max_entries) { + struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]); + struct net_device *netdev; + + /* This is possible if the dev entry is removed by user space + * between xdp redirect and flush op. + */ + if (unlikely(!dev)) + continue; + + netdev = dev->dev; + + __clear_bit(bit, bitmap); + if (unlikely(!netdev || !netdev->netdev_ops->ndo_xdp_flush)) + continue; + + netdev->netdev_ops->ndo_xdp_flush(netdev); + } +} + +/* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or + * update happens in parallel here a dev_put won't happen until after reading the + * ifindex. + */ +static void *dev_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct bpf_dtab_netdev *dev; + u32 i = *(u32 *)key; + + if (i >= map->max_entries) + return NULL; + + dev = READ_ONCE(dtab->netdev_map[i]); + return dev ? 
&dev->dev->ifindex : NULL; +} + +static void dev_map_flush_old(struct bpf_dtab_netdev *old_dev) +{ + if (old_dev->dev->netdev_ops->ndo_xdp_flush) { + struct net_device *fl = old_dev->dev; + unsigned long *bitmap; + int cpu; + + for_each_online_cpu(cpu) { + bitmap = per_cpu_ptr(old_dev->dtab->flush_needed, cpu); + __clear_bit(old_dev->key, bitmap); + + fl->netdev_ops->ndo_xdp_flush(old_dev->dev); + } + } +} + +static void __dev_map_entry_free(struct rcu_head *rcu) +{ + struct bpf_dtab_netdev *old_dev; + + old_dev = container_of(rcu, struct bpf_dtab_netdev, rcu); + dev_map_flush_old(old_dev); + dev_put(old_dev->dev); + kfree(old_dev); +} + +static int dev_map_delete_elem(struct bpf_map *map, void *key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct bpf_dtab_netdev *old_dev; + int k = *(u32 *)key; + + if (k >= map->max_entries) + return -EINVAL; + + /* Use call_rcu() here to ensure any rcu critical sections + * have completed, but this does not guarantee a flush has happened + * yet. Because driver side rcu_read_lock/unlock only protects the + * running XDP program. However, for pending flush operations the + * dev and ctx are stored in another per cpu map. And additionally, + * the driver tear down ensures all soft irqs are complete before + * removing the net device in the case of dev_put equals zero. 
+ */ + mutex_lock(&dev_map_list_mutex); + old_dev = xchg(&dtab->netdev_map[k], NULL); + if (old_dev) + call_rcu(&old_dev->rcu, __dev_map_entry_free); + mutex_unlock(&dev_map_list_mutex); + return 0; +} + +static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct net *net = current->nsproxy->net_ns; + struct bpf_dtab_netdev *dev, *old_dev; + u32 i = *(u32 *)key; + u32 ifindex = *(u32 *)value; + + if (unlikely(map_flags > BPF_EXIST)) + return -EINVAL; + + if (unlikely(i >= dtab->map.max_entries)) + return -E2BIG; + + if (unlikely(map_flags == BPF_NOEXIST)) + return -EEXIST; + + if (!ifindex) { + dev = NULL; + } else { + dev = kmalloc(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN); + if (!dev) + return -ENOMEM; + + dev->dev = dev_get_by_index(net, ifindex); + if (!dev->dev) { + kfree(dev); + return -EINVAL; + } + + dev->key = i; + dev->dtab = dtab; + } + + /* Use call_rcu() here to ensure rcu critical sections have completed + * Remembering the driver side flush operation will happen before the + * net device is removed. 
+ */ + mutex_lock(&dev_map_list_mutex); + old_dev = xchg(&dtab->netdev_map[i], dev); + if (old_dev) + call_rcu(&old_dev->rcu, __dev_map_entry_free); + mutex_unlock(&dev_map_list_mutex); + + return 0; +} + +const struct bpf_map_ops dev_map_ops = { + .map_alloc = dev_map_alloc, + .map_free = dev_map_free, + .map_get_next_key = dev_map_get_next_key, + .map_lookup_elem = dev_map_lookup_elem, + .map_update_elem = dev_map_update_elem, + .map_delete_elem = dev_map_delete_elem, +}; + +static int dev_map_notification(struct notifier_block *notifier, + ulong event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct bpf_dtab *dtab; + int i; + + switch (event) { + case NETDEV_UNREGISTER: + mutex_lock(&dev_map_list_mutex); + list_for_each_entry(dtab, &dev_map_list, list) { + for (i = 0; i < dtab->map.max_entries; i++) { + struct bpf_dtab_netdev *dev; + + dev = dtab->netdev_map[i]; + if (!dev || + dev->dev->ifindex != netdev->ifindex) + continue; + dev = xchg(&dtab->netdev_map[i], NULL); + if (dev) + call_rcu(&dev->rcu, + __dev_map_entry_free); + } + } + mutex_unlock(&dev_map_list_mutex); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block dev_map_notifier = { + .notifier_call = dev_map_notification, +}; + +static int __init dev_map_init(void) +{ + register_netdevice_notifier(&dev_map_notifier); + return 0; +} + +subsys_initcall(dev_map_init); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index af9e84a4944e..db6a289ebf0b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1283,6 +1283,14 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) func_id != BPF_FUNC_current_task_under_cgroup) goto error; break; + /* devmap returns a pointer to a live net_device ifindex that we cannot + * allow to be modified from bpf side. So do not allow lookup elements + * for now. 
+ */ + case BPF_MAP_TYPE_DEVMAP: + if (func_id != BPF_FUNC_redirect_map) + goto error; + break; case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: if (func_id != BPF_FUNC_map_lookup_elem) @@ -1311,6 +1319,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) goto error; break; + case BPF_FUNC_redirect_map: + if (map->map_type != BPF_MAP_TYPE_DEVMAP) + goto error; + break; default: break; } diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index ab3b654b05cc..2af4f1cc0ab4 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -618,12 +618,8 @@ static void ifup(struct net_device *netdev) static void ifdown(struct net_device *netdev) { - int err; - rtnl_lock(); - err = dev_close(netdev); - if (err < 0) - BT_INFO("iface %s cannot be closed (%d)", netdev->name, err); + dev_close(netdev); rtnl_unlock(); } diff --git a/net/core/Makefile b/net/core/Makefile index 79f9479e9658..d501c4278015 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -11,7 +11,6 @@ obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o -obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o diff --git a/net/core/dev.c b/net/core/dev.c index 8515f8fe0460..509af6ce8831 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1413,7 +1413,7 @@ int dev_open(struct net_device *dev) } EXPORT_SYMBOL(dev_open); -static int __dev_close_many(struct list_head *head) +static void __dev_close_many(struct list_head *head) { struct net_device *dev; @@ -1455,23 +1455,18 @@ static int __dev_close_many(struct list_head *head) dev->flags &= ~IFF_UP; netpoll_poll_enable(dev); } - - return 0; } -static int __dev_close(struct net_device *dev) +static void __dev_close(struct net_device *dev) { - int retval; LIST_HEAD(single); 
list_add(&dev->close_list, &single); - retval = __dev_close_many(&single); + __dev_close_many(&single); list_del(&single); - - return retval; } -int dev_close_many(struct list_head *head, bool unlink) +void dev_close_many(struct list_head *head, bool unlink) { struct net_device *dev, *tmp; @@ -1488,8 +1483,6 @@ int dev_close_many(struct list_head *head, bool unlink) if (unlink) list_del_init(&dev->close_list); } - - return 0; } EXPORT_SYMBOL(dev_close_many); @@ -1502,7 +1495,7 @@ EXPORT_SYMBOL(dev_close_many); * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier * chain. */ -int dev_close(struct net_device *dev) +void dev_close(struct net_device *dev) { if (dev->flags & IFF_UP) { LIST_HEAD(single); @@ -1511,7 +1504,6 @@ int dev_close(struct net_device *dev) dev_close_many(&single, true); list_del(&single); } - return 0; } EXPORT_SYMBOL(dev_close); @@ -3865,6 +3857,121 @@ drop: return NET_RX_DROP; } +static u32 netif_receive_generic_xdp(struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + struct xdp_buff xdp; + u32 act = XDP_DROP; + void *orig_data; + int hlen, off; + u32 mac_len; + + /* Reinjected packets coming from act_mirred or similar should + * not get XDP generic processing. + */ + if (skb_cloned(skb)) + return XDP_PASS; + + if (skb_linearize(skb)) + goto do_drop; + + /* The XDP program wants to see the packet starting at the MAC + * header. 
+ */ + mac_len = skb->data - skb_mac_header(skb); + hlen = skb_headlen(skb) + mac_len; + xdp.data = skb->data - mac_len; + xdp.data_end = xdp.data + hlen; + xdp.data_hard_start = skb->data - skb_headroom(skb); + orig_data = xdp.data; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + off = xdp.data - orig_data; + if (off > 0) + __skb_pull(skb, off); + else if (off < 0) + __skb_push(skb, -off); + + switch (act) { + case XDP_REDIRECT: + case XDP_TX: + __skb_push(skb, mac_len); + /* fall through */ + case XDP_PASS: + break; + + default: + bpf_warn_invalid_xdp_action(act); + /* fall through */ + case XDP_ABORTED: + trace_xdp_exception(skb->dev, xdp_prog, act); + /* fall through */ + case XDP_DROP: + do_drop: + kfree_skb(skb); + break; + } + + return act; +} + +/* When doing generic XDP we have to bypass the qdisc layer and the + * network taps in order to match in-driver-XDP behavior. + */ +static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) +{ + struct net_device *dev = skb->dev; + struct netdev_queue *txq; + bool free_skb = true; + int cpu, rc; + + txq = netdev_pick_tx(dev, skb, NULL); + cpu = smp_processor_id(); + HARD_TX_LOCK(dev, txq, cpu); + if (!netif_xmit_stopped(txq)) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (dev_xmit_complete(rc)) + free_skb = false; + } + HARD_TX_UNLOCK(dev, txq); + if (free_skb) { + trace_xdp_exception(dev, xdp_prog, XDP_TX); + kfree_skb(skb); + } +} + +static struct static_key generic_xdp_needed __read_mostly; + +static int do_xdp_generic(struct sk_buff *skb) +{ + struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog); + + if (xdp_prog) { + u32 act = netif_receive_generic_xdp(skb, xdp_prog); + int err; + + if (act != XDP_PASS) { + switch (act) { + case XDP_REDIRECT: + err = xdp_do_generic_redirect(skb->dev, skb); + if (err) + goto out_redir; + /* fallthru to submit skb */ + case XDP_TX: + generic_xdp_tx(skb, xdp_prog); + break; + } + return XDP_DROP; + } + } + return XDP_PASS; +out_redir: + 
trace_xdp_exception(skb->dev, xdp_prog, XDP_REDIRECT); + kfree_skb(skb); + return XDP_DROP; +} + static int netif_rx_internal(struct sk_buff *skb) { int ret; @@ -3872,6 +3979,18 @@ static int netif_rx_internal(struct sk_buff *skb) net_timestamp_check(netdev_tstamp_prequeue, skb); trace_netif_rx(skb); + + if (static_key_false(&generic_xdp_needed)) { + int ret = do_xdp_generic(skb); + + /* Consider XDP consuming the packet a success from + * the netdev point of view we do not want to count + * this as an error. + */ + if (ret != XDP_PASS) + return NET_RX_SUCCESS; + } + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -4338,8 +4457,6 @@ static int __netif_receive_skb(struct sk_buff *skb) return ret; } -static struct static_key generic_xdp_needed __read_mostly; - static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) { struct bpf_prog *old = rtnl_dereference(dev->xdp_prog); @@ -4373,89 +4490,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) return ret; } -static u32 netif_receive_generic_xdp(struct sk_buff *skb, - struct bpf_prog *xdp_prog) -{ - struct xdp_buff xdp; - u32 act = XDP_DROP; - void *orig_data; - int hlen, off; - u32 mac_len; - - /* Reinjected packets coming from act_mirred or similar should - * not get XDP generic processing. - */ - if (skb_cloned(skb)) - return XDP_PASS; - - if (skb_linearize(skb)) - goto do_drop; - - /* The XDP program wants to see the packet starting at the MAC - * header. 
- */ - mac_len = skb->data - skb_mac_header(skb); - hlen = skb_headlen(skb) + mac_len; - xdp.data = skb->data - mac_len; - xdp.data_end = xdp.data + hlen; - xdp.data_hard_start = skb->data - skb_headroom(skb); - orig_data = xdp.data; - - act = bpf_prog_run_xdp(xdp_prog, &xdp); - - off = xdp.data - orig_data; - if (off > 0) - __skb_pull(skb, off); - else if (off < 0) - __skb_push(skb, -off); - - switch (act) { - case XDP_TX: - __skb_push(skb, mac_len); - /* fall through */ - case XDP_PASS: - break; - - default: - bpf_warn_invalid_xdp_action(act); - /* fall through */ - case XDP_ABORTED: - trace_xdp_exception(skb->dev, xdp_prog, act); - /* fall through */ - case XDP_DROP: - do_drop: - kfree_skb(skb); - break; - } - - return act; -} - -/* When doing generic XDP we have to bypass the qdisc layer and the - * network taps in order to match in-driver-XDP behavior. - */ -static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) -{ - struct net_device *dev = skb->dev; - struct netdev_queue *txq; - bool free_skb = true; - int cpu, rc; - - txq = netdev_pick_tx(dev, skb, NULL); - cpu = smp_processor_id(); - HARD_TX_LOCK(dev, txq, cpu); - if (!netif_xmit_stopped(txq)) { - rc = netdev_start_xmit(skb, dev, txq, 0); - if (dev_xmit_complete(rc)) - free_skb = false; - } - HARD_TX_UNLOCK(dev, txq); - if (free_skb) { - trace_xdp_exception(dev, xdp_prog, XDP_TX); - kfree_skb(skb); - } -} - static int netif_receive_skb_internal(struct sk_buff *skb) { int ret; @@ -4468,17 +4502,11 @@ static int netif_receive_skb_internal(struct sk_buff *skb) rcu_read_lock(); if (static_key_false(&generic_xdp_needed)) { - struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog); - - if (xdp_prog) { - u32 act = netif_receive_generic_xdp(skb, xdp_prog); + int ret = do_xdp_generic(skb); - if (act != XDP_PASS) { - rcu_read_unlock(); - if (act == XDP_TX) - generic_xdp_tx(skb, xdp_prog); - return NET_RX_DROP; - } + if (ret != XDP_PASS) { + rcu_read_unlock(); + return NET_RX_DROP; } } @@ 
-6689,8 +6717,12 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) */ ret = 0; - if ((old_flags ^ flags) & IFF_UP) - ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); + if ((old_flags ^ flags) & IFF_UP) { + if (old_flags & IFF_UP) + __dev_close(dev); + else + ret = __dev_open(dev); + } if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 1 : -1; @@ -7235,24 +7267,6 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~NETIF_F_GSO; } - /* UFO needs SG and checksumming */ - if (features & NETIF_F_UFO) { - /* maybe split UFO into V4 and V6? */ - if (!(features & NETIF_F_HW_CSUM) && - ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) != - (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) { - netdev_dbg(dev, - "Dropping NETIF_F_UFO since no checksum offload features.\n"); - features &= ~NETIF_F_UFO; - } - - if (!(features & NETIF_F_SG)) { - netdev_dbg(dev, - "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); - features &= ~NETIF_F_UFO; - } - } - /* GSO partial features require GSO partial be set */ if ((features & dev->gso_partial_features) && !(features & NETIF_F_GSO_PARTIAL)) { diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 674b6c9cec18..78408ab77a10 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -76,7 +76,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_LRO_BIT] = "rx-lro", [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", - [NETIF_F_UFO_BIT] = "tx-udp-fragmentation", [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", @@ -299,9 +298,6 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) case ETHTOOL_GTSO: case ETHTOOL_STSO: return NETIF_F_ALL_TSO; - case ETHTOOL_GUFO: - case ETHTOOL_SUFO: - return NETIF_F_UFO; case ETHTOOL_GGSO: case ETHTOOL_SGSO: return NETIF_F_GSO; @@ -2555,7 +2551,6 @@ int 
dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPHYSTATS: case ETHTOOL_GTSO: case ETHTOOL_GPERMADDR: - case ETHTOOL_GUFO: case ETHTOOL_GGSO: case ETHTOOL_GGRO: case ETHTOOL_GFLAGS: @@ -2723,7 +2718,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCSUM: case ETHTOOL_GSG: case ETHTOOL_GTSO: - case ETHTOOL_GUFO: case ETHTOOL_GGSO: case ETHTOOL_GGRO: rc = ethtool_get_one_feature(dev, useraddr, ethcmd); @@ -2732,7 +2726,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXCSUM: case ETHTOOL_SSG: case ETHTOOL_STSO: - case ETHTOOL_SUFO: case ETHTOOL_SGSO: case ETHTOOL_SGRO: rc = ethtool_set_one_feature(dev, useraddr, ethcmd); diff --git a/net/core/filter.c b/net/core/filter.c index f44fc22fd45a..7e9708653c6f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -55,6 +55,7 @@ #include <net/sock_reuseport.h> #include <net/busy_poll.h> #include <net/tcp.h> +#include <linux/bpf_trace.h> /** * sk_filter_trim_cap - run a packet through a socket filter @@ -1778,6 +1779,8 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = { struct redirect_info { u32 ifindex; u32 flags; + struct bpf_map *map; + struct bpf_map *map_to_flush; }; static DEFINE_PER_CPU(struct redirect_info, redirect_info); @@ -1791,6 +1794,7 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) ri->ifindex = ifindex; ri->flags = flags; + ri->map = NULL; return TC_ACT_REDIRECT; } @@ -1818,6 +1822,29 @@ static const struct bpf_func_proto bpf_redirect_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_3(bpf_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + + if (unlikely(flags)) + return XDP_ABORTED; + + ri->ifindex = ifindex; + ri->flags = flags; + ri->map = map; + + return XDP_REDIRECT; +} + +static const struct bpf_func_proto bpf_redirect_map_proto = { + .func = bpf_redirect_map, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + 
.arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) { return task_get_classid(skb); @@ -2024,8 +2051,8 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) return ret; if (skb_is_gso(skb)) { - /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to - * be changed into SKB_GSO_TCPV6. + /* SKB_GSO_TCPV4 needs to be changed into + * SKB_GSO_TCPV6. */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4; @@ -2060,8 +2087,8 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) return ret; if (skb_is_gso(skb)) { - /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to - * be changed into SKB_GSO_TCPV4. + /* SKB_GSO_TCPV6 needs to be changed into + * SKB_GSO_TCPV4. */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6; @@ -2412,6 +2439,140 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; +static int __bpf_tx_xdp(struct net_device *dev, + struct bpf_map *map, + struct xdp_buff *xdp, + u32 index) +{ + int err; + + if (!dev->netdev_ops->ndo_xdp_xmit) { + bpf_warn_invalid_xdp_redirect(dev->ifindex); + return -EOPNOTSUPP; + } + + err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); + if (err) + return err; + + if (map) + __dev_map_insert_ctx(map, index); + else + dev->netdev_ops->ndo_xdp_flush(dev); + + return err; +} + +void xdp_do_flush_map(void) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct bpf_map *map = ri->map_to_flush; + + ri->map = NULL; + ri->map_to_flush = NULL; + + if (map) + __dev_map_flush(map); +} +EXPORT_SYMBOL_GPL(xdp_do_flush_map); + +int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct bpf_map *map = ri->map; + u32 index = ri->ifindex; + struct net_device *fwd; + int err = -EINVAL; + + ri->ifindex = 0; + ri->map = NULL; + + fwd 
= __dev_map_lookup_elem(map, index); + if (!fwd) + goto out; + + if (ri->map_to_flush && (ri->map_to_flush != map)) + xdp_do_flush_map(); + + err = __bpf_tx_xdp(fwd, map, xdp, index); + if (likely(!err)) + ri->map_to_flush = map; + +out: + trace_xdp_redirect(dev, fwd, xdp_prog, XDP_REDIRECT); + return err; +} + +int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct net_device *fwd; + + if (ri->map) + return xdp_do_redirect_map(dev, xdp, xdp_prog); + + fwd = dev_get_by_index_rcu(dev_net(dev), ri->ifindex); + ri->ifindex = 0; + ri->map = NULL; + if (unlikely(!fwd)) { + bpf_warn_invalid_xdp_redirect(ri->ifindex); + return -EINVAL; + } + + trace_xdp_redirect(dev, fwd, xdp_prog, XDP_REDIRECT); + + return __bpf_tx_xdp(fwd, NULL, xdp, 0); +} +EXPORT_SYMBOL_GPL(xdp_do_redirect); + +int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + unsigned int len; + + dev = dev_get_by_index_rcu(dev_net(dev), ri->ifindex); + ri->ifindex = 0; + if (unlikely(!dev)) { + bpf_warn_invalid_xdp_redirect(ri->ifindex); + goto err; + } + + if (unlikely(!(dev->flags & IFF_UP))) + goto err; + + len = dev->mtu + dev->hard_header_len + VLAN_HLEN; + if (skb->len > len) + goto err; + + skb->dev = dev; + return 0; +err: + return -EINVAL; +} +EXPORT_SYMBOL_GPL(xdp_do_generic_redirect); + +BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + + if (unlikely(flags)) + return XDP_ABORTED; + + ri->ifindex = ifindex; + ri->flags = flags; + return XDP_REDIRECT; +} + +static const struct bpf_func_proto bpf_xdp_redirect_proto = { + .func = bpf_xdp_redirect, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, +}; + bool bpf_helper_changes_pkt_data(void *func) { if (func == bpf_skb_vlan_push || @@ -3011,6 
+3172,10 @@ xdp_func_proto(enum bpf_func_id func_id) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_xdp_adjust_head: return &bpf_xdp_adjust_head_proto; + case BPF_FUNC_redirect: + return &bpf_xdp_redirect_proto; + case BPF_FUNC_redirect_map: + return &bpf_redirect_map_proto; default: return bpf_base_func_proto(func_id); } @@ -3310,6 +3475,11 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); +void bpf_warn_invalid_xdp_redirect(u32 ifindex) +{ + WARN_ONCE(1, "Illegal XDP redirect to unsupported device ifindex(%i)\n", ifindex); +} + static bool __is_valid_sock_ops_access(int off, int size) { if (off < 0 || off >= sizeof(struct bpf_sock_ops)) diff --git a/net/core/flow.c b/net/core/flow.c deleted file mode 100644 index f7f5d1932a27..000000000000 --- a/net/core/flow.c +++ /dev/null @@ -1,516 +0,0 @@ -/* flow.c: Generic flow cache. - * - * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru) - * Copyright (C) 2003 David S. Miller (davem@redhat.com) - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/list.h> -#include <linux/jhash.h> -#include <linux/interrupt.h> -#include <linux/mm.h> -#include <linux/random.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/smp.h> -#include <linux/completion.h> -#include <linux/percpu.h> -#include <linux/bitops.h> -#include <linux/notifier.h> -#include <linux/cpu.h> -#include <linux/cpumask.h> -#include <linux/mutex.h> -#include <net/flow.h> -#include <linux/atomic.h> -#include <linux/security.h> -#include <net/net_namespace.h> - -struct flow_cache_entry { - union { - struct hlist_node hlist; - struct list_head gc_list; - } u; - struct net *net; - u16 family; - u8 dir; - u32 genid; - struct flowi key; - struct flow_cache_object *object; -}; - -struct flow_flush_info { - struct flow_cache *cache; - atomic_t cpuleft; - struct completion completion; -}; - -static struct kmem_cache *flow_cachep __read_mostly; - -#define 
flow_cache_hash_size(cache) (1U << (cache)->hash_shift) -#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) - -static void flow_cache_new_hashrnd(unsigned long arg) -{ - struct flow_cache *fc = (void *) arg; - int i; - - for_each_possible_cpu(i) - per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1; - - fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; - add_timer(&fc->rnd_timer); -} - -static int flow_entry_valid(struct flow_cache_entry *fle, - struct netns_xfrm *xfrm) -{ - if (atomic_read(&xfrm->flow_cache_genid) != fle->genid) - return 0; - if (fle->object && !fle->object->ops->check(fle->object)) - return 0; - return 1; -} - -static void flow_entry_kill(struct flow_cache_entry *fle, - struct netns_xfrm *xfrm) -{ - if (fle->object) - fle->object->ops->delete(fle->object); - kmem_cache_free(flow_cachep, fle); -} - -static void flow_cache_gc_task(struct work_struct *work) -{ - struct list_head gc_list; - struct flow_cache_entry *fce, *n; - struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, - flow_cache_gc_work); - - INIT_LIST_HEAD(&gc_list); - spin_lock_bh(&xfrm->flow_cache_gc_lock); - list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); - spin_unlock_bh(&xfrm->flow_cache_gc_lock); - - list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) { - flow_entry_kill(fce, xfrm); - atomic_dec(&xfrm->flow_cache_gc_count); - } -} - -static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, - unsigned int deleted, - struct list_head *gc_list, - struct netns_xfrm *xfrm) -{ - if (deleted) { - atomic_add(deleted, &xfrm->flow_cache_gc_count); - fcp->hash_count -= deleted; - spin_lock_bh(&xfrm->flow_cache_gc_lock); - list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); - spin_unlock_bh(&xfrm->flow_cache_gc_lock); - schedule_work(&xfrm->flow_cache_gc_work); - } -} - -static void __flow_cache_shrink(struct flow_cache *fc, - struct flow_cache_percpu *fcp, - unsigned int shrink_to) -{ - struct flow_cache_entry *fle; - struct hlist_node *tmp; - 
LIST_HEAD(gc_list); - unsigned int deleted = 0; - struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, - flow_cache_global); - unsigned int i; - - for (i = 0; i < flow_cache_hash_size(fc); i++) { - unsigned int saved = 0; - - hlist_for_each_entry_safe(fle, tmp, - &fcp->hash_table[i], u.hlist) { - if (saved < shrink_to && - flow_entry_valid(fle, xfrm)) { - saved++; - } else { - deleted++; - hlist_del(&fle->u.hlist); - list_add_tail(&fle->u.gc_list, &gc_list); - } - } - } - - flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); -} - -static void flow_cache_shrink(struct flow_cache *fc, - struct flow_cache_percpu *fcp) -{ - unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); - - __flow_cache_shrink(fc, fcp, shrink_to); -} - -static void flow_new_hash_rnd(struct flow_cache *fc, - struct flow_cache_percpu *fcp) -{ - get_random_bytes(&fcp->hash_rnd, sizeof(u32)); - fcp->hash_rnd_recalc = 0; - __flow_cache_shrink(fc, fcp, 0); -} - -static u32 flow_hash_code(struct flow_cache *fc, - struct flow_cache_percpu *fcp, - const struct flowi *key, - unsigned int keysize) -{ - const u32 *k = (const u32 *) key; - const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - - return jhash2(k, length, fcp->hash_rnd) - & (flow_cache_hash_size(fc) - 1); -} - -/* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment. 
- */ -static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, - unsigned int keysize) -{ - const flow_compare_t *k1, *k1_lim, *k2; - - k1 = (const flow_compare_t *) key1; - k1_lim = k1 + keysize; - - k2 = (const flow_compare_t *) key2; - - do { - if (*k1++ != *k2++) - return 1; - } while (k1 < k1_lim); - - return 0; -} - -struct flow_cache_object * -flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, - flow_resolve_t resolver, void *ctx) -{ - struct flow_cache *fc = &net->xfrm.flow_cache_global; - struct flow_cache_percpu *fcp; - struct flow_cache_entry *fle, *tfle; - struct flow_cache_object *flo; - unsigned int keysize; - unsigned int hash; - - local_bh_disable(); - fcp = this_cpu_ptr(fc->percpu); - - fle = NULL; - flo = NULL; - - keysize = flow_key_size(family); - if (!keysize) - goto nocache; - - /* Packet really early in init? Making flow_cache_init a - * pre-smp initcall would solve this. --RR */ - if (!fcp->hash_table) - goto nocache; - - if (fcp->hash_rnd_recalc) - flow_new_hash_rnd(fc, fcp); - - hash = flow_hash_code(fc, fcp, key, keysize); - hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) { - if (tfle->net == net && - tfle->family == family && - tfle->dir == dir && - flow_key_compare(key, &tfle->key, keysize) == 0) { - fle = tfle; - break; - } - } - - if (unlikely(!fle)) { - if (fcp->hash_count > fc->high_watermark) - flow_cache_shrink(fc, fcp); - - if (atomic_read(&net->xfrm.flow_cache_gc_count) > - 2 * num_online_cpus() * fc->high_watermark) { - flo = ERR_PTR(-ENOBUFS); - goto ret_object; - } - - fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); - if (fle) { - fle->net = net; - fle->family = family; - fle->dir = dir; - memcpy(&fle->key, key, keysize * sizeof(flow_compare_t)); - fle->object = NULL; - hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); - fcp->hash_count++; - } - } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) { - flo = fle->object; - if (!flo) - 
goto ret_object; - flo = flo->ops->get(flo); - if (flo) - goto ret_object; - } else if (fle->object) { - flo = fle->object; - flo->ops->delete(flo); - fle->object = NULL; - } - -nocache: - flo = NULL; - if (fle) { - flo = fle->object; - fle->object = NULL; - } - flo = resolver(net, key, family, dir, flo, ctx); - if (fle) { - fle->genid = atomic_read(&net->xfrm.flow_cache_genid); - if (!IS_ERR(flo)) - fle->object = flo; - else - fle->genid--; - } else { - if (!IS_ERR_OR_NULL(flo)) - flo->ops->delete(flo); - } -ret_object: - local_bh_enable(); - return flo; -} -EXPORT_SYMBOL(flow_cache_lookup); - -static void flow_cache_flush_tasklet(unsigned long data) -{ - struct flow_flush_info *info = (void *)data; - struct flow_cache *fc = info->cache; - struct flow_cache_percpu *fcp; - struct flow_cache_entry *fle; - struct hlist_node *tmp; - LIST_HEAD(gc_list); - unsigned int deleted = 0; - struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, - flow_cache_global); - unsigned int i; - - fcp = this_cpu_ptr(fc->percpu); - for (i = 0; i < flow_cache_hash_size(fc); i++) { - hlist_for_each_entry_safe(fle, tmp, - &fcp->hash_table[i], u.hlist) { - if (flow_entry_valid(fle, xfrm)) - continue; - - deleted++; - hlist_del(&fle->u.hlist); - list_add_tail(&fle->u.gc_list, &gc_list); - } - } - - flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); - - if (atomic_dec_and_test(&info->cpuleft)) - complete(&info->completion); -} - -/* - * Return whether a cpu needs flushing. Conservatively, we assume - * the presence of any entries means the core may require flushing, - * since the flow_cache_ops.check() function may assume it's running - * on the same core as the per-cpu cache component. 
- */ -static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu) -{ - struct flow_cache_percpu *fcp; - unsigned int i; - - fcp = per_cpu_ptr(fc->percpu, cpu); - for (i = 0; i < flow_cache_hash_size(fc); i++) - if (!hlist_empty(&fcp->hash_table[i])) - return 0; - return 1; -} - -static void flow_cache_flush_per_cpu(void *data) -{ - struct flow_flush_info *info = data; - struct tasklet_struct *tasklet; - - tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet; - tasklet->data = (unsigned long)info; - tasklet_schedule(tasklet); -} - -void flow_cache_flush(struct net *net) -{ - struct flow_flush_info info; - cpumask_var_t mask; - int i, self; - - /* Track which cpus need flushing to avoid disturbing all cores. */ - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) - return; - cpumask_clear(mask); - - /* Don't want cpus going down or up during this. */ - get_online_cpus(); - mutex_lock(&net->xfrm.flow_flush_sem); - info.cache = &net->xfrm.flow_cache_global; - for_each_online_cpu(i) - if (!flow_cache_percpu_empty(info.cache, i)) - cpumask_set_cpu(i, mask); - atomic_set(&info.cpuleft, cpumask_weight(mask)); - if (atomic_read(&info.cpuleft) == 0) - goto done; - - init_completion(&info.completion); - - local_bh_disable(); - self = cpumask_test_and_clear_cpu(smp_processor_id(), mask); - on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0); - if (self) - flow_cache_flush_tasklet((unsigned long)&info); - local_bh_enable(); - - wait_for_completion(&info.completion); - -done: - mutex_unlock(&net->xfrm.flow_flush_sem); - put_online_cpus(); - free_cpumask_var(mask); -} - -static void flow_cache_flush_task(struct work_struct *work) -{ - struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, - flow_cache_flush_work); - struct net *net = container_of(xfrm, struct net, xfrm); - - flow_cache_flush(net); -} - -void flow_cache_flush_deferred(struct net *net) -{ - schedule_work(&net->xfrm.flow_cache_flush_work); -} - -static int flow_cache_cpu_prepare(struct 
flow_cache *fc, int cpu) -{ - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); - - if (!fcp->hash_table) { - fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); - if (!fcp->hash_table) { - pr_err("NET: failed to allocate flow cache sz %u\n", sz); - return -ENOMEM; - } - fcp->hash_rnd_recalc = 1; - fcp->hash_count = 0; - tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); - } - return 0; -} - -static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node) -{ - struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node); - - return flow_cache_cpu_prepare(fc, cpu); -} - -static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node) -{ - struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node); - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - - __flow_cache_shrink(fc, fcp, 0); - return 0; -} - -int flow_cache_init(struct net *net) -{ - int i; - struct flow_cache *fc = &net->xfrm.flow_cache_global; - - if (!flow_cachep) - flow_cachep = kmem_cache_create("flow_cache", - sizeof(struct flow_cache_entry), - 0, SLAB_PANIC, NULL); - spin_lock_init(&net->xfrm.flow_cache_gc_lock); - INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list); - INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); - INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); - mutex_init(&net->xfrm.flow_flush_sem); - atomic_set(&net->xfrm.flow_cache_gc_count, 0); - - fc->hash_shift = 10; - fc->low_watermark = 2 * flow_cache_hash_size(fc); - fc->high_watermark = 4 * flow_cache_hash_size(fc); - - fc->percpu = alloc_percpu(struct flow_cache_percpu); - if (!fc->percpu) - return -ENOMEM; - - if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node)) - goto err; - - setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, - (unsigned long) fc); - fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; - 
add_timer(&fc->rnd_timer); - - return 0; - -err: - for_each_possible_cpu(i) { - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); - kfree(fcp->hash_table); - fcp->hash_table = NULL; - } - - free_percpu(fc->percpu); - fc->percpu = NULL; - - return -ENOMEM; -} -EXPORT_SYMBOL(flow_cache_init); - -void flow_cache_fini(struct net *net) -{ - int i; - struct flow_cache *fc = &net->xfrm.flow_cache_global; - - del_timer_sync(&fc->rnd_timer); - - cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node); - - for_each_possible_cpu(i) { - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); - kfree(fcp->hash_table); - fcp->hash_table = NULL; - } - - free_percpu(fc->percpu); - fc->percpu = NULL; -} -EXPORT_SYMBOL(flow_cache_fini); - -void __init flow_cache_hp_init(void) -{ - int ret; - - ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE, - "net/flow:prepare", - flow_cache_cpu_up_prep, - flow_cache_cpu_dead); - WARN_ON(ret < 0); -} diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f990eb8b30a9..84bdfa229b0d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -158,31 +158,6 @@ out: * */ -struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) -{ - struct sk_buff *skb; - - /* Get the HEAD */ - skb = kmem_cache_alloc_node(skbuff_head_cache, - gfp_mask & ~__GFP_DMA, node); - if (!skb) - goto out; - - /* - * Only clear those fields we need to clear, not those that we will - * actually initialise below. Hence, don't put any more fields after - * the tail pointer in struct sk_buff! 
- */ - memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->head = NULL; - skb->truesize = sizeof(struct sk_buff); - refcount_set(&skb->users, 1); - - skb->mac_header = (typeof(skb->mac_header))~0U; -out: - return skb; -} - /** * __alloc_skb - allocate a network buffer * @size: size to allocate @@ -663,8 +638,7 @@ void skb_release_head_state(struct sk_buff *skb) static void skb_release_all(struct sk_buff *skb) { skb_release_head_state(skb); - if (likely(skb->head)) - skb_release_data(skb); + skb_release_data(skb); } /** @@ -762,8 +736,7 @@ void consume_stateless_skb(struct sk_buff *skb) return; trace_consume_skb(skb); - if (likely(skb->head)) - skb_release_data(skb); + skb_release_data(skb); kfree_skbmem(skb); } @@ -1719,6 +1692,8 @@ pull_pages: if (eat) { skb_shinfo(skb)->frags[k].page_offset += eat; skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); + if (!i) + goto end; eat = 0; } k++; @@ -1726,6 +1701,7 @@ pull_pages: } skb_shinfo(skb)->nr_frags = k; +end: skb->tail += delta; skb->data_len -= delta; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 416ac4ef9ba9..a55e2e4087a4 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -220,6 +220,11 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, } #ifdef CONFIG_PM_SLEEP +static bool dsa_is_port_initialized(struct dsa_switch *ds, int p) +{ + return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; +} + int dsa_switch_suspend(struct dsa_switch *ds) { int i, ret = 0; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 76c2077c3f5b..5ce44fb7d498 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1219,10 +1219,9 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { - bool udpfrag = false, fixedid = false, gso_partial, encap; + bool fixedid = false, gso_partial, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; - unsigned int offset = 0; struct iphdr *iph; int proto, tot_len; 
int nhoff; @@ -1257,7 +1256,6 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); if (!skb->encapsulation || encap) { - udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); /* fixed ID is invalid if DF bit is not set */ @@ -1277,13 +1275,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, skb = segs; do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); - if (udpfrag) { - iph->frag_off = htons(offset >> 3); - if (skb->next) - iph->frag_off |= htons(IP_MF); - offset += skb->len - nhoff - ihl; - tot_len = skb->len - nhoff; - } else if (skb_is_gso(skb)) { + if (skb_is_gso(skb)) { if (!fixedid) { iph->id = htons(id); id += skb_shinfo(skb)->gso_segs; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index d5cac99170b1..416bb304a281 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, __be16 protocol = skb->protocol; u16 mac_len = skb->mac_len; int gre_offset, outer_hlen; - bool need_csum, ufo, gso_partial; + bool need_csum, gso_partial; if (!skb->encapsulation) goto out; @@ -47,20 +47,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM); skb->encap_hdr_csum = need_csum; - ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); - features &= skb->dev->hw_enc_features; - /* The only checksum offload we care about from here on out is the - * outer one so strip the existing checksum feature flags based - * on the fact that we will be computing our checksum in software. - */ - if (ufo) { - features &= ~NETIF_F_CSUM_MASK; - if (!need_csum) - features |= NETIF_F_HW_CSUM; - } - /* segment inner packet. 
*/ segs = skb_mac_gso_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index c5a117cc6619..337ad41bb80a 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -33,7 +33,7 @@ * also be removed if the pool is overloaded i.e. if the total amount of * entries is greater-or-equal than the threshold. * - * Node pool is organised as an AVL tree. + * Node pool is organised as an RB tree. * Such an implementation has been chosen not just for fun. It's a way to * prevent easy and efficient DoS attacks by creating hash collisions. A huge * amount of long living nodes in a single hash slot would significantly delay @@ -45,7 +45,7 @@ * AND reference count being 0. * 3. Global variable peer_total is modified under the pool lock. * 4. struct inet_peer fields modification: - * avl_left, avl_right, avl_parent, avl_height: pool lock + * rb_node: pool lock * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing * daddr: unchangeable @@ -53,30 +53,15 @@ static struct kmem_cache *peer_cachep __read_mostly; -static LIST_HEAD(gc_list); -static const int gc_delay = 60 * HZ; -static struct delayed_work gc_work; -static DEFINE_SPINLOCK(gc_lock); - -#define node_height(x) x->avl_height - -#define peer_avl_empty ((struct inet_peer *)&peer_fake_node) -#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node) -static const struct inet_peer peer_fake_node = { - .avl_left = peer_avl_empty_rcu, - .avl_right = peer_avl_empty_rcu, - .avl_height = 0 -}; - void inet_peer_base_init(struct inet_peer_base *bp) { - bp->root = peer_avl_empty_rcu; + bp->rb_root = RB_ROOT; seqlock_init(&bp->lock); bp->total = 0; } EXPORT_SYMBOL_GPL(inet_peer_base_init); -#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ +#define PEER_MAX_GC 32 /* Exported for sysctl_net_ipv4. 
*/ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more @@ -84,53 +69,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ -static void inetpeer_gc_worker(struct work_struct *work) -{ - struct inet_peer *p, *n, *c; - struct list_head list; - - spin_lock_bh(&gc_lock); - list_replace_init(&gc_list, &list); - spin_unlock_bh(&gc_lock); - - if (list_empty(&list)) - return; - - list_for_each_entry_safe(p, n, &list, gc_list) { - - if (need_resched()) - cond_resched(); - - c = rcu_dereference_protected(p->avl_left, 1); - if (c != peer_avl_empty) { - list_add_tail(&c->gc_list, &list); - p->avl_left = peer_avl_empty_rcu; - } - - c = rcu_dereference_protected(p->avl_right, 1); - if (c != peer_avl_empty) { - list_add_tail(&c->gc_list, &list); - p->avl_right = peer_avl_empty_rcu; - } - - n = list_entry(p->gc_list.next, struct inet_peer, gc_list); - - if (refcount_read(&p->refcnt) == 1) { - list_del(&p->gc_list); - kmem_cache_free(peer_cachep, p); - } - } - - if (list_empty(&list)) - return; - - spin_lock_bh(&gc_lock); - list_splice(&list, &gc_list); - spin_unlock_bh(&gc_lock); - - schedule_delayed_work(&gc_work, gc_delay); -} - /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { @@ -153,225 +91,62 @@ void __init inet_initpeers(void) sizeof(struct inet_peer), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - - INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker); } -#define rcu_deref_locked(X, BASE) \ - rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock)) - -/* - * Called with local BH disabled and the pool lock held. 
- */ -#define lookup(_daddr, _stack, _base) \ -({ \ - struct inet_peer *u; \ - struct inet_peer __rcu **v; \ - \ - stackptr = _stack; \ - *stackptr++ = &_base->root; \ - for (u = rcu_deref_locked(_base->root, _base); \ - u != peer_avl_empty;) { \ - int cmp = inetpeer_addr_cmp(_daddr, &u->daddr); \ - if (cmp == 0) \ - break; \ - if (cmp == -1) \ - v = &u->avl_left; \ - else \ - v = &u->avl_right; \ - *stackptr++ = v; \ - u = rcu_deref_locked(*v, _base); \ - } \ - u; \ -}) - -/* - * Called with rcu_read_lock() - * Because we hold no lock against a writer, its quite possible we fall - * in an endless loop. - * But every pointer we follow is guaranteed to be valid thanks to RCU. - * We exit from this function if number of links exceeds PEER_MAXDEPTH - */ -static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, - struct inet_peer_base *base) +/* Called with rcu_read_lock() or base->lock held */ +static struct inet_peer *lookup(const struct inetpeer_addr *daddr, + struct inet_peer_base *base, + unsigned int seq, + struct inet_peer *gc_stack[], + unsigned int *gc_cnt, + struct rb_node **parent_p, + struct rb_node ***pp_p) { - struct inet_peer *u = rcu_dereference(base->root); - int count = 0; + struct rb_node **pp, *parent; + struct inet_peer *p; + + pp = &base->rb_root.rb_node; + parent = NULL; + while (*pp) { + int cmp; - while (u != peer_avl_empty) { - int cmp = inetpeer_addr_cmp(daddr, &u->daddr); + parent = rcu_dereference_raw(*pp); + p = rb_entry(parent, struct inet_peer, rb_node); + cmp = inetpeer_addr_cmp(daddr, &p->daddr); if (cmp == 0) { - /* Before taking a reference, check if this entry was - * deleted (refcnt=0) - */ - if (!refcount_inc_not_zero(&u->refcnt)) { - u = NULL; - } - return u; + if (!refcount_inc_not_zero(&p->refcnt)) + break; + return p; + } + if (gc_stack) { + if (*gc_cnt < PEER_MAX_GC) + gc_stack[(*gc_cnt)++] = p; + } else if (unlikely(read_seqretry(&base->lock, seq))) { + break; } if (cmp == -1) - u = 
rcu_dereference(u->avl_left); + pp = &(*pp)->rb_left; else - u = rcu_dereference(u->avl_right); - if (unlikely(++count == PEER_MAXDEPTH)) - break; + pp = &(*pp)->rb_right; } + *parent_p = parent; + *pp_p = pp; return NULL; } -/* Called with local BH disabled and the pool lock held. */ -#define lookup_rightempty(start, base) \ -({ \ - struct inet_peer *u; \ - struct inet_peer __rcu **v; \ - *stackptr++ = &start->avl_left; \ - v = &start->avl_left; \ - for (u = rcu_deref_locked(*v, base); \ - u->avl_right != peer_avl_empty_rcu;) { \ - v = &u->avl_right; \ - *stackptr++ = v; \ - u = rcu_deref_locked(*v, base); \ - } \ - u; \ -}) - -/* Called with local BH disabled and the pool lock held. - * Variable names are the proof of operation correctness. - * Look into mm/map_avl.c for more detail description of the ideas. - */ -static void peer_avl_rebalance(struct inet_peer __rcu **stack[], - struct inet_peer __rcu ***stackend, - struct inet_peer_base *base) -{ - struct inet_peer __rcu **nodep; - struct inet_peer *node, *l, *r; - int lh, rh; - - while (stackend > stack) { - nodep = *--stackend; - node = rcu_deref_locked(*nodep, base); - l = rcu_deref_locked(node->avl_left, base); - r = rcu_deref_locked(node->avl_right, base); - lh = node_height(l); - rh = node_height(r); - if (lh > rh + 1) { /* l: RH+2 */ - struct inet_peer *ll, *lr, *lrl, *lrr; - int lrh; - ll = rcu_deref_locked(l->avl_left, base); - lr = rcu_deref_locked(l->avl_right, base); - lrh = node_height(lr); - if (lrh <= node_height(ll)) { /* ll: RH+1 */ - RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ - RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ - node->avl_height = lrh + 1; /* RH+1 or RH+2 */ - RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */ - RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */ - l->avl_height = node->avl_height + 1; - RCU_INIT_POINTER(*nodep, l); - } else { /* ll: RH, lr: RH+1 */ - lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */ - lrr = 
rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */ - RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ - RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ - node->avl_height = rh + 1; /* node: RH+1 */ - RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */ - RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */ - l->avl_height = rh + 1; /* l: RH+1 */ - RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */ - RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */ - lr->avl_height = rh + 2; - RCU_INIT_POINTER(*nodep, lr); - } - } else if (rh > lh + 1) { /* r: LH+2 */ - struct inet_peer *rr, *rl, *rlr, *rll; - int rlh; - rr = rcu_deref_locked(r->avl_right, base); - rl = rcu_deref_locked(r->avl_left, base); - rlh = node_height(rl); - if (rlh <= node_height(rr)) { /* rr: LH+1 */ - RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ - RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ - node->avl_height = rlh + 1; /* LH+1 or LH+2 */ - RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */ - RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */ - r->avl_height = node->avl_height + 1; - RCU_INIT_POINTER(*nodep, r); - } else { /* rr: RH, rl: RH+1 */ - rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */ - rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */ - RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ - RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ - node->avl_height = lh + 1; /* node: LH+1 */ - RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */ - RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */ - r->avl_height = lh + 1; /* r: LH+1 */ - RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */ - RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */ - rl->avl_height = lh + 2; - RCU_INIT_POINTER(*nodep, rl); - } - } else { - node->avl_height = (lh > rh ? lh : rh) + 1; - } - } -} - -/* Called with local BH disabled and the pool lock held. 
*/ -#define link_to_pool(n, base) \ -do { \ - n->avl_height = 1; \ - n->avl_left = peer_avl_empty_rcu; \ - n->avl_right = peer_avl_empty_rcu; \ - /* lockless readers can catch us now */ \ - rcu_assign_pointer(**--stackptr, n); \ - peer_avl_rebalance(stack, stackptr, base); \ -} while (0) - static void inetpeer_free_rcu(struct rcu_head *head) { kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); } -static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, - struct inet_peer __rcu **stack[PEER_MAXDEPTH]) -{ - struct inet_peer __rcu ***stackptr, ***delp; - - if (lookup(&p->daddr, stack, base) != p) - BUG(); - delp = stackptr - 1; /* *delp[0] == p */ - if (p->avl_left == peer_avl_empty_rcu) { - *delp[0] = p->avl_right; - --stackptr; - } else { - /* look for a node to insert instead of p */ - struct inet_peer *t; - t = lookup_rightempty(p, base); - BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); - **--stackptr = t->avl_left; - /* t is removed, t->daddr > x->daddr for any - * x in p->avl_left subtree. - * Put t in the old place of p. 
*/ - RCU_INIT_POINTER(*delp[0], t); - t->avl_left = p->avl_left; - t->avl_right = p->avl_right; - t->avl_height = p->avl_height; - BUG_ON(delp[1] != &p->avl_left); - delp[1] = &t->avl_left; /* was &p->avl_left */ - } - peer_avl_rebalance(stack, stackptr, base); - base->total--; - call_rcu(&p->rcu, inetpeer_free_rcu); -} - /* perform garbage collect on all items stacked during a lookup */ -static int inet_peer_gc(struct inet_peer_base *base, - struct inet_peer __rcu **stack[PEER_MAXDEPTH], - struct inet_peer __rcu ***stackptr) +static void inet_peer_gc(struct inet_peer_base *base, + struct inet_peer *gc_stack[], + unsigned int gc_cnt) { - struct inet_peer *p, *gchead = NULL; + struct inet_peer *p; __u32 delta, ttl; - int cnt = 0; + int i; if (base->total >= inet_peer_threshold) ttl = 0; /* be aggressive */ @@ -379,43 +154,38 @@ static int inet_peer_gc(struct inet_peer_base *base, ttl = inet_peer_maxttl - (inet_peer_maxttl - inet_peer_minttl) / HZ * base->total / inet_peer_threshold * HZ; - stackptr--; /* last stack slot is peer_avl_empty */ - while (stackptr > stack) { - stackptr--; - p = rcu_deref_locked(**stackptr, base); - if (refcount_read(&p->refcnt) == 1) { - smp_rmb(); - delta = (__u32)jiffies - p->dtime; - if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) { - p->gc_next = gchead; - gchead = p; - } - } + for (i = 0; i < gc_cnt; i++) { + p = gc_stack[i]; + delta = (__u32)jiffies - p->dtime; + if (delta < ttl || !refcount_dec_if_one(&p->refcnt)) + gc_stack[i] = NULL; } - while ((p = gchead) != NULL) { - gchead = p->gc_next; - cnt++; - unlink_from_pool(p, base, stack); + for (i = 0; i < gc_cnt; i++) { + p = gc_stack[i]; + if (p) { + rb_erase(&p->rb_node, &base->rb_root); + base->total--; + call_rcu(&p->rcu, inetpeer_free_rcu); + } } - return cnt; } struct inet_peer *inet_getpeer(struct inet_peer_base *base, const struct inetpeer_addr *daddr, int create) { - struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer *p; - unsigned int 
sequence; - int invalidated, gccnt = 0; + struct inet_peer *p, *gc_stack[PEER_MAX_GC]; + struct rb_node **pp, *parent; + unsigned int gc_cnt, seq; + int invalidated; /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. */ rcu_read_lock(); - sequence = read_seqbegin(&base->lock); - p = lookup_rcu(daddr, base); - invalidated = read_seqretry(&base->lock, sequence); + seq = read_seqbegin(&base->lock); + p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp); + invalidated = read_seqretry(&base->lock, seq); rcu_read_unlock(); if (p) @@ -428,36 +198,31 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, /* retry an exact lookup, taking the lock before. * At least, nodes should be hot in our cache. */ + parent = NULL; write_seqlock_bh(&base->lock); -relookup: - p = lookup(daddr, stack, base); - if (p != peer_avl_empty) { - refcount_inc(&p->refcnt); - write_sequnlock_bh(&base->lock); - return p; - } - if (!gccnt) { - gccnt = inet_peer_gc(base, stack, stackptr); - if (gccnt && create) - goto relookup; - } - p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; - if (p) { - p->daddr = *daddr; - refcount_set(&p->refcnt, 2); - atomic_set(&p->rid, 0); - p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; - p->rate_tokens = 0; - /* 60*HZ is arbitrary, but chosen enough high so that the first - * calculation of tokens is at its maximum. - */ - p->rate_last = jiffies - 60*HZ; - INIT_LIST_HEAD(&p->gc_list); - /* Link the node. */ - link_to_pool(p, base); - base->total++; + gc_cnt = 0; + p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp); + if (!p && create) { + p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC); + if (p) { + p->daddr = *daddr; + refcount_set(&p->refcnt, 2); + atomic_set(&p->rid, 0); + p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; + p->rate_tokens = 0; + /* 60*HZ is arbitrary, but chosen enough high so that the first + * calculation of tokens is at its maximum. 
+ */ + p->rate_last = jiffies - 60*HZ; + + rb_link_node(&p->rb_node, parent, pp); + rb_insert_color(&p->rb_node, &base->rb_root); + base->total++; + } } + if (gc_cnt) + inet_peer_gc(base, gc_stack, gc_cnt); write_sequnlock_bh(&base->lock); return p; @@ -467,8 +232,9 @@ EXPORT_SYMBOL_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { p->dtime = (__u32)jiffies; - smp_mb__before_atomic(); - refcount_dec(&p->refcnt); + + if (refcount_dec_and_test(&p->refcnt)) + call_rcu(&p->rcu, inetpeer_free_rcu); } EXPORT_SYMBOL_GPL(inet_putpeer); @@ -513,30 +279,16 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) } EXPORT_SYMBOL(inet_peer_xrlim_allow); -static void inetpeer_inval_rcu(struct rcu_head *head) -{ - struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu); - - spin_lock_bh(&gc_lock); - list_add_tail(&p->gc_list, &gc_list); - spin_unlock_bh(&gc_lock); - - schedule_delayed_work(&gc_work, gc_delay); -} - void inetpeer_invalidate_tree(struct inet_peer_base *base) { - struct inet_peer *root; - - write_seqlock_bh(&base->lock); + struct inet_peer *p, *n; - root = rcu_deref_locked(base->root, base); - if (root != peer_avl_empty) { - base->root = peer_avl_empty_rcu; - base->total = 0; - call_rcu(&root->gc_rcu, inetpeer_inval_rcu); + rbtree_postorder_for_each_entry_safe(p, n, &base->rb_root, rb_node) { + inet_putpeer(p); + cond_resched(); } - write_sequnlock_bh(&base->lock); + base->rb_root = RB_ROOT; + base->total = 0; } EXPORT_SYMBOL(inetpeer_invalidate_tree); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 50c74cd890bc..b631ec685d77 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -853,61 +853,6 @@ csum_page(struct page *page, int offset, int copy) return csum; } -static inline int ip_ufo_append_data(struct sock *sk, - struct sk_buff_head *queue, - int getfrag(void *from, char *to, int offset, int len, - int odd, struct sk_buff *skb), - void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, 
int maxfraglen, unsigned int flags) -{ - struct sk_buff *skb; - int err; - - /* There is support for UDP fragmentation offload by network - * device, so create one single skb packet containing complete - * udp datagram - */ - skb = skb_peek_tail(queue); - if (!skb) { - skb = sock_alloc_send_skb(sk, - hh_len + fragheaderlen + transhdrlen + 20, - (flags & MSG_DONTWAIT), &err); - - if (!skb) - return err; - - /* reserve space for Hardware header */ - skb_reserve(skb, hh_len); - - /* create space for UDP/IP header */ - skb_put(skb, fragheaderlen + transhdrlen); - - /* initialize network header pointer */ - skb_reset_network_header(skb); - - /* initialize protocol header pointer */ - skb->transport_header = skb->network_header + fragheaderlen; - - skb->csum = 0; - - if (flags & MSG_CONFIRM) - skb_set_dst_pending_confirm(skb, 1); - - __skb_queue_tail(queue, skb); - } else if (skb_is_gso(skb)) { - goto append; - } - - skb->ip_summed = CHECKSUM_PARTIAL; - /* specify the length of each IP datagram fragment */ - skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - -append: - return skb_append_datato_frags(sk, skb, getfrag, from, - (length - transhdrlen)); -} - static int __ip_append_data(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, @@ -965,18 +910,6 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) || - (skb && skb_is_gso(skb))) && - (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && - (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { - err = ip_ufo_append_data(sk, queue, getfrag, from, length, - hh_len, fragheaderlen, transhdrlen, - maxfraglen, flags); - if (err) - goto error; - return 0; - } /* So, what's going on in the loop below? 
* @@ -1287,15 +1220,6 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, if (!skb) return -EINVAL; - if ((size + skb->len > mtu) && - (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO)) { - if (skb->ip_summed != CHECKSUM_PARTIAL) - return -EOPNOTSUPP; - - skb_shinfo(skb)->gso_size = mtu - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - } cork->length += size; while (size > 0) { diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 0192c255e508..5ed63d250950 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -584,33 +584,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .get_link_net = ip_tunnel_get_link_net, }; -static bool is_vti_tunnel(const struct net_device *dev) -{ - return dev->netdev_ops == &vti_netdev_ops; -} - -static int vti_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct ip_tunnel *tunnel = netdev_priv(dev); - - if (!is_vti_tunnel(dev)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_DOWN: - if (!net_eq(tunnel->net, dev_net(dev))) - xfrm_garbage_collect(tunnel->net); - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block vti_notifier_block __read_mostly = { - .notifier_call = vti_device_event, -}; - static int __init vti_init(void) { const char *msg; @@ -618,8 +591,6 @@ static int __init vti_init(void) pr_info("IPv4 over IPsec tunneling driver\n"); - register_netdevice_notifier(&vti_notifier_block); - msg = "tunnel device"; err = register_pernet_device(&vti_net_ops); if (err < 0) @@ -652,7 +623,6 @@ xfrm_proto_ah_failed: xfrm_proto_esp_failed: unregister_pernet_device(&vti_net_ops); pernet_dev_failed: - unregister_netdevice_notifier(&vti_notifier_block); pr_err("vti init: failed to register %s\n", msg); return err; } @@ -664,7 +634,6 @@ static void __exit vti_fini(void) xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); 
xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); unregister_pernet_device(&vti_net_ops); - unregister_netdevice_notifier(&vti_notifier_block); } module_init(vti_init); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4e985dea1dd2..886d874775df 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2377,7 +2377,6 @@ bool tcp_schedule_loss_probe(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 timeout, tlp_time_stamp, rto_time_stamp; - u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); /* No consecutive loss probes. */ if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { @@ -2406,15 +2405,19 @@ bool tcp_schedule_loss_probe(struct sock *sk) tcp_send_head(sk)) return false; - /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account + /* Probe timeout is 2*rtt. Add minimum RTO to account * for delayed ack when there's one outstanding packet. If no RTT * sample is available then probe after TCP_TIMEOUT_INIT. */ - timeout = rtt << 1 ? : TCP_TIMEOUT_INIT; - if (tp->packets_out == 1) - timeout = max_t(u32, timeout, - (rtt + (rtt >> 1) + TCP_DELACK_MAX)); - timeout = max_t(u32, timeout, msecs_to_jiffies(10)); + if (tp->srtt_us) { + timeout = usecs_to_jiffies(tp->srtt_us >> 2); + if (tp->packets_out == 1) + timeout += TCP_RTO_MIN; + else + timeout += TCP_TIMEOUT_MIN; + } else { + timeout = TCP_TIMEOUT_INIT; + } /* If RTO is shorter, just schedule TLP in its place. 
*/ tlp_time_stamp = tcp_jiffies32 + timeout; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index fe9a493d0208..449cd914d58e 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -113,7 +113,7 @@ void tcp_rack_mark_lost(struct sock *sk) tp->rack.advanced = 0; tcp_rack_detect_loss(sk, &timeout); if (timeout) { - timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN); + timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN; inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT, timeout, inet_csk(sk)->icsk_rto); } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 781250151d40..97658bfc1b58 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -21,7 +21,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, __be16 new_protocol, bool is_ipv6) { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); - bool remcsum, need_csum, offload_csum, ufo, gso_partial; + bool remcsum, need_csum, offload_csum, gso_partial; struct sk_buff *segs = ERR_PTR(-EINVAL); struct udphdr *uh = udp_hdr(skb); u16 mac_offset = skb->mac_header; @@ -61,8 +61,6 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); skb->remcsum_offload = remcsum; - ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); - need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb)); /* Try to offload checksum if possible */ offload_csum = !!(need_csum && @@ -77,7 +75,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * outer one so strip the existing checksum feature flags and * instead set the flag based on our outer checksum offload value. 
*/ - if (remcsum || ufo) { + if (remcsum) { features &= ~NETIF_F_CSUM_MASK; if (!need_csum || offload_csum) features |= NETIF_F_HW_CSUM; @@ -189,66 +187,16 @@ out_unlock: } EXPORT_SYMBOL(skb_udp_tunnel_segment); -static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - __wsum csum; - struct udphdr *uh; - struct iphdr *iph; if (skb->encapsulation && (skb_shinfo(skb)->gso_type & - (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) segs = skb_udp_tunnel_segment(skb, features, false); - goto out; - } - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto out; - - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - /* Do software UFO. Complete and fill in the UDP checksum as - * HW cannot do checksum of UDP packets sent as multiple - * IP fragments. - */ - - uh = udp_hdr(skb); - iph = ip_hdr(skb); - - uh->check = 0; - csum = skb_checksum(skb, 0, skb->len, 0); - uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - skb->ip_summed = CHECKSUM_NONE; - - /* If there is no outer header we can fake a checksum offload - * due to the fact that we have already done the checksum in - * software prior to segmenting the frame. - */ - if (!skb->encap_hdr_csum) - features |= NETIF_F_HW_CSUM; - - /* Fragment the skb. 
IP headers of the fragments are updated in - * inet_gso_segment() - */ - segs = skb_segment(skb, features); -out: return segs; } @@ -382,7 +330,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff) static const struct net_offload udpv4_offload = { .callbacks = { - .gso_segment = udp4_ufo_fragment, + .gso_segment = udp4_tunnel_segment, .gro_receive = udp4_gro_receive, .gro_complete = udp4_gro_complete, }, diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 71b4ecc195c7..4aefb149fe0a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -213,14 +213,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) fl4->flowi4_tos = iph->tos; } -static inline int xfrm4_garbage_collect(struct dst_ops *ops) -{ - struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); - - xfrm_garbage_collect_deferred(net); - return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); -} - static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { @@ -259,14 +251,13 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm4_dst_ops_template = { .family = AF_INET, - .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, .redirect = xfrm4_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, - .gc_thresh = INT_MAX, + .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1422d6c08377..c6ec06465ce0 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1114,69 +1114,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); -static inline int ip6_ufo_append_data(struct sock *sk, - struct sk_buff_head *queue, - int getfrag(void *from, char *to, int offset, int len, - int odd, 
struct sk_buff *skb), - void *from, int length, int hh_len, int fragheaderlen, - int exthdrlen, int transhdrlen, int mtu, - unsigned int flags, const struct flowi6 *fl6) - -{ - struct sk_buff *skb; - int err; - - /* There is support for UDP large send offload by network - * device, so create one single skb packet containing complete - * udp datagram - */ - skb = skb_peek_tail(queue); - if (!skb) { - skb = sock_alloc_send_skb(sk, - hh_len + fragheaderlen + transhdrlen + 20, - (flags & MSG_DONTWAIT), &err); - if (!skb) - return err; - - /* reserve space for Hardware header */ - skb_reserve(skb, hh_len); - - /* create space for UDP/IP header */ - skb_put(skb, fragheaderlen + transhdrlen); - - /* initialize network header pointer */ - skb_set_network_header(skb, exthdrlen); - - /* initialize protocol header pointer */ - skb->transport_header = skb->network_header + fragheaderlen; - - skb->protocol = htons(ETH_P_IPV6); - skb->csum = 0; - - if (flags & MSG_CONFIRM) - skb_set_dst_pending_confirm(skb, 1); - - __skb_queue_tail(queue, skb); - } else if (skb_is_gso(skb)) { - goto append; - } - - skb->ip_summed = CHECKSUM_PARTIAL; - /* Specify the length of each IPv6 datagram fragment. - * It has to be a multiple of 8. - */ - skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - - sizeof(struct frag_hdr)) & ~7; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk), - &fl6->daddr, - &fl6->saddr); - -append: - return skb_append_datato_frags(sk, skb, getfrag, from, - (length - transhdrlen)); -} - static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, gfp_t gfp) { @@ -1385,19 +1322,6 @@ emsgsize: */ cork->length += length; - if ((((length + (skb ? 
skb->len : headersize)) > mtu) || - (skb && skb_is_gso(skb))) && - (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && - (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { - err = ip6_ufo_append_data(sk, queue, getfrag, from, length, - hh_len, fragheaderlen, exthdrlen, - transhdrlen, mtu, flags, fl6); - if (err) - goto error; - return 0; - } - if (!skb) goto alloc_new_skb; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 486c2305f53c..79444a4bfd6d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1145,33 +1145,6 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .priority = 100, }; -static bool is_vti6_tunnel(const struct net_device *dev) -{ - return dev->netdev_ops == &vti6_netdev_ops; -} - -static int vti6_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct ip6_tnl *t = netdev_priv(dev); - - if (!is_vti6_tunnel(dev)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_DOWN: - if (!net_eq(t->net, dev_net(dev))) - xfrm_garbage_collect(t->net); - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block vti6_notifier_block __read_mostly = { - .notifier_call = vti6_device_event, -}; - /** * vti6_tunnel_init - register protocol and reserve needed resources * @@ -1182,8 +1155,6 @@ static int __init vti6_tunnel_init(void) const char *msg; int err; - register_netdevice_notifier(&vti6_notifier_block); - msg = "tunnel device"; err = register_pernet_device(&vti6_net_ops); if (err < 0) @@ -1216,7 +1187,6 @@ xfrm_proto_ah_failed: xfrm_proto_esp_failed: unregister_pernet_device(&vti6_net_ops); pernet_dev_failed: - unregister_netdevice_notifier(&vti6_notifier_block); pr_err("vti6 init: failed to register %s\n", msg); return err; } @@ -1231,7 +1201,6 @@ static void __exit vti6_tunnel_cleanup(void) xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); 
xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); unregister_pernet_device(&vti6_net_ops); - unregister_netdevice_notifier(&vti6_notifier_block); } module_init(vti6_tunnel_init); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index a2267f80febb..455fd4e39333 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -17,109 +17,15 @@ #include <net/ip6_checksum.h> #include "ip6_offload.h" -static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - unsigned int unfrag_ip6hlen, unfrag_len; - struct frag_hdr *fptr; - u8 *packet_start, *prevhdr; - u8 nexthdr; - u8 frag_hdr_sz = sizeof(struct frag_hdr); - __wsum csum; - int tnl_hlen; - int err; - - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - /* Set the IPv6 fragment id if not set yet */ - if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(dev_net(skb->dev), skb); - - segs = NULL; - goto out; - } if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) segs = skb_udp_tunnel_segment(skb, features, true); - else { - const struct ipv6hdr *ipv6h; - struct udphdr *uh; - - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto out; - - /* Do software UFO. Complete and fill in the UDP checksum as HW cannot - * do checksum of UDP packets sent as multiple IP fragments. 
- */ - - uh = udp_hdr(skb); - ipv6h = ipv6_hdr(skb); - - uh->check = 0; - csum = skb_checksum(skb, 0, skb->len, 0); - uh->check = udp_v6_check(skb->len, &ipv6h->saddr, - &ipv6h->daddr, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - skb->ip_summed = CHECKSUM_NONE; - - /* If there is no outer header we can fake a checksum offload - * due to the fact that we have already done the checksum in - * software prior to segmenting the frame. - */ - if (!skb->encap_hdr_csum) - features |= NETIF_F_HW_CSUM; - - /* Check if there is enough headroom to insert fragment header. */ - tnl_hlen = skb_tnl_header_len(skb); - if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) { - if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) - goto out; - } - - /* Find the unfragmentable header and shift it left by frag_hdr_sz - * bytes to insert fragment header. - */ - err = ip6_find_1stfragopt(skb, &prevhdr); - if (err < 0) - return ERR_PTR(err); - unfrag_ip6hlen = err; - nexthdr = *prevhdr; - *prevhdr = NEXTHDR_FRAGMENT; - unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + - unfrag_ip6hlen + tnl_hlen; - packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset; - memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len); - - SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz; - skb->mac_header -= frag_hdr_sz; - skb->network_header -= frag_hdr_sz; - - fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); - fptr->nexthdr = nexthdr; - fptr->reserved = 0; - if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(dev_net(skb->dev), skb); - fptr->identification = skb_shinfo(skb)->ip6_frag_id; - - /* Fragment the skb. 
ipv6 header and the remaining fields of the - * fragment header are updated in ipv6_gso_segment() - */ - segs = skb_segment(skb, features); - } -out: return segs; } @@ -169,7 +75,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) static const struct net_offload udpv6_offload = { .callbacks = { - .gso_segment = udp6_ufo_fragment, + .gso_segment = udp6_tunnel_segment, .gro_receive = udp6_gro_receive, .gro_complete = udp6_gro_complete, }, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 79651bc71bf0..f44b25a48478 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -214,14 +214,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) } } -static inline int xfrm6_garbage_collect(struct dst_ops *ops) -{ - struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); - - xfrm_garbage_collect_deferred(net); - return dst_entries_get_fast(ops) > ops->gc_thresh * 2; -} - static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { @@ -279,14 +271,13 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm6_dst_ops_template = { .family = AF_INET6, - .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, .redirect = xfrm6_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = INT_MAX, + .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { diff --git a/net/key/af_key.c b/net/key/af_key.c index ca9d3ae665e7..10d7133e4fe9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2398,8 +2398,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa out: xfrm_pol_put(xp); - if (err == 0) - xfrm_garbage_collect(net); return err; } @@ -2650,8 +2648,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ out: xfrm_pol_put(xp); - 
if (delete && err == 0) - xfrm_garbage_collect(net); return err; } @@ -2751,8 +2747,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad int err, err2; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); - if (!err) - xfrm_garbage_collect(net); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - old silent behavior */ diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 45fe8c8a884d..f6e229b51dfb 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -335,8 +335,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info, uint32_t cutlen) { - unsigned short gso_type = skb_shinfo(skb)->gso_type; - struct sw_flow_key later_key; struct sk_buff *segs, *nskb; int err; @@ -347,21 +345,9 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, if (segs == NULL) return -EINVAL; - if (gso_type & SKB_GSO_UDP) { - /* The initial flow key extracted by ovs_flow_key_extract() - * in this case is for a first fragment, so we need to - * properly mark later fragments. - */ - later_key = *key; - later_key.ip.frag = OVS_FRAG_TYPE_LATER; - } - /* Queue all of the segments. */ skb = segs; do { - if (gso_type & SKB_GSO_UDP && skb != segs) - key = &later_key; - err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); if (err) break; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 3f76cb765e5b..8c94cef25a72 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -72,8 +72,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, const struct sk_buff *skb) { struct flow_stats *stats; - int node = numa_node_id(); - int cpu = smp_processor_id(); + unsigned int cpu = smp_processor_id(); int len = skb->len + (skb_vlan_tag_present(skb) ? 
VLAN_HLEN : 0); stats = rcu_dereference(flow->stats[cpu]); @@ -108,7 +107,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, __GFP_THISNODE | __GFP_NOWARN | __GFP_NOMEMALLOC, - node); + numa_node_id()); if (likely(new_stats)) { new_stats->used = jiffies; new_stats->packet_count = 1; @@ -118,6 +117,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, rcu_assign_pointer(flow->stats[cpu], new_stats); + cpumask_set_cpu(cpu, &flow->cpu_used_mask); goto unlock; } } @@ -145,7 +145,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow, memset(ovs_stats, 0, sizeof(*ovs_stats)); /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) { struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]); if (stats) { @@ -169,7 +169,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow) int cpu; /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) { struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]); if (stats) { @@ -584,8 +584,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) key->ip.frag = OVS_FRAG_TYPE_LATER; return 0; } - if (nh->frag_off & htons(IP_MF) || - skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + if (nh->frag_off & htons(IP_MF)) key->ip.frag = OVS_FRAG_TYPE_FIRST; else key->ip.frag = OVS_FRAG_TYPE_NONE; @@ -701,9 +700,6 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) if (key->ip.frag == OVS_FRAG_TYPE_LATER) return 0; - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - key->ip.frag = OVS_FRAG_TYPE_FIRST; - /* Transport layer. 
*/ if (key->ip.proto == NEXTHDR_TCP) { if (tcphdr_ok(skb)) { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a9bc1c875965..1875bba4f865 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -31,6 +31,7 @@ #include <linux/jiffies.h> #include <linux/time.h> #include <linux/flex_array.h> +#include <linux/cpumask.h> #include <net/inet_ecn.h> #include <net/ip_tunnels.h> #include <net/dst_metadata.h> @@ -219,6 +220,7 @@ struct sw_flow { */ struct sw_flow_key key; struct sw_flow_id id; + struct cpumask cpu_used_mask; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; struct flow_stats __rcu *stats[]; /* One for each CPU. First one diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index ea7a8073fa02..80ea2a71852e 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -98,6 +98,8 @@ struct sw_flow *ovs_flow_alloc(void) RCU_INIT_POINTER(flow->stats[0], stats); + cpumask_set_cpu(0, &flow->cpu_used_mask); + return flow; err: kmem_cache_free(flow_cache, flow); @@ -141,7 +143,7 @@ static void flow_free(struct sw_flow *flow) if (flow->sf_acts) ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts); /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) if (flow->stats[cpu]) kmem_cache_free(flow_stats_cache, (struct flow_stats __force *)flow->stats[cpu]); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 008bb34ee324..e7303f68972d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -177,8 +177,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, #define BLK_PLUS_PRIV(sz_of_priv) \ (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) -#define PGV_FROM_VMALLOC 1 - #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) #define BLOCK_NUM_PKTS(x) 
((x)->hdr.bh1.num_pkts) #define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt) diff --git a/net/rds/connection.c b/net/rds/connection.c index 50a3789ac23e..005bca68aa94 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -374,13 +374,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) if (!cp->cp_transport_data) return; - rds_conn_path_drop(cp); - flush_work(&cp->cp_down_w); - /* make sure lingering queued work won't try to ref the conn */ cancel_delayed_work_sync(&cp->cp_send_w); cancel_delayed_work_sync(&cp->cp_recv_w); + rds_conn_path_drop(cp, true); + flush_work(&cp->cp_down_w); + /* tear down queued messages */ list_for_each_entry_safe(rm, rtmp, &cp->cp_send_queue, @@ -664,9 +664,13 @@ void rds_conn_exit(void) /* * Force a disconnect */ -void rds_conn_path_drop(struct rds_conn_path *cp) +void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy) { atomic_set(&cp->cp_state, RDS_CONN_ERROR); + + if (!destroy && cp->cp_conn->c_destroy_in_prog) + return; + queue_work(rds_wq, &cp->cp_down_w); } EXPORT_SYMBOL_GPL(rds_conn_path_drop); @@ -674,7 +678,7 @@ EXPORT_SYMBOL_GPL(rds_conn_path_drop); void rds_conn_drop(struct rds_connection *conn) { WARN_ON(conn->c_trans->t_mp_capable); - rds_conn_path_drop(&conn->c_path[0]); + rds_conn_path_drop(&conn->c_path[0], false); } EXPORT_SYMBOL_GPL(rds_conn_drop); @@ -706,5 +710,5 @@ __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...) 
vprintk(fmt, ap); va_end(ap); - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); } diff --git a/net/rds/rds.h b/net/rds/rds.h index 516bcc89b46f..3382695bf46c 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -700,7 +700,7 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net, void rds_conn_shutdown(struct rds_conn_path *cpath); void rds_conn_destroy(struct rds_connection *conn); void rds_conn_drop(struct rds_connection *conn); -void rds_conn_path_drop(struct rds_conn_path *cpath); +void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy); void rds_conn_connect_if_down(struct rds_connection *conn); void rds_conn_path_connect_if_down(struct rds_conn_path *cp); void rds_for_each_conn_info(struct socket *sock, unsigned int len, diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 431404dbdad1..6b7ee71f40c6 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -592,7 +592,7 @@ static void rds_tcp_sysctl_reset(struct net *net) continue; /* reconnect with new parameters */ - rds_conn_path_drop(tc->t_cpath); + rds_conn_path_drop(tc->t_cpath, false); } spin_unlock_irq(&rds_tcp_conn_lock); } diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index cbe08a1fa4c7..46f74dad0e16 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -69,14 +69,14 @@ void rds_tcp_state_change(struct sock *sk) if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) && rds_conn_path_transition(cp, RDS_CONN_CONNECTING, RDS_CONN_ERROR)) { - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); } else { rds_connect_path_complete(cp, RDS_CONN_CONNECTING); } break; case TCP_CLOSE_WAIT: case TCP_CLOSE: - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); default: break; } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 0d8616aa5bad..dc860d1bb608 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -157,7 +157,7 @@ out: "returned %d, " "disconnecting and reconnecting\n", &conn->c_faddr, cp->cp_index, ret); - rds_conn_path_drop(cp); 
+ rds_conn_path_drop(cp, false); } } } diff --git a/net/rds/threads.c b/net/rds/threads.c index 2852bc1d37d4..f121daa402c8 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -78,7 +78,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr) "current state is %d\n", __func__, atomic_read(&cp->cp_state)); - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); return; } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3317a2f579da..67afc12df88b 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -231,9 +231,6 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, const struct iphdr *iph; u16 ul; - if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - return 1; - /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, @@ -287,9 +284,6 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, const struct ipv6hdr *ip6h; u16 ul; - if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - return 1; - /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, diff --git a/net/sctp/auth.c b/net/sctp/auth.c index e001b01b0e68..00667c50efa7 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -185,9 +185,9 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1, * are called the two key vectors. 
*/ static struct sctp_auth_bytes *sctp_auth_make_key_vector( - sctp_random_param_t *random, - sctp_chunks_param_t *chunks, - sctp_hmac_algo_param_t *hmacs, + struct sctp_random_param *random, + struct sctp_chunks_param *chunks, + struct sctp_hmac_algo_param *hmacs, gfp_t gfp) { struct sctp_auth_bytes *new; @@ -226,10 +226,9 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector( gfp_t gfp) { return sctp_auth_make_key_vector( - (sctp_random_param_t *)asoc->c.auth_random, - (sctp_chunks_param_t *)asoc->c.auth_chunks, - (sctp_hmac_algo_param_t *)asoc->c.auth_hmacs, - gfp); + (struct sctp_random_param *)asoc->c.auth_random, + (struct sctp_chunks_param *)asoc->c.auth_chunks, + (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs, gfp); } /* Make a key vector based on peer's parameters */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 0e86f988f836..3d506b2f6193 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -73,13 +73,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, * variables. There are arrays that we encode directly * into parameters to make the rest of the operations easier. 
*/ - auth_hmacs = kzalloc(sizeof(sctp_hmac_algo_param_t) + - sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp); + auth_hmacs = kzalloc(sizeof(*auth_hmacs) + + sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp); if (!auth_hmacs) goto nomem; - auth_chunks = kzalloc(sizeof(sctp_chunks_param_t) + - SCTP_NUM_CHUNK_TYPES, gfp); + auth_chunks = kzalloc(sizeof(*auth_chunks) + + SCTP_NUM_CHUNK_TYPES, gfp); if (!auth_chunks) goto nomem; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 2a186b201ad2..107d7c912922 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -497,7 +497,7 @@ static void sctp_v6_from_addr_param(union sctp_addr *addr, static int sctp_v6_to_addr_param(const union sctp_addr *addr, union sctp_addr_param *param) { - int length = sizeof(sctp_ipv6addr_param_t); + int length = sizeof(struct sctp_ipv6addr_param); param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS; param->v6.param_hdr.length = htons(length); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 989a900383b5..852556d67ae3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -292,7 +292,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr, static int sctp_v4_to_addr_param(const union sctp_addr *addr, union sctp_addr_param *param) { - int length = sizeof(sctp_ipv4addr_param_t); + int length = sizeof(struct sctp_ipv4addr_param); param->v4.param_hdr.type = SCTP_PARAM_IPV4_ADDRESS; param->v4.param_hdr.length = htons(length); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 6110447fe51d..0b36e96cb0df 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -223,10 +223,10 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, struct sctp_chunk *retval = NULL; int num_types, addrs_len = 0; struct sctp_sock *sp; - sctp_supported_addrs_param_t sat; + struct sctp_supported_addrs_param sat; __be16 types[2]; - sctp_adaptation_ind_param_t aiparam; - sctp_supported_ext_param_t ext_param; + struct sctp_adaptation_ind_param aiparam; + struct 
sctp_supported_ext_param ext_param; int num_ext = 0; __u8 extensions[4]; struct sctp_paramhdr *auth_chunks = NULL, @@ -305,8 +305,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* If we have any extensions to report, account for that */ if (num_ext) - chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext); /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -348,10 +347,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, */ if (num_ext) { ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT; - ext_param.param_hdr.length = - htons(sizeof(sctp_supported_ext_param_t) + num_ext); - sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), - &ext_param); + ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext); + sctp_addto_chunk(retval, sizeof(ext_param), &ext_param); sctp_addto_param(retval, num_ext, extensions); } @@ -393,8 +390,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, sctp_cookie_param_t *cookie; int cookie_len; size_t chunksize; - sctp_adaptation_ind_param_t aiparam; - sctp_supported_ext_param_t ext_param; + struct sctp_adaptation_ind_param aiparam; + struct sctp_supported_ext_param ext_param; int num_ext = 0; __u8 extensions[4]; struct sctp_paramhdr *auth_chunks = NULL, @@ -468,8 +465,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, } if (num_ext) - chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext); /* Now allocate and fill out the chunk. 
*/ retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp); @@ -495,10 +491,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); if (num_ext) { ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT; - ext_param.param_hdr.length = - htons(sizeof(sctp_supported_ext_param_t) + num_ext); - sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), - &ext_param); + ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext); + sctp_addto_chunk(retval, sizeof(ext_param), &ext_param); sctp_addto_param(retval, num_ext, extensions); } if (asoc->peer.prsctp_capable) @@ -3153,7 +3147,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, case SCTP_PARAM_ERR_CAUSE: break; case SCTP_PARAM_IPV4_ADDRESS: - if (length != sizeof(sctp_ipv4addr_param_t)) + if (length != sizeof(struct sctp_ipv4addr_param)) return false; /* ensure there is only one addr param and it's in the * beginning of addip_hdr params, or we reject it. 
@@ -3163,7 +3157,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, addr_param_seen = true; break; case SCTP_PARAM_IPV6_ADDRESS: - if (length != sizeof(sctp_ipv6addr_param_t)) + if (length != sizeof(struct sctp_ipv6addr_param)) return false; if (param.v != addip->addip_hdr.params) return false; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index b2a74c3823ee..ae4c48c4f657 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2336,13 +2336,12 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, void *arg, sctp_cmd_seq_t *commands) { - struct sctp_chunk *chunk = arg; - u32 stale; - sctp_cookie_preserve_param_t bht; - sctp_errhdr_t *err; - struct sctp_chunk *reply; - struct sctp_bind_addr *bp; int attempts = asoc->init_err_counter + 1; + struct sctp_chunk *chunk = arg, *reply; + struct sctp_cookie_preserve_param bht; + struct sctp_bind_addr *bp; + sctp_errhdr_t *err; + u32 stale; if (attempts > asoc->max_init_attempts) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7b52a380d710..5c53f22d62e8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1528,26 +1528,13 @@ static inline bool too_many_unix_fds(struct task_struct *p) return false; } -#define MAX_RECURSION_LEVEL 4 - static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; - unsigned char max_level = 0; if (too_many_unix_fds(current)) return -ETOOMANYREFS; - for (i = scm->fp->count - 1; i >= 0; i--) { - struct sock *sk = unix_get_socket(scm->fp->fp[i]); - - if (sk) - max_level = max(max_level, - unix_sk(sk)->recursion_level); - } - if (unlikely(max_level > MAX_RECURSION_LEVEL)) - return -ETOOMANYREFS; - /* * Need to duplicate file references for the sake of garbage * collection. 
Otherwise a socket in the fps might become a @@ -1559,7 +1546,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) for (i = scm->fp->count - 1; i >= 0; i--) unix_inflight(scm->fp->user, scm->fp->fp[i]); - return max_level; + return 0; } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) @@ -1649,7 +1636,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, struct sk_buff *skb; long timeo; struct scm_cookie scm; - int max_level; int data_len = 0; int sk_locked; @@ -1701,7 +1687,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = unix_scm_to_skb(&scm, skb, true); if (err < 0) goto out_free; - max_level = err + 1; skb_put(skb, len - data_len); skb->data_len = data_len; @@ -1819,8 +1804,6 @@ restart_locked: __net_timestamp(skb); maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); - if (max_level > unix_sk(other)->recursion_level) - unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); @@ -1855,7 +1838,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int sent = 0; struct scm_cookie scm; bool fds_sent = false; - int max_level; int data_len; wait_for_unix_gc(); @@ -1905,7 +1887,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, kfree_skb(skb); goto out_err; } - max_level = err + 1; fds_sent = true; skb_put(skb, size - data_len); @@ -1925,8 +1906,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); - if (max_level > unix_sk(other)->recursion_level) - unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other); sent += size; @@ -2324,7 +2303,6 @@ redo: last_len = last ? 
last->len : 0; again: if (skb == NULL) { - unix_sk(sk)->recursion_level = 0; if (copied >= target) goto unlock; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 5f7e8bfa0c2d..5cd7a244e88d 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -153,6 +153,7 @@ static int xfrm_dev_register(struct net_device *dev) static int xfrm_dev_unregister(struct net_device *dev) { + xfrm_policy_cache_flush(); return NOTIFY_DONE; } @@ -175,8 +176,7 @@ static int xfrm_dev_down(struct net_device *dev) if (dev->features & NETIF_F_HW_ESP) xfrm_dev_state_flush(dev_net(dev), dev, true); - xfrm_garbage_collect(dev_net(dev)); - + xfrm_policy_cache_flush(); return NOTIFY_DONE; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ff61d8557929..06c3bf7ab86b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -24,6 +24,7 @@ #include <linux/netfilter.h> #include <linux/module.h> #include <linux/cache.h> +#include <linux/cpu.h> #include <linux/audit.h> #include <net/dst.h> #include <net/flow.h> @@ -44,6 +45,8 @@ struct xfrm_flo { u8 flags; }; +static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst); +static struct work_struct *xfrm_pcpu_work __read_mostly; static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; @@ -246,36 +249,6 @@ expired: xfrm_pol_put(xp); } -static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo) -{ - struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); - - if (unlikely(pol->walk.dead)) - flo = NULL; - else - xfrm_pol_hold(pol); - - return flo; -} - -static int xfrm_policy_flo_check(struct flow_cache_object *flo) -{ - struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); - - return !pol->walk.dead; -} - -static void xfrm_policy_flo_delete(struct flow_cache_object *flo) -{ - xfrm_pol_put(container_of(flo, struct xfrm_policy, flo)); -} - -static const struct 
flow_cache_ops xfrm_policy_fc_ops = { - .get = xfrm_policy_flo_get, - .check = xfrm_policy_flo_check, - .delete = xfrm_policy_flo_delete, -}; - /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 * SPD calls. */ @@ -298,7 +271,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) (unsigned long)policy); setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, (unsigned long)policy); - policy->flo.ops = &xfrm_policy_fc_ops; } return policy; } @@ -798,7 +770,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) else hlist_add_head(&policy->bydst, chain); __xfrm_policy_link(policy, dir); - atomic_inc(&net->xfrm.flow_cache_genid); /* After previous checking, family can either be AF_INET or AF_INET6 */ if (policy->family == AF_INET) @@ -1004,6 +975,8 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) } if (!cnt) err = -ESRCH; + else + xfrm_policy_cache_flush(); out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; @@ -1175,7 +1148,7 @@ fail: } static struct xfrm_policy * -__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) +xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_policy *pol; @@ -1187,61 +1160,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); } -static int flow_to_policy_dir(int dir) -{ - if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - - switch (dir) { - default: - case FLOW_DIR_IN: - return XFRM_POLICY_IN; - case FLOW_DIR_OUT: - return XFRM_POLICY_OUT; - case FLOW_DIR_FWD: - return XFRM_POLICY_FWD; - } -} - -static struct flow_cache_object * -xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, - u8 dir, struct flow_cache_object *old_obj, void *ctx) -{ - 
struct xfrm_policy *pol; - - if (old_obj) - xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); - - pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir)); - if (IS_ERR_OR_NULL(pol)) - return ERR_CAST(pol); - - /* Resolver returns two references: - * one for cache and one for caller of flow_cache_lookup() */ - xfrm_pol_hold(pol); - - return &pol->flo; -} - -static inline int policy_to_flow_dir(int dir) -{ - if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - switch (dir) { - default: - case XFRM_POLICY_IN: - return FLOW_DIR_IN; - case XFRM_POLICY_OUT: - return FLOW_DIR_OUT; - case XFRM_POLICY_FWD: - return FLOW_DIR_FWD; - } -} - static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, const struct flowi *fl, u16 family) { @@ -1261,7 +1179,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, } err = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, - policy_to_flow_dir(dir)); + dir); if (!err) { if (!xfrm_pol_hold_rcu(pol)) goto again; @@ -1545,58 +1463,6 @@ static int xfrm_get_tos(const struct flowi *fl, int family) return tos; } -static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - if (xdst->route == NULL) { - /* Dummy bundle - if it has xfrms we were not - * able to build bundle as template resolution failed. - * It means we need to try again resolving. 
*/ - if (xdst->num_xfrms > 0) - return NULL; - } else if (dst->flags & DST_XFRM_QUEUE) { - return NULL; - } else { - /* Real bundle */ - if (stale_bundle(dst)) - return NULL; - } - - dst_hold(dst); - return flo; -} - -static int xfrm_bundle_flo_check(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - if (!xdst->route) - return 0; - if (stale_bundle(dst)) - return 0; - - return 1; -} - -static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - dst->obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(dst); -} - -static const struct flow_cache_ops xfrm_bundle_fc_ops = { - .get = xfrm_bundle_flo_get, - .check = xfrm_bundle_flo_check, - .delete = xfrm_bundle_flo_delete, -}; - static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); @@ -1624,7 +1490,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) struct dst_entry *dst = &xdst->u.dst; memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); - xdst->flo.ops = &xfrm_bundle_fc_ops; } else xdst = ERR_PTR(-ENOBUFS); @@ -1840,6 +1705,102 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, } +static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old) +{ + this_cpu_write(xfrm_last_dst, xdst); + if (old) + dst_release(&old->u.dst); +} + +static void __xfrm_pcpu_work_fn(void) +{ + struct xfrm_dst *old; + + old = this_cpu_read(xfrm_last_dst); + if (old && !xfrm_bundle_ok(old)) + xfrm_last_dst_update(NULL, old); +} + +static void xfrm_pcpu_work_fn(struct work_struct *work) +{ + local_bh_disable(); + rcu_read_lock(); + __xfrm_pcpu_work_fn(); + rcu_read_unlock(); + local_bh_enable(); +} + +void 
xfrm_policy_cache_flush(void) +{ + struct xfrm_dst *old; + bool found = 0; + int cpu; + + local_bh_disable(); + rcu_read_lock(); + for_each_possible_cpu(cpu) { + old = per_cpu(xfrm_last_dst, cpu); + if (old && !xfrm_bundle_ok(old)) { + if (smp_processor_id() == cpu) { + __xfrm_pcpu_work_fn(); + continue; + } + found = true; + break; + } + } + + rcu_read_unlock(); + local_bh_enable(); + + if (!found) + return; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + bool bundle_release; + + rcu_read_lock(); + old = per_cpu(xfrm_last_dst, cpu); + bundle_release = old && !xfrm_bundle_ok(old); + rcu_read_unlock(); + + if (!bundle_release) + continue; + + if (cpu_online(cpu)) { + schedule_work_on(cpu, &xfrm_pcpu_work[cpu]); + continue; + } + + rcu_read_lock(); + old = per_cpu(xfrm_last_dst, cpu); + if (old && !xfrm_bundle_ok(old)) { + per_cpu(xfrm_last_dst, cpu) = NULL; + dst_release(&old->u.dst); + } + rcu_read_unlock(); + } + + put_online_cpus(); +} + +static bool xfrm_pol_dead(struct xfrm_dst *xdst) +{ + unsigned int num_pols = xdst->num_pols; + unsigned int pol_dead = 0, i; + + for (i = 0; i < num_pols; i++) + pol_dead |= xdst->pols[i]->walk.dead; + + /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ + if (pol_dead) + xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; + + return pol_dead; +} + static struct xfrm_dst * xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, const struct flowi *fl, u16 family, @@ -1847,10 +1808,22 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, { struct net *net = xp_net(pols[0]); struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; + struct xfrm_dst *xdst, *old; struct dst_entry *dst; - struct xfrm_dst *xdst; int err; + xdst = this_cpu_read(xfrm_last_dst); + if (xdst && + xdst->u.dst.dev == dst_orig->dev && + xdst->num_pols == num_pols && + !xfrm_pol_dead(xdst) && + memcmp(xdst->pols, pols, + sizeof(struct xfrm_policy *) * num_pols) == 0) { + dst_hold(&xdst->u.dst); + return xdst; + } + + old = 
xdst; /* Try to instantiate a bundle */ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); if (err <= 0) { @@ -1871,6 +1844,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); + atomic_set(&xdst->u.dst.__refcnt, 2); + xfrm_last_dst_update(xdst, old); + return xdst; } @@ -2051,86 +2027,39 @@ free_dst: goto out; } -static struct flow_cache_object * -xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, - struct flow_cache_object *oldflo, void *ctx) +static struct xfrm_dst * +xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo) { - struct xfrm_flo *xflo = (struct xfrm_flo *)ctx; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - struct xfrm_dst *xdst, *new_xdst; - int num_pols = 0, num_xfrms = 0, i, err, pol_dead; - - /* Check if the policies from old bundle are usable */ - xdst = NULL; - if (oldflo) { - xdst = container_of(oldflo, struct xfrm_dst, flo); - num_pols = xdst->num_pols; - num_xfrms = xdst->num_xfrms; - pol_dead = 0; - for (i = 0; i < num_pols; i++) { - pols[i] = xdst->pols[i]; - pol_dead |= pols[i]->walk.dead; - } - if (pol_dead) { - /* Mark DST_OBSOLETE_DEAD to fail the next - * xfrm_dst_check() - */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); - xdst = NULL; - num_pols = 0; - num_xfrms = 0; - oldflo = NULL; - } - } + int num_pols = 0, num_xfrms = 0, err; + struct xfrm_dst *xdst; /* Resolve policies to use if we couldn't get them from * previous cache entry */ - if (xdst == NULL) { - num_pols = 1; - pols[0] = __xfrm_policy_lookup(net, fl, family, - flow_to_policy_dir(dir)); - err = xfrm_expand_policies(fl, family, pols, + num_pols = 1; + pols[0] = xfrm_policy_lookup(net, fl, family, dir); + err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); - if (err < 0) - goto inc_error; - if 
(num_pols == 0) - return NULL; - if (num_xfrms <= 0) - goto make_dummy_bundle; - } + if (err < 0) + goto inc_error; + if (num_pols == 0) + return NULL; + if (num_xfrms <= 0) + goto make_dummy_bundle; - new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, + xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, xflo->dst_orig); - if (IS_ERR(new_xdst)) { - err = PTR_ERR(new_xdst); + if (IS_ERR(xdst)) { + err = PTR_ERR(xdst); if (err != -EAGAIN) goto error; - if (oldflo == NULL) - goto make_dummy_bundle; - dst_hold(&xdst->u.dst); - return oldflo; - } else if (new_xdst == NULL) { + goto make_dummy_bundle; + } else if (xdst == NULL) { num_xfrms = 0; - if (oldflo == NULL) - goto make_dummy_bundle; - xdst->num_xfrms = 0; - dst_hold(&xdst->u.dst); - return oldflo; - } - - /* Kill the previous bundle */ - if (xdst) { - /* The policies were stolen for newly generated bundle */ - xdst->num_pols = 0; - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); + goto make_dummy_bundle; } - /* We do need to return one reference for original caller */ - dst_hold(&new_xdst->u.dst); - return &new_xdst->flo; + return xdst; make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: @@ -2146,17 +2075,12 @@ make_dummy_bundle: memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); dst_hold(&xdst->u.dst); - return &xdst->flo; + return xdst; inc_error: XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); error: - if (xdst != NULL) { - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); - } else - xfrm_pols_put(pols, num_pols); + xfrm_pols_put(pols, num_pols); return ERR_PTR(err); } @@ -2187,11 +2111,10 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct sock *sk, int flags) { struct xfrm_policy 
*pols[XFRM_POLICY_TYPE_MAX]; - struct flow_cache_object *flo; struct xfrm_dst *xdst; struct dst_entry *dst, *route; u16 family = dst_orig->ops->family; - u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); + u8 dir = XFRM_POLICY_OUT; int i, err, num_pols, num_xfrms = 0, drop_pols = 0; dst = NULL; @@ -2242,15 +2165,13 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - flo = flow_cache_lookup(net, fl, family, dir, - xfrm_bundle_lookup, &xflo); - if (flo == NULL) + xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo); + if (xdst == NULL) goto nopol; - if (IS_ERR(flo)) { - err = PTR_ERR(flo); + if (IS_ERR(xdst)) { + err = PTR_ERR(xdst); goto dropdst; } - xdst = container_of(flo, struct xfrm_dst, flo); num_pols = xdst->num_pols; num_xfrms = xdst->num_xfrms; @@ -2449,12 +2370,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, int pi; int reverse; struct flowi fl; - u8 fl_dir; int xerr_idx = -1; reverse = dir & ~XFRM_POLICY_MASK; dir &= XFRM_POLICY_MASK; - fl_dir = policy_to_flow_dir(dir); if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); @@ -2486,16 +2405,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } } - if (!pol) { - struct flow_cache_object *flo; - - flo = flow_cache_lookup(net, &fl, family, fl_dir, - xfrm_policy_lookup, NULL); - if (IS_ERR_OR_NULL(flo)) - pol = ERR_CAST(flo); - else - pol = container_of(flo, struct xfrm_policy, flo); - } + if (!pol) + pol = xfrm_policy_lookup(net, &fl, family, dir); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); @@ -2641,11 +2552,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) * notice. That's what we are validating here via the * stale_bundle() check. * - * When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will - * be marked on it. 
* When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will * be marked on it. - * Both will force stable_bundle() to fail on any xdst bundle with + * This will force stale_bundle() to fail on any xdst bundle with * this dst linked in it. */ if (dst->obsolete < 0 && !stale_bundle(dst)) @@ -2685,18 +2594,6 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } -void xfrm_garbage_collect(struct net *net) -{ - flow_cache_flush(net); -} -EXPORT_SYMBOL(xfrm_garbage_collect); - -void xfrm_garbage_collect_deferred(struct net *net) -{ - flow_cache_flush_deferred(net); -} -EXPORT_SYMBOL(xfrm_garbage_collect_deferred); - static void xfrm_init_pmtu(struct dst_entry *dst) { do { @@ -3034,14 +2931,9 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; - rv = flow_cache_init(net); - if (rv < 0) - goto out; return 0; -out: - xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: @@ -3054,7 +2946,6 @@ out_statistics: static void __net_exit xfrm_net_exit(struct net *net) { - flow_cache_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); @@ -3068,7 +2959,15 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { - flow_cache_hp_init(); + int i; + + xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work), + GFP_KERNEL); + BUG_ON(!xfrm_pcpu_work); + + for (i = 0; i < NR_CPUS; i++) + INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn); + register_pernet_subsys(&xfrm_net_ops); seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 6c0956d10db6..82cbbce69b79 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -724,9 +724,10 @@ restart: } } } - if (cnt) + if (cnt) { err = 0; - + xfrm_policy_cache_flush(); + } out: spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err; diff --git a/net/xfrm/xfrm_user.c 
b/net/xfrm/xfrm_user.c index 2be4c6af008a..1b539b7dcfab 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1815,8 +1815,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, out: xfrm_pol_put(xp); - if (delete && err == 0) - xfrm_garbage_collect(net); return err; } @@ -2027,7 +2025,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; return err; } - xfrm_garbage_collect(net); c.data.type = type; c.event = nlh->nlmsg_type; diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 87246be6feb8..770d46cdf9f4 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -37,6 +37,8 @@ hostprogs-y += xdp_tx_iptunnel hostprogs-y += test_map_in_map hostprogs-y += per_socket_stats_example hostprogs-y += load_sock_ops +hostprogs-y += xdp_redirect +hostprogs-y += xdp_redirect_map # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o @@ -78,6 +80,8 @@ lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o +xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o +xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -119,6 +123,8 @@ always += tcp_bufs_kern.o always += tcp_cong_kern.o always += tcp_iw_kern.o always += tcp_clamp_kern.o +always += xdp_redirect_kern.o +always += xdp_redirect_map_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -155,6 +161,8 @@ HOSTLOADLIBES_tc_l2_redirect += -l elf HOSTLOADLIBES_lwt_len_hist += -l elf HOSTLOADLIBES_xdp_tx_iptunnel += -lelf HOSTLOADLIBES_test_map_in_map += -lelf +HOSTLOADLIBES_xdp_redirect += -lelf +HOSTLOADLIBES_xdp_redirect_map += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on 
cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c new file mode 100644 index 000000000000..a34ad457a684 --- /dev/null +++ b/samples/bpf/xdp_redirect_kern.c @@ -0,0 +1,81 @@ +/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define KBUILD_MODNAME "foo" +#include <uapi/linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") tx_port = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 1, +}; + + +static void swap_src_dst_mac(void *data) +{ + unsigned short *p = data; + unsigned short dst[3]; + + dst[0] = p[0]; + dst[1] = p[1]; + dst[2] = p[2]; + p[0] = p[3]; + p[1] = p[4]; + p[2] = p[5]; + p[3] = dst[0]; + p[4] = dst[1]; + p[5] = dst[2]; +} + +SEC("xdp_redirect") +int xdp_redirect_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + int rc = XDP_DROP; + int *ifindex, port = 0; + long *value; + u32 key = 0; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + ifindex = 
bpf_map_lookup_elem(&tx_port, &port); + if (!ifindex) + return rc; + + value = bpf_map_lookup_elem(&rxcnt, &key); + if (value) + *value += 1; + + swap_src_dst_mac(data); + return bpf_redirect(*ifindex, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c new file mode 100644 index 000000000000..2faf196e17ea --- /dev/null +++ b/samples/bpf/xdp_redirect_map_kern.c @@ -0,0 +1,83 @@ +/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define KBUILD_MODNAME "foo" +#include <uapi/linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") tx_port = { + .type = BPF_MAP_TYPE_DEVMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 100, +}; + +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 1, +}; + + +static void swap_src_dst_mac(void *data) +{ + unsigned short *p = data; + unsigned short dst[3]; + + dst[0] = p[0]; + dst[1] = p[1]; + dst[2] = p[2]; + p[0] = p[3]; + p[1] = p[4]; + p[2] = p[5]; + p[3] = dst[0]; + p[4] = dst[1]; + p[5] = dst[2]; +} + +SEC("xdp_redirect_map") +int xdp_redirect_map_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + int 
rc = XDP_DROP; + int vport, port = 0, m = 0; + long *value; + u32 key = 0; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + /* constant virtual port */ + vport = 0; + + /* count packet in global counter */ + value = bpf_map_lookup_elem(&rxcnt, &key); + if (value) + *value += 1; + + swap_src_dst_mac(data); + + /* send packet out physical port */ + return bpf_redirect_map(&tx_port, vport, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c new file mode 100644 index 000000000000..a1ad00fdaa8a --- /dev/null +++ b/samples/bpf/xdp_redirect_map_user.c @@ -0,0 +1,137 @@ +/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <libgen.h> + +#include "bpf_load.h" +#include "bpf_util.h" +#include "libbpf.h" + +static int ifindex_in; +static int ifindex_out; + +static __u32 xdp_flags; + +static void int_exit(int sig) +{ + set_link_xdp_fd(ifindex_in, -1, xdp_flags); + exit(0); +} + +/* simple per-protocol drop counter + */ +static void poll_stats(int interval, int ifindex) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + __u64 values[nr_cpus], prev[nr_cpus]; + + memset(prev, 0, sizeof(prev)); + + while (1) { + __u64 sum = 0; + __u32 key = 0; + int i; + + sleep(interval); + assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + for (i = 0; i < nr_cpus; i++) + sum += (values[i] - prev[i]); + if (sum) + printf("ifindex %i: %10llu pkt/s\n", + ifindex, sum / interval); + memcpy(prev, values, sizeof(values)); + } +} + +static void usage(const char *prog) +{ + fprintf(stderr, + "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n" + "OPTS:\n" + " -S use skb-mode\n" + " -N enforce native mode\n", + prog); +} + + +int main(int argc, char **argv) +{ + const char *optstr = "SN"; + char filename[256]; + int ret, opt, key = 0; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; + default: + usage(basename(argv[0])); + return 1; + } + } + + if (optind == argc) { + printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]); + return 1; + } + + ifindex_in = strtoul(argv[optind], NULL, 0); + ifindex_out = strtoul(argv[optind + 1], NULL, 0); + printf("input: %d output: %d\n", ifindex_in, ifindex_out); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (!prog_fd[0]) { + 
printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) { + printf("link set xdp fd failed\n"); + return 1; + } + + printf("map[0] (vports) = %i, map[1] (map) = %i, map[2] (count) = %i\n", + map_fd[0], map_fd[1], map_fd[2]); + + /* populate virtual to physical port map */ + ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0); + if (ret) { + perror("bpf_update_elem"); + goto out; + } + + poll_stats(2, ifindex_out); + +out: + return 0; +} diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c new file mode 100644 index 000000000000..f705a1905d2d --- /dev/null +++ b/samples/bpf/xdp_redirect_user.c @@ -0,0 +1,134 @@ +/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <libgen.h> + +#include "bpf_load.h" +#include "bpf_util.h" +#include "libbpf.h" + +static int ifindex_in; +static int ifindex_out; + +static __u32 xdp_flags; + +static void int_exit(int sig) +{ + set_link_xdp_fd(ifindex_in, -1, xdp_flags); + exit(0); +} + +/* simple per-protocol drop counter + */ +static void poll_stats(int interval, int ifindex) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + __u64 values[nr_cpus], prev[nr_cpus]; + + memset(prev, 0, sizeof(prev)); + + while (1) { + __u64 sum = 0; + __u32 key = 0; + int i; + + sleep(interval); + assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + for (i = 0; i < nr_cpus; i++) + sum += (values[i] - prev[i]); + if (sum) + printf("ifindex %i: %10llu pkt/s\n", + ifindex, sum / interval); + memcpy(prev, values, sizeof(values)); + } +} + +static void usage(const char *prog) +{ + fprintf(stderr, + "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n" + "OPTS:\n" + " -S use skb-mode\n" + " -N enforce native mode\n", + prog); +} + + +int main(int argc, char **argv) +{ + const char *optstr = "SN"; + char filename[256]; + int ret, opt, key = 0; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; + default: + usage(basename(argv[0])); + return 1; + } + } + + if (optind == argc) { + printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]); + return 1; + } + + ifindex_in = strtoul(argv[optind], NULL, 0); + ifindex_out = strtoul(argv[optind + 1], NULL, 0); + printf("input: %d output: %d\n", ifindex_in, ifindex_out); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (!prog_fd[0]) { + 
printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) { + printf("link set xdp fd failed\n"); + return 1; + } + + /* bpf redirect port */ + ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0); + if (ret) { + perror("bpf_update_elem"); + goto out; + } + + poll_stats(2, ifindex_out); + +out: + return 0; +} diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 1450f85b946d..36a7ce9e11ff 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -47,10 +47,8 @@ static inline void selinux_xfrm_notify_policyload(void) struct net *net; rtnl_lock(); - for_each_net(net) { - atomic_inc(&net->xfrm.flow_cache_genid); + for_each_net(net) rt_genid_bump_all(net); - } rtnl_unlock(); } #else diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh index 89b25068cd98..80f102860cf8 100755 --- a/tools/hv/bondvf.sh +++ b/tools/hv/bondvf.sh @@ -211,6 +211,30 @@ function create_bond { echo $'\nBond name:' $bondname + if [ $distro == ubuntu ] + then + local mainfn=$cfgdir/interfaces + local s="^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+${bondname}" + + grep -E "$s" $mainfn + if [ $? 
-eq 0 ] + then + echo "WARNING: ${bondname} has been configured already" + return + fi + elif [ $distro == redhat ] || [ $distro == suse ] + then + local fn=$cfgdir/ifcfg-$bondname + if [ -f $fn ] + then + echo "WARNING: ${bondname} has been configured already" + return + fi + else + echo "Unsupported Distro: ${distro}" + return + fi + echo configuring $primary create_eth_cfg_pri_$distro $primary $bondname @@ -219,8 +243,6 @@ function create_bond { echo creating: $bondname with primary slave: $primary create_bond_cfg_$distro $bondname $primary $secondary - - let bondcnt=bondcnt+1 } for (( i=0; i < $eth_cnt-1; i++ )) @@ -228,5 +250,6 @@ do if [ -n "${list_match[$i]}" ] then create_bond ${list_eth[$i]} ${list_match[$i]} + let bondcnt=bondcnt+1 fi done diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index d50ac342dc92..acbd60519467 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -38,6 +38,8 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = (void *) BPF_FUNC_clone_redirect; static int (*bpf_redirect)(int ifindex, int flags) = (void *) BPF_FUNC_redirect; +static int (*bpf_redirect_map)(void *map, int key, int flags) = + (void *) BPF_FUNC_redirect_map; static int (*bpf_perf_event_output)(void *ctx, void *map, unsigned long long flags, void *data, int size) = diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 79601c81e169..36d6ac3f0c1c 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -438,6 +438,21 @@ static void test_arraymap_percpu_many_keys(void) close(fd); } +static void test_devmap(int task, void *data) +{ + int next_key, fd; + __u32 key, value; + + fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value), + 2, 0); + if (fd < 0) { + printf("Failed to create arraymap '%s'!\n", strerror(errno)); + exit(1); + } + + close(fd); +} 
+ #define MAP_SIZE (32 * 1024) static void test_map_large(void) |