summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS4
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_cfg.h2
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c4
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c561
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h31
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c4
-rw-r--r--drivers/net/ethernet/nvidia/forcedeth.c6
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev.c11
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev_api.h4
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_main.c8
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.h1
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_ethtool.c10
-rw-r--r--drivers/net/ethernet/smsc/smsc911x.c49
-rw-r--r--drivers/net/ethernet/smsc/smsc911x.h19
-rw-r--r--drivers/net/hyperv/netvsc.c8
-rw-r--r--drivers/net/hyperv/rndis_filter.c2
-rw-r--r--drivers/net/usb/qmi_wwan.c1
-rw-r--r--drivers/net/virtio_net.c8
-rw-r--r--include/linux/netlink.h19
-rw-r--r--include/linux/qed/qed_if.h2
-rw-r--r--include/net/cfg80211.h2
-rw-r--r--include/net/ip6_route.h1
-rw-r--r--include/uapi/linux/netfilter/nf_conntrack_common.h13
-rw-r--r--lib/test_bpf.c10
-rw-r--r--net/bridge/netfilter/ebt_dnat.c20
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/ipv4/raw.c3
-rw-r--r--net/ipv4/tcp_minisocks.c1
-rw-r--r--net/ipv6/addrconf.c7
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c2
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/route.c26
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c22
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_helper.c26
-rw-r--r--net/netfilter/nf_conntrack_netlink.c89
-rw-r--r--net/netfilter/nf_tables_api.c5
-rw-r--r--net/netfilter/nft_dynset.c5
-rw-r--r--net/netfilter/nft_set_bitmap.c5
-rw-r--r--net/netfilter/x_tables.c4
-rw-r--r--net/netfilter/xt_CT.c11
-rw-r--r--net/netfilter/xt_socket.c2
-rw-r--r--net/openvswitch/conntrack.c30
-rw-r--r--net/sched/cls_matchall.c3
-rw-r--r--samples/bpf/bpf_load.c229
-rw-r--r--samples/bpf/bpf_load.h18
-rw-r--r--samples/bpf/map_perf_test_user.c14
-rw-r--r--samples/bpf/tracex2_user.c7
-rw-r--r--samples/bpf/tracex3_user.c7
-rw-r--r--samples/bpf/tracex4_user.c8
-rw-r--r--tools/testing/selftests/bpf/Makefile6
-rw-r--r--tools/testing/selftests/bpf/gnu/stubs.h1
-rw-r--r--tools/testing/selftests/bpf/test_progs.c16
-rw-r--r--tools/testing/selftests/bpf/test_tcp_estats.c258
54 files changed, 1174 insertions, 437 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index f72bf454516d..3f9b7d5382d7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8782,14 +8782,16 @@ F: drivers/net/ethernet/neterion/
NETFILTER
M: Pablo Neira Ayuso <pablo@netfilter.org>
M: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+M: Florian Westphal <fw@strlen.de>
L: netfilter-devel@vger.kernel.org
L: coreteam@netfilter.org
W: http://www.netfilter.org/
W: http://www.iptables.org/
+W: http://www.nftables.org/
Q: http://patchwork.ozlabs.org/project/netfilter-devel/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git
-S: Supported
+S: Maintained
F: include/linux/netfilter*
F: include/linux/netfilter/
F: include/net/netfilter/
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
index 5f99237a9d52..214986436ece 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
@@ -68,7 +68,7 @@
#define AQ_CFG_DRV_AUTHOR "aQuantia"
#define AQ_CFG_DRV_DESC "aQuantia Corporation(R) Network Driver"
-#define AQ_CFG_DRV_NAME "aquantia"
+#define AQ_CFG_DRV_NAME "atlantic"
#define AQ_CFG_DRV_VERSION __stringify(NIC_MAJOR_DRIVER_VERSION)"."\
__stringify(NIC_MINOR_DRIVER_VERSION)"."\
__stringify(NIC_BUILD_DRIVER_VERSION)"."\
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index f395b951f5e7..537d571ee601 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -11729,10 +11729,6 @@ static int tg3_close(struct net_device *dev)
tg3_stop(tp);
- /* Clear stats across close / open calls */
- memset(&tp->net_stats_prev, 0, sizeof(tp->net_stats_prev));
- memset(&tp->estats_prev, 0, sizeof(tp->estats_prev));
-
if (pci_device_is_present(tp->pdev)) {
tg3_power_down_prepare(tp);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4fcd2f0378ba..4f2d329dba99 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -194,7 +194,8 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter,
if (!ltb->buff)
return;
- if (!adapter->failover)
+ if (adapter->reset_reason != VNIC_RESET_FAILOVER &&
+ adapter->reset_reason != VNIC_RESET_MOBILITY)
send_request_unmap(adapter, ltb->map_id);
dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
}
@@ -292,9 +293,6 @@ static void replenish_pools(struct ibmvnic_adapter *adapter)
{
int i;
- if (adapter->migrated)
- return;
-
adapter->replenish_task_cycles++;
for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
i++) {
@@ -350,7 +348,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
free_long_term_buff(adapter, &rx_pool->long_term_buff);
if (!rx_pool->rx_buff)
- continue;
+ continue;
for (j = 0; j < rx_pool->size; j++) {
if (rx_pool->rx_buff[j].skb) {
@@ -554,11 +552,20 @@ static int ibmvnic_login(struct net_device *netdev)
static void release_resources(struct ibmvnic_adapter *adapter)
{
+ int i;
+
release_tx_pools(adapter);
release_rx_pools(adapter);
release_stats_token(adapter);
release_error_buffers(adapter);
+
+ if (adapter->napi) {
+ for (i = 0; i < adapter->req_rx_queues; i++) {
+ if (&adapter->napi[i])
+ netif_napi_del(&adapter->napi[i]);
+ }
+ }
}
static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
@@ -569,11 +576,6 @@ static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
bool resend;
int rc;
- if (adapter->logical_link_state == link_state) {
- netdev_dbg(netdev, "Link state already %d\n", link_state);
- return 0;
- }
-
netdev_err(netdev, "setting link state %d\n", link_state);
memset(&crq, 0, sizeof(crq));
crq.logical_link_state.first = IBMVNIC_CRQ_CMD;
@@ -624,22 +626,10 @@ static int set_real_num_queues(struct net_device *netdev)
return rc;
}
-static int ibmvnic_open(struct net_device *netdev)
+static int init_resources(struct ibmvnic_adapter *adapter)
{
- struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- struct device *dev = &adapter->vdev->dev;
- int rc = 0;
- int i;
-
- if (adapter->is_closed) {
- rc = ibmvnic_init(adapter);
- if (rc)
- return rc;
- }
-
- rc = ibmvnic_login(netdev);
- if (rc)
- return rc;
+ struct net_device *netdev = adapter->netdev;
+ int i, rc;
rc = set_real_num_queues(netdev);
if (rc)
@@ -647,7 +637,7 @@ static int ibmvnic_open(struct net_device *netdev)
rc = init_sub_crq_irqs(adapter);
if (rc) {
- dev_err(dev, "failed to initialize sub crq irqs\n");
+ netdev_err(netdev, "failed to initialize sub crq irqs\n");
return -1;
}
@@ -659,90 +649,184 @@ static int ibmvnic_open(struct net_device *netdev)
adapter->napi = kcalloc(adapter->req_rx_queues,
sizeof(struct napi_struct), GFP_KERNEL);
if (!adapter->napi)
- goto ibmvnic_open_fail;
+ return -ENOMEM;
+
for (i = 0; i < adapter->req_rx_queues; i++) {
netif_napi_add(netdev, &adapter->napi[i], ibmvnic_poll,
NAPI_POLL_WEIGHT);
- napi_enable(&adapter->napi[i]);
}
send_map_query(adapter);
rc = init_rx_pools(netdev);
if (rc)
- goto ibmvnic_open_fail;
+ return rc;
rc = init_tx_pools(netdev);
- if (rc)
- goto ibmvnic_open_fail;
+ return rc;
+}
+
+static int __ibmvnic_open(struct net_device *netdev)
+{
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ enum vnic_state prev_state = adapter->state;
+ int i, rc;
+ adapter->state = VNIC_OPENING;
replenish_pools(adapter);
+ for (i = 0; i < adapter->req_rx_queues; i++)
+ napi_enable(&adapter->napi[i]);
+
/* We're ready to receive frames, enable the sub-crq interrupts and
* set the logical link state to up
*/
- for (i = 0; i < adapter->req_rx_queues; i++)
- enable_scrq_irq(adapter, adapter->rx_scrq[i]);
+ for (i = 0; i < adapter->req_rx_queues; i++) {
+ if (prev_state == VNIC_CLOSED)
+ enable_irq(adapter->rx_scrq[i]->irq);
+ else
+ enable_scrq_irq(adapter, adapter->rx_scrq[i]);
+ }
- for (i = 0; i < adapter->req_tx_queues; i++)
- enable_scrq_irq(adapter, adapter->tx_scrq[i]);
+ for (i = 0; i < adapter->req_tx_queues; i++) {
+ if (prev_state == VNIC_CLOSED)
+ enable_irq(adapter->tx_scrq[i]->irq);
+ else
+ enable_scrq_irq(adapter, adapter->tx_scrq[i]);
+ }
rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
- if (rc)
- goto ibmvnic_open_fail;
+ if (rc) {
+ for (i = 0; i < adapter->req_rx_queues; i++)
+ napi_disable(&adapter->napi[i]);
+ release_resources(adapter);
+ return rc;
+ }
netif_tx_start_all_queues(netdev);
- adapter->is_closed = false;
- return 0;
+ if (prev_state == VNIC_CLOSED) {
+ for (i = 0; i < adapter->req_rx_queues; i++)
+ napi_schedule(&adapter->napi[i]);
+ }
-ibmvnic_open_fail:
- for (i = 0; i < adapter->req_rx_queues; i++)
- napi_disable(&adapter->napi[i]);
- release_resources(adapter);
- return -ENOMEM;
+ adapter->state = VNIC_OPEN;
+ return rc;
}
-static void disable_sub_crqs(struct ibmvnic_adapter *adapter)
+static int ibmvnic_open(struct net_device *netdev)
{
- int i;
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ int rc;
- if (adapter->tx_scrq) {
- for (i = 0; i < adapter->req_tx_queues; i++)
- if (adapter->tx_scrq[i])
- disable_irq(adapter->tx_scrq[i]->irq);
+ mutex_lock(&adapter->reset_lock);
+
+ if (adapter->state != VNIC_CLOSED) {
+ rc = ibmvnic_login(netdev);
+ if (rc) {
+ mutex_unlock(&adapter->reset_lock);
+ return rc;
+ }
+
+ rc = init_resources(adapter);
+ if (rc) {
+ netdev_err(netdev, "failed to initialize resources\n");
+ release_resources(adapter);
+ mutex_unlock(&adapter->reset_lock);
+ return rc;
+ }
}
- if (adapter->rx_scrq) {
- for (i = 0; i < adapter->req_rx_queues; i++)
- if (adapter->rx_scrq[i])
- disable_irq(adapter->rx_scrq[i]->irq);
+ rc = __ibmvnic_open(netdev);
+ mutex_unlock(&adapter->reset_lock);
+
+ return rc;
+}
+
+static void clean_tx_pools(struct ibmvnic_adapter *adapter)
+{
+ struct ibmvnic_tx_pool *tx_pool;
+ u64 tx_entries;
+ int tx_scrqs;
+ int i, j;
+
+ if (!adapter->tx_pool)
+ return;
+
+ tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
+ tx_entries = adapter->req_tx_entries_per_subcrq;
+
+ /* Free any remaining skbs in the tx buffer pools */
+ for (i = 0; i < tx_scrqs; i++) {
+ tx_pool = &adapter->tx_pool[i];
+ if (!tx_pool)
+ continue;
+
+ for (j = 0; j < tx_entries; j++) {
+ if (tx_pool->tx_buff[j].skb) {
+ dev_kfree_skb_any(tx_pool->tx_buff[j].skb);
+ tx_pool->tx_buff[j].skb = NULL;
+ }
+ }
}
}
-static int ibmvnic_close(struct net_device *netdev)
+static int __ibmvnic_close(struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
int rc = 0;
int i;
- adapter->closing = true;
- disable_sub_crqs(adapter);
+ adapter->state = VNIC_CLOSING;
+ netif_tx_stop_all_queues(netdev);
if (adapter->napi) {
for (i = 0; i < adapter->req_rx_queues; i++)
napi_disable(&adapter->napi[i]);
}
- if (!adapter->failover)
- netif_tx_stop_all_queues(netdev);
+ clean_tx_pools(adapter);
+
+ if (adapter->tx_scrq) {
+ for (i = 0; i < adapter->req_tx_queues; i++)
+ if (adapter->tx_scrq[i]->irq)
+ disable_irq(adapter->tx_scrq[i]->irq);
+ }
rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
+ if (rc)
+ return rc;
- release_resources(adapter);
+ if (adapter->rx_scrq) {
+ for (i = 0; i < adapter->req_rx_queues; i++) {
+ int retries = 10;
+
+ while (pending_scrq(adapter, adapter->rx_scrq[i])) {
+ retries--;
+ mdelay(100);
+
+ if (retries == 0)
+ break;
+ }
+
+ if (adapter->rx_scrq[i]->irq)
+ disable_irq(adapter->rx_scrq[i]->irq);
+ }
+ }
+
+ adapter->state = VNIC_CLOSED;
+ return rc;
+}
+
+static int ibmvnic_close(struct net_device *netdev)
+{
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ int rc;
+
+ mutex_lock(&adapter->reset_lock);
+ rc = __ibmvnic_close(netdev);
+ mutex_unlock(&adapter->reset_lock);
- adapter->is_closed = true;
- adapter->closing = false;
return rc;
}
@@ -901,13 +985,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
int index = 0;
int ret = 0;
- tx_pool = &adapter->tx_pool[queue_num];
- tx_scrq = adapter->tx_scrq[queue_num];
- txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
- handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
- be32_to_cpu(adapter->login_rsp_buf->
- off_txsubm_subcrqs));
- if (adapter->migrated) {
+ if (adapter->resetting) {
if (!netif_subqueue_stopped(netdev, skb))
netif_stop_subqueue(netdev, queue_num);
dev_kfree_skb_any(skb);
@@ -918,6 +996,12 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
goto out;
}
+ tx_pool = &adapter->tx_pool[queue_num];
+ tx_scrq = adapter->tx_scrq[queue_num];
+ txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
+ handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
+ be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs));
+
index = tx_pool->free_map[tx_pool->consumer_index];
offset = index * adapter->req_mtu;
dst = tx_pool->long_term_buff.buff + offset;
@@ -1099,18 +1183,185 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
return 0;
}
-static void ibmvnic_tx_timeout(struct net_device *dev)
+/**
+ * do_reset returns zero if we are able to keep processing reset events, or
+ * non-zero if we hit a fatal error and must halt.
+ */
+static int do_reset(struct ibmvnic_adapter *adapter,
+ struct ibmvnic_rwi *rwi, u32 reset_state)
{
- struct ibmvnic_adapter *adapter = netdev_priv(dev);
- int rc;
+ struct net_device *netdev = adapter->netdev;
+ int i, rc;
+
+ netif_carrier_off(netdev);
+ adapter->reset_reason = rwi->reset_reason;
+
+ if (rwi->reset_reason == VNIC_RESET_MOBILITY) {
+ rc = ibmvnic_reenable_crq_queue(adapter);
+ if (rc)
+ return 0;
+ }
- /* Adapter timed out, resetting it */
+ rc = __ibmvnic_close(netdev);
+ if (rc)
+ return rc;
+
+ /* remove the closed state so when we call open it appears
+ * we are coming from the probed state.
+ */
+ adapter->state = VNIC_PROBED;
+
+ release_resources(adapter);
release_sub_crqs(adapter);
- rc = ibmvnic_reset_crq(adapter);
+ release_crq_queue(adapter);
+
+ rc = ibmvnic_init(adapter);
if (rc)
- dev_err(&adapter->vdev->dev, "Adapter timeout, reset failed\n");
- else
- ibmvnic_send_crq_init(adapter);
+ return 0;
+
+ /* If the adapter was in PROBE state prior to the reset, exit here. */
+ if (reset_state == VNIC_PROBED)
+ return 0;
+
+ rc = ibmvnic_login(netdev);
+ if (rc) {
+ adapter->state = VNIC_PROBED;
+ return 0;
+ }
+
+ rtnl_lock();
+ rc = init_resources(adapter);
+ rtnl_unlock();
+ if (rc)
+ return rc;
+
+ if (reset_state == VNIC_CLOSED)
+ return 0;
+
+ rc = __ibmvnic_open(netdev);
+ if (rc) {
+ if (list_empty(&adapter->rwi_list))
+ adapter->state = VNIC_CLOSED;
+ else
+ adapter->state = reset_state;
+
+ return 0;
+ }
+
+ netif_carrier_on(netdev);
+
+ /* kick napi */
+ for (i = 0; i < adapter->req_rx_queues; i++)
+ napi_schedule(&adapter->napi[i]);
+
+ return 0;
+}
+
+static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter)
+{
+ struct ibmvnic_rwi *rwi;
+
+ mutex_lock(&adapter->rwi_lock);
+
+ if (!list_empty(&adapter->rwi_list)) {
+ rwi = list_first_entry(&adapter->rwi_list, struct ibmvnic_rwi,
+ list);
+ list_del(&rwi->list);
+ } else {
+ rwi = NULL;
+ }
+
+ mutex_unlock(&adapter->rwi_lock);
+ return rwi;
+}
+
+static void free_all_rwi(struct ibmvnic_adapter *adapter)
+{
+ struct ibmvnic_rwi *rwi;
+
+ rwi = get_next_rwi(adapter);
+ while (rwi) {
+ kfree(rwi);
+ rwi = get_next_rwi(adapter);
+ }
+}
+
+static void __ibmvnic_reset(struct work_struct *work)
+{
+ struct ibmvnic_rwi *rwi;
+ struct ibmvnic_adapter *adapter;
+ struct net_device *netdev;
+ u32 reset_state;
+ int rc;
+
+ adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
+ netdev = adapter->netdev;
+
+ mutex_lock(&adapter->reset_lock);
+ adapter->resetting = true;
+ reset_state = adapter->state;
+
+ rwi = get_next_rwi(adapter);
+ while (rwi) {
+ rc = do_reset(adapter, rwi, reset_state);
+ kfree(rwi);
+ if (rc)
+ break;
+
+ rwi = get_next_rwi(adapter);
+ }
+
+ if (rc) {
+ free_all_rwi(adapter);
+ return;
+ }
+
+ adapter->resetting = false;
+ mutex_unlock(&adapter->reset_lock);
+}
+
+static void ibmvnic_reset(struct ibmvnic_adapter *adapter,
+ enum ibmvnic_reset_reason reason)
+{
+ struct ibmvnic_rwi *rwi, *tmp;
+ struct net_device *netdev = adapter->netdev;
+ struct list_head *entry;
+
+ if (adapter->state == VNIC_REMOVING ||
+ adapter->state == VNIC_REMOVED) {
+ netdev_dbg(netdev, "Adapter removing, skipping reset\n");
+ return;
+ }
+
+ mutex_lock(&adapter->rwi_lock);
+
+ list_for_each(entry, &adapter->rwi_list) {
+ tmp = list_entry(entry, struct ibmvnic_rwi, list);
+ if (tmp->reset_reason == reason) {
+ netdev_err(netdev, "Matching reset found, skipping\n");
+ mutex_unlock(&adapter->rwi_lock);
+ return;
+ }
+ }
+
+ rwi = kzalloc(sizeof(*rwi), GFP_KERNEL);
+ if (!rwi) {
+ mutex_unlock(&adapter->rwi_lock);
+ ibmvnic_close(netdev);
+ return;
+ }
+
+ rwi->reset_reason = reason;
+ list_add_tail(&rwi->list, &adapter->rwi_list);
+ mutex_unlock(&adapter->rwi_lock);
+ schedule_work(&adapter->ibmvnic_reset);
+}
+
+static void ibmvnic_tx_timeout(struct net_device *dev)
+{
+ struct ibmvnic_adapter *adapter = netdev_priv(dev);
+
+ ibmvnic_reset(adapter, VNIC_RESET_TIMEOUT);
}
static void remove_buff_from_pool(struct ibmvnic_adapter *adapter,
@@ -1153,7 +1404,7 @@ restart_poll:
/* free the entry */
next->rx_comp.first = 0;
remove_buff_from_pool(adapter, rx_buff);
- break;
+ continue;
}
length = be32_to_cpu(next->rx_comp.len);
@@ -1177,6 +1428,7 @@ restart_poll:
skb_put(skb, length);
skb->protocol = eth_type_trans(skb, netdev);
+ skb_record_rx_queue(skb, scrq_num);
if (flags & IBMVNIC_IP_CHKSUM_GOOD &&
flags & IBMVNIC_TCP_UDP_CHKSUM_GOOD) {
@@ -1557,19 +1809,8 @@ restart_loop:
}
if (txbuff->last_frag) {
- if (atomic_sub_return(next->tx_comp.num_comps,
- &scrq->used) <=
- (adapter->req_tx_entries_per_subcrq / 2) &&
- netif_subqueue_stopped(adapter->netdev,
- txbuff->skb)) {
- netif_wake_subqueue(adapter->netdev,
- scrq->pool_index);
- netdev_dbg(adapter->netdev,
- "Started queue %d\n",
- scrq->pool_index);
- }
-
dev_kfree_skb_any(txbuff->skb);
+ txbuff->skb = NULL;
}
adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
@@ -1580,6 +1821,15 @@ restart_loop:
}
/* remove tx_comp scrq*/
next->tx_comp.first = 0;
+
+ if (atomic_sub_return(next->tx_comp.num_comps, &scrq->used) <=
+ (adapter->req_tx_entries_per_subcrq / 2) &&
+ __netif_subqueue_stopped(adapter->netdev,
+ scrq->pool_index)) {
+ netif_wake_subqueue(adapter->netdev, scrq->pool_index);
+ netdev_info(adapter->netdev, "Started queue %d\n",
+ scrq->pool_index);
+ }
}
enable_scrq_irq(adapter, scrq);
@@ -1853,7 +2103,8 @@ static int pending_scrq(struct ibmvnic_adapter *adapter,
{
union sub_crq *entry = &scrq->msgs[scrq->cur];
- if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP || adapter->closing)
+ if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP ||
+ adapter->state == VNIC_CLOSING)
return 1;
else
return 0;
@@ -1991,18 +2242,6 @@ static int ibmvnic_send_crq_init(struct ibmvnic_adapter *adapter)
return ibmvnic_send_crq(adapter, &crq);
}
-static int ibmvnic_send_crq_init_complete(struct ibmvnic_adapter *adapter)
-{
- union ibmvnic_crq crq;
-
- memset(&crq, 0, sizeof(crq));
- crq.generic.first = IBMVNIC_CRQ_INIT_CMD;
- crq.generic.cmd = IBMVNIC_CRQ_INIT_COMPLETE;
- netdev_dbg(adapter->netdev, "Sending CRQ init complete\n");
-
- return ibmvnic_send_crq(adapter, &crq);
-}
-
static int send_version_xchg(struct ibmvnic_adapter *adapter)
{
union ibmvnic_crq crq;
@@ -2500,6 +2739,9 @@ static void handle_error_indication(union ibmvnic_crq *crq,
if (be32_to_cpu(crq->error_indication.error_id))
request_error_information(adapter, crq);
+
+ if (crq->error_indication.flags & IBMVNIC_FATAL_ERROR)
+ ibmvnic_reset(adapter, VNIC_RESET_FATAL);
}
static void handle_change_mac_rsp(union ibmvnic_crq *crq,
@@ -2888,26 +3130,6 @@ out:
}
}
-static void ibmvnic_xport_event(struct work_struct *work)
-{
- struct ibmvnic_adapter *adapter = container_of(work,
- struct ibmvnic_adapter,
- ibmvnic_xport);
- struct device *dev = &adapter->vdev->dev;
- long rc;
-
- release_sub_crqs(adapter);
- if (adapter->migrated) {
- rc = ibmvnic_reenable_crq_queue(adapter);
- if (rc)
- dev_err(dev, "Error after enable rc=%ld\n", rc);
- adapter->migrated = false;
- rc = ibmvnic_send_crq_init(adapter);
- if (rc)
- dev_err(dev, "Error sending init rc=%ld\n", rc);
- }
-}
-
static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
struct ibmvnic_adapter *adapter)
{
@@ -2925,12 +3147,6 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
switch (gen_crq->cmd) {
case IBMVNIC_CRQ_INIT:
dev_info(dev, "Partner initialized\n");
- /* Send back a response */
- rc = ibmvnic_send_crq_init_complete(adapter);
- if (!rc)
- schedule_work(&adapter->vnic_crq_init);
- else
- dev_err(dev, "Can't send initrsp rc=%ld\n", rc);
break;
case IBMVNIC_CRQ_INIT_COMPLETE:
dev_info(dev, "Partner initialization complete\n");
@@ -2941,19 +3157,18 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
}
return;
case IBMVNIC_CRQ_XPORT_EVENT:
+ netif_carrier_off(netdev);
if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
- dev_info(dev, "Re-enabling adapter\n");
- adapter->migrated = true;
- schedule_work(&adapter->ibmvnic_xport);
+ dev_info(dev, "Migrated, re-enabling adapter\n");
+ ibmvnic_reset(adapter, VNIC_RESET_MOBILITY);
} else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) {
dev_info(dev, "Backing device failover detected\n");
- netif_carrier_off(netdev);
- adapter->failover = true;
+ ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
} else {
/* The adapter lost the connection */
dev_err(dev, "Virtual Adapter failed (rc=%d)\n",
gen_crq->cmd);
- schedule_work(&adapter->ibmvnic_xport);
+ ibmvnic_reset(adapter, VNIC_RESET_FATAL);
}
return;
case IBMVNIC_CRQ_CMD_RSP:
@@ -3234,64 +3449,6 @@ map_failed:
return retrc;
}
-static void handle_crq_init_rsp(struct work_struct *work)
-{
- struct ibmvnic_adapter *adapter = container_of(work,
- struct ibmvnic_adapter,
- vnic_crq_init);
- struct device *dev = &adapter->vdev->dev;
- struct net_device *netdev = adapter->netdev;
- unsigned long timeout = msecs_to_jiffies(30000);
- bool restart = false;
- int rc;
-
- if (adapter->failover) {
- release_sub_crqs(adapter);
- if (netif_running(netdev)) {
- netif_tx_disable(netdev);
- ibmvnic_close(netdev);
- restart = true;
- }
- }
-
- reinit_completion(&adapter->init_done);
- send_version_xchg(adapter);
- if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
- dev_err(dev, "Passive init timeout\n");
- goto task_failed;
- }
-
- netdev->mtu = adapter->req_mtu - ETH_HLEN;
-
- if (adapter->failover) {
- adapter->failover = false;
- if (restart) {
- rc = ibmvnic_open(netdev);
- if (rc)
- goto restart_failed;
- }
- netif_carrier_on(netdev);
- return;
- }
-
- rc = register_netdev(netdev);
- if (rc) {
- dev_err(dev,
- "failed to register netdev rc=%d\n", rc);
- goto register_failed;
- }
- dev_info(dev, "ibmvnic registered\n");
-
- return;
-
-restart_failed:
- dev_err(dev, "Failed to restart ibmvnic, rc=%d\n", rc);
-register_failed:
- release_sub_crqs(adapter);
-task_failed:
- dev_err(dev, "Passive initialization was not successful\n");
-}
-
static int ibmvnic_init(struct ibmvnic_adapter *adapter)
{
struct device *dev = &adapter->vdev->dev;
@@ -3346,10 +3503,10 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
return -ENOMEM;
adapter = netdev_priv(netdev);
+ adapter->state = VNIC_PROBING;
dev_set_drvdata(&dev->dev, netdev);
adapter->vdev = dev;
adapter->netdev = netdev;
- adapter->failover = false;
ether_addr_copy(adapter->mac_addr, mac_addr_p);
ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
@@ -3358,14 +3515,17 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
netdev->ethtool_ops = &ibmvnic_ethtool_ops;
SET_NETDEV_DEV(netdev, &dev->dev);
- INIT_WORK(&adapter->vnic_crq_init, handle_crq_init_rsp);
- INIT_WORK(&adapter->ibmvnic_xport, ibmvnic_xport_event);
-
spin_lock_init(&adapter->stats_lock);
INIT_LIST_HEAD(&adapter->errors);
spin_lock_init(&adapter->error_list_lock);
+ INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
+ INIT_LIST_HEAD(&adapter->rwi_list);
+ mutex_init(&adapter->reset_lock);
+ mutex_init(&adapter->rwi_lock);
+ adapter->resetting = false;
+
rc = ibmvnic_init(adapter);
if (rc) {
free_netdev(netdev);
@@ -3373,7 +3533,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
}
netdev->mtu = adapter->req_mtu - ETH_HLEN;
- adapter->is_closed = false;
rc = register_netdev(netdev);
if (rc) {
@@ -3383,6 +3542,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
}
dev_info(&dev->dev, "ibmvnic registered\n");
+ adapter->state = VNIC_PROBED;
return 0;
}
@@ -3391,12 +3551,17 @@ static int ibmvnic_remove(struct vio_dev *dev)
struct net_device *netdev = dev_get_drvdata(&dev->dev);
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ adapter->state = VNIC_REMOVING;
unregister_netdev(netdev);
+ mutex_lock(&adapter->reset_lock);
release_resources(adapter);
release_sub_crqs(adapter);
release_crq_queue(adapter);
+ adapter->state = VNIC_REMOVED;
+
+ mutex_unlock(&adapter->reset_lock);
free_netdev(netdev);
dev_set_drvdata(&dev->dev, NULL);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index a69979f6f19d..4702b48cfa44 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -913,6 +913,25 @@ struct ibmvnic_error_buff {
__be32 error_id;
};
+enum vnic_state {VNIC_PROBING = 1,
+ VNIC_PROBED,
+ VNIC_OPENING,
+ VNIC_OPEN,
+ VNIC_CLOSING,
+ VNIC_CLOSED,
+ VNIC_REMOVING,
+ VNIC_REMOVED};
+
+enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1,
+ VNIC_RESET_MOBILITY,
+ VNIC_RESET_FATAL,
+ VNIC_RESET_TIMEOUT};
+
+struct ibmvnic_rwi {
+ enum ibmvnic_reset_reason reset_reason;
+ struct list_head list;
+};
+
struct ibmvnic_adapter {
struct vio_dev *vdev;
struct net_device *netdev;
@@ -922,7 +941,6 @@ struct ibmvnic_adapter {
dma_addr_t ip_offload_tok;
struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl;
dma_addr_t ip_offload_ctrl_tok;
- bool migrated;
u32 msg_enable;
/* Statistics */
@@ -962,7 +980,6 @@ struct ibmvnic_adapter {
u64 promisc;
struct ibmvnic_tx_pool *tx_pool;
- bool closing;
struct completion init_done;
int init_done_rc;
@@ -1007,9 +1024,11 @@ struct ibmvnic_adapter {
__be64 tx_rx_desc_req;
u8 map_id;
- struct work_struct vnic_crq_init;
- struct work_struct ibmvnic_xport;
struct tasklet_struct tasklet;
- bool failover;
- bool is_closed;
+ enum vnic_state state;
+ enum ibmvnic_reset_reason reset_reason;
+ struct mutex reset_lock, rwi_lock;
+ struct list_head rwi_list;
+ struct work_struct ibmvnic_reset;
+ bool resetting;
};
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index db20376260f5..82bd6b0935f1 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2532,11 +2532,11 @@ nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp,
if (!dp->xdp_prog)
return 0;
if (dp->fl_bufsz > PAGE_SIZE) {
- NL_MOD_TRY_SET_ERR_MSG(extack, "MTU too large w/ XDP enabled");
+ NL_SET_ERR_MSG_MOD(extack, "MTU too large w/ XDP enabled");
return -EINVAL;
}
if (dp->num_tx_rings > nn->max_tx_rings) {
- NL_MOD_TRY_SET_ERR_MSG(extack, "Insufficient number of TX rings w/ XDP enabled");
+ NL_SET_ERR_MSG_MOD(extack, "Insufficient number of TX rings w/ XDP enabled");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 978d32944c80..aa912f43e15f 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -4248,11 +4248,9 @@ static int nv_get_link_ksettings(struct net_device *dev,
/* We do not track link speed / duplex setting if the
* interface is disabled. Force a link check */
if (nv_update_linkspeed(dev)) {
- if (!netif_carrier_ok(dev))
- netif_carrier_on(dev);
+ netif_carrier_on(dev);
} else {
- if (netif_carrier_ok(dev))
- netif_carrier_off(dev);
+ netif_carrier_off(dev);
}
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 5f31140d0b77..bb70522ad362 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -2536,6 +2536,9 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
DP_NOTICE(p_hwfn, "Unknown Speed in 0x%08x\n", link_temp);
}
+ p_hwfn->mcp_info->link_capabilities.default_speed_autoneg =
+ link->speed.autoneg;
+
link_temp &= NVM_CFG1_PORT_DRV_FLOW_CONTROL_MASK;
link_temp >>= NVM_CFG1_PORT_DRV_FLOW_CONTROL_OFFSET;
link->pause.autoneg = !!(link_temp &
@@ -3586,7 +3589,7 @@ static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
}
int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
- u16 coalesce, u8 qid, u16 sb_id)
+ u16 coalesce, u16 qid, u16 sb_id)
{
struct ustorm_eth_queue_zone eth_qzone;
u8 timeset, timer_res;
@@ -3607,7 +3610,7 @@ int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
}
timeset = (u8)(coalesce >> timer_res);
- rc = qed_fw_l2_queue(p_hwfn, (u16)qid, &fw_qid);
+ rc = qed_fw_l2_queue(p_hwfn, qid, &fw_qid);
if (rc)
return rc;
@@ -3628,7 +3631,7 @@ out:
}
int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
- u16 coalesce, u8 qid, u16 sb_id)
+ u16 coalesce, u16 qid, u16 sb_id)
{
struct xstorm_eth_queue_zone eth_qzone;
u8 timeset, timer_res;
@@ -3649,7 +3652,7 @@ int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
}
timeset = (u8)(coalesce >> timer_res);
- rc = qed_fw_l2_queue(p_hwfn, (u16)qid, &fw_qid);
+ rc = qed_fw_l2_queue(p_hwfn, qid, &fw_qid);
if (rc)
return rc;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index cefe3ee9064a..12d16c096e36 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -454,7 +454,7 @@ int qed_final_cleanup(struct qed_hwfn *p_hwfn,
* @return int
*/
int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
- u16 coalesce, u8 qid, u16 sb_id);
+ u16 coalesce, u16 qid, u16 sb_id);
/**
* @brief qed_set_txq_coalesce - Configure coalesce parameters for a Tx queue
@@ -471,7 +471,7 @@ int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
* @return int
*/
int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
- u16 coalesce, u8 qid, u16 sb_id);
+ u16 coalesce, u16 qid, u16 sb_id);
const char *qed_hw_get_resc_name(enum qed_resources res_id);
#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 59992cf20d42..b7ad36b91e12 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1372,7 +1372,7 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
/* TODO - at the moment assume supported and advertised speed equal */
if_link->supported_caps = QED_LM_FIBRE_BIT;
- if (params.speed.autoneg)
+ if (link_caps.default_speed_autoneg)
if_link->supported_caps |= QED_LM_Autoneg_BIT;
if (params.pause.autoneg ||
(params.pause.forced_rx && params.pause.forced_tx))
@@ -1382,6 +1382,10 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
if_link->supported_caps |= QED_LM_Pause_BIT;
if_link->advertised_caps = if_link->supported_caps;
+ if (params.speed.autoneg)
+ if_link->advertised_caps |= QED_LM_Autoneg_BIT;
+ else
+ if_link->advertised_caps &= ~QED_LM_Autoneg_BIT;
if (params.speed.advertised_speeds &
NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
if_link->advertised_caps |= QED_LM_1000baseT_Half_BIT |
@@ -1521,7 +1525,7 @@ static void qed_get_coalesce(struct qed_dev *cdev, u16 *rx_coal, u16 *tx_coal)
}
static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
- u8 qid, u16 sb_id)
+ u16 qid, u16 sb_id)
{
struct qed_hwfn *hwfn;
struct qed_ptt *ptt;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 5ae35d6cc7d1..2b09b8545236 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -61,6 +61,7 @@ struct qed_mcp_link_params {
struct qed_mcp_link_capabilities {
u32 speed_capabilities;
+ bool default_speed_autoneg;
};
struct qed_mcp_link_state {
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 4dcfe9614731..172b292241a5 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -493,6 +493,11 @@ static int qede_set_link_ksettings(struct net_device *dev,
params.override_flags |= QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS;
params.override_flags |= QED_LINK_OVERRIDE_SPEED_AUTONEG;
if (base->autoneg == AUTONEG_ENABLE) {
+ if (!(current_link.supported_caps & QED_LM_Autoneg_BIT)) {
+ DP_INFO(edev, "Auto negotiation is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
params.autoneg = true;
params.forced_speed = 0;
QEDE_ETHTOOL_TO_DRV_CAPS(params.adv_speeds, cmd, advertising)
@@ -706,8 +711,7 @@ static int qede_set_coalesce(struct net_device *dev,
{
struct qede_dev *edev = netdev_priv(dev);
int i, rc = 0;
- u16 rxc, txc;
- u8 sb_id;
+ u16 rxc, txc, sb_id;
if (!netif_running(dev)) {
DP_INFO(edev, "Interface is down\n");
@@ -729,7 +733,7 @@ static int qede_set_coalesce(struct net_device *dev,
for_each_queue(i) {
sb_id = edev->fp_array[i].sb_info->igu_sb_id;
rc = edev->ops->common->set_coalesce(edev->cdev, rxc, txc,
- (u8)i, sb_id);
+ (u16)i, sb_id);
if (rc) {
DP_INFO(edev, "Set coalesce error, rc = %d\n", rc);
return rc;
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index fa5ca0992be6..ea1bbc355b4d 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -25,7 +25,7 @@
* LAN9215, LAN9216, LAN9217, LAN9218
* LAN9210, LAN9211
* LAN9220, LAN9221
- * LAN89218
+ * LAN89218,LAN9250
*
*/
@@ -1450,6 +1450,8 @@ static int smsc911x_soft_reset(struct smsc911x_data *pdata)
unsigned int timeout;
unsigned int temp;
int ret;
+ unsigned int reset_offset = HW_CFG;
+ unsigned int reset_mask = HW_CFG_SRST_;
/*
* Make sure to power-up the PHY chip before doing a reset, otherwise
@@ -1476,15 +1478,23 @@ static int smsc911x_soft_reset(struct smsc911x_data *pdata)
}
}
+ if ((pdata->idrev & 0xFFFF0000) == LAN9250) {
+ /* special reset for LAN9250 */
+ reset_offset = RESET_CTL;
+ reset_mask = RESET_CTL_DIGITAL_RST_;
+ }
+
/* Reset the LAN911x */
- smsc911x_reg_write(pdata, HW_CFG, HW_CFG_SRST_);
+ smsc911x_reg_write(pdata, reset_offset, reset_mask);
+
+ /* verify reset bit is cleared */
timeout = 10;
do {
udelay(10);
- temp = smsc911x_reg_read(pdata, HW_CFG);
- } while ((--timeout) && (temp & HW_CFG_SRST_));
+ temp = smsc911x_reg_read(pdata, reset_offset);
+ } while ((--timeout) && (temp & reset_mask));
- if (unlikely(temp & HW_CFG_SRST_)) {
+ if (unlikely(temp & reset_mask)) {
SMSC_WARN(pdata, drv, "Failed to complete reset");
return -EIO;
}
@@ -2253,28 +2263,29 @@ static int smsc911x_init(struct net_device *dev)
pdata->idrev = smsc911x_reg_read(pdata, ID_REV);
switch (pdata->idrev & 0xFFFF0000) {
- case 0x01180000:
- case 0x01170000:
- case 0x01160000:
- case 0x01150000:
- case 0x218A0000:
+ case LAN9118:
+ case LAN9117:
+ case LAN9116:
+ case LAN9115:
+ case LAN89218:
/* LAN911[5678] family */
pdata->generation = pdata->idrev & 0x0000FFFF;
break;
- case 0x118A0000:
- case 0x117A0000:
- case 0x116A0000:
- case 0x115A0000:
+ case LAN9218:
+ case LAN9217:
+ case LAN9216:
+ case LAN9215:
/* LAN921[5678] family */
pdata->generation = 3;
break;
- case 0x92100000:
- case 0x92110000:
- case 0x92200000:
- case 0x92210000:
- /* LAN9210/LAN9211/LAN9220/LAN9221 */
+ case LAN9210:
+ case LAN9211:
+ case LAN9220:
+ case LAN9221:
+ case LAN9250:
+ /* LAN9210/LAN9211/LAN9220/LAN9221/LAN9250 */
pdata->generation = 4;
break;
diff --git a/drivers/net/ethernet/smsc/smsc911x.h b/drivers/net/ethernet/smsc/smsc911x.h
index 54d648920a1b..8d75508acd2b 100644
--- a/drivers/net/ethernet/smsc/smsc911x.h
+++ b/drivers/net/ethernet/smsc/smsc911x.h
@@ -20,6 +20,22 @@
#ifndef __SMSC911X_H__
#define __SMSC911X_H__
+/*Chip ID*/
+#define LAN9115 0x01150000
+#define LAN9116 0x01160000
+#define LAN9117 0x01170000
+#define LAN9118 0x01180000
+#define LAN9215 0x115A0000
+#define LAN9216 0x116A0000
+#define LAN9217 0x117A0000
+#define LAN9218 0x118A0000
+#define LAN9210 0x92100000
+#define LAN9211 0x92110000
+#define LAN9220 0x92200000
+#define LAN9221 0x92210000
+#define LAN9250 0x92500000
+#define LAN89218 0x218A0000
+
#define TX_FIFO_LOW_THRESHOLD ((u32)1600)
#define SMSC911X_EEPROM_SIZE ((u32)128)
#define USE_DEBUG 0
@@ -303,6 +319,9 @@
#define E2P_DATA_EEPROM_DATA_ 0x000000FF
#define LAN_REGISTER_EXTENT 0x00000100
+#define RESET_CTL 0x1F8
+#define RESET_CTL_DIGITAL_RST_ 0x00000001
+
/*
* MAC Control and Status Register (Indirect Address)
* Offset (through the MAC_CSR CMD and DATA port)
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 15749d359e60..652453d9fb08 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1322,6 +1322,10 @@ int netvsc_device_add(struct hv_device *device,
nvchan->channel = device->channel;
}
+ /* Enable NAPI handler before init callbacks */
+ netif_napi_add(ndev, &net_device->chan_table[0].napi,
+ netvsc_poll, NAPI_POLL_WEIGHT);
+
/* Open the channel */
ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
ring_size * PAGE_SIZE, NULL, 0,
@@ -1329,6 +1333,7 @@ int netvsc_device_add(struct hv_device *device,
net_device->chan_table);
if (ret != 0) {
+ netif_napi_del(&net_device->chan_table[0].napi);
netdev_err(ndev, "unable to open channel: %d\n", ret);
goto cleanup;
}
@@ -1336,9 +1341,6 @@ int netvsc_device_add(struct hv_device *device,
/* Channel is opened */
netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");
- /* Enable NAPI handler for init callbacks */
- netif_napi_add(ndev, &net_device->chan_table[0].napi,
- netvsc_poll, NAPI_POLL_WEIGHT);
napi_enable(&net_device->chan_table[0].napi);
/* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index ab92c3c95951..f9d5b0b8209a 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1018,7 +1018,7 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
if (ret == 0)
napi_enable(&nvchan->napi);
else
- netdev_err(ndev, "sub channel open failed (%d)\n", ret);
+ netif_napi_del(&nvchan->napi);
if (refcount_dec_and_test(&nvscdev->sc_offered))
complete(&nvscdev->channel_init_wait);
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index a3ed8115747c..d7165767ca9d 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1201,6 +1201,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */
{QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */
+ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */
{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */
{QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 3d0bc484b3d7..1c6d3923c224 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1891,17 +1891,17 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO)) {
- NL_SET_ERR_MSG(extack, "can't set XDP while host is implementing LRO, disable LRO first");
+ NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first");
return -EOPNOTSUPP;
}
if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
- NL_SET_ERR_MSG(extack, "XDP expects header/data in single page, any_header_sg required");
+ NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
return -EINVAL;
}
if (dev->mtu > max_sz) {
- NL_SET_ERR_MSG(extack, "MTU too large to enable XDP");
+ NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
return -EINVAL;
}
@@ -1912,7 +1912,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
/* XDP requires extra queues for XDP_TX */
if (curr_qp + xdp_qp > vi->max_queue_pairs) {
- NL_SET_ERR_MSG(extack, "Too few free TX rings available");
+ NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
netdev_warn(dev, "request %i queues but max is %i\n",
curr_qp + xdp_qp, vi->max_queue_pairs);
return -ENOMEM;
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index c20395edf2de..5fff5ba5964e 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -86,19 +86,16 @@ struct netlink_ext_ack {
* Currently string formatting is not supported (due
* to the lack of an output buffer.)
*/
-#define NL_SET_ERR_MSG(extack, msg) do { \
- static const char _msg[] = (msg); \
- \
- (extack)->_msg = _msg; \
+#define NL_SET_ERR_MSG(extack, msg) do { \
+ static const char __msg[] = (msg); \
+ struct netlink_ext_ack *__extack = (extack); \
+ \
+ if (__extack) \
+ __extack->_msg = __msg; \
} while (0)
-#define NL_MOD_TRY_SET_ERR_MSG(extack, msg) do { \
- static const char _msg[] = KBUILD_MODNAME ": " msg; \
- struct netlink_ext_ack *_extack = (extack); \
- \
- if (_extack) \
- _extack->_msg = _msg; \
-} while (0)
+#define NL_SET_ERR_MSG_MOD(extack, msg) \
+ NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg)
extern void netlink_kernel_release(struct sock *sk);
extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 5544d7b2f2bb..c70ac13a97e6 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -635,7 +635,7 @@ struct qed_common_ops {
* @return 0 on success, error otherwise.
*/
int (*set_coalesce)(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
- u8 qid, u16 sb_id);
+ u16 qid, u16 sb_id);
/**
* @brief set_led - Configure LED mode
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 6e90f1a4950f..15d6599b8bc6 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1013,9 +1013,9 @@ enum rate_info_flags {
* @RATE_INFO_BW_160: 160 MHz bandwidth
*/
enum rate_info_bw {
+ RATE_INFO_BW_20 = 0,
RATE_INFO_BW_5,
RATE_INFO_BW_10,
- RATE_INFO_BW_20,
RATE_INFO_BW_40,
RATE_INFO_BW_80,
RATE_INFO_BW_160,
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 9dc2c182a263..f5e625f53367 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -84,6 +84,7 @@ struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int ifindex, struct flowi6 *fl6, int flags);
+void ip6_route_init_special_entries(void);
int ip6_route_init(void);
void ip6_route_cleanup(void);
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index a8072cc7fa0b..dc947e59d03a 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -84,10 +84,6 @@ enum ip_conntrack_status {
IPS_DYING_BIT = 9,
IPS_DYING = (1 << IPS_DYING_BIT),
- /* Bits that cannot be altered from userland. */
- IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
- IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING),
-
/* Connection has fixed timeout. */
IPS_FIXED_TIMEOUT_BIT = 10,
IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT),
@@ -103,6 +99,15 @@ enum ip_conntrack_status {
/* Conntrack got a helper explicitly attached via CT target. */
IPS_HELPER_BIT = 13,
IPS_HELPER = (1 << IPS_HELPER_BIT),
+
+ /* Be careful here, modifying these bits can make things messy,
+ * so don't let users modify them directly.
+ */
+ IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
+ IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
+ IPS_SEQ_ADJUST | IPS_TEMPLATE),
+
+ __IPS_MAX_BIT = 14,
};
/* Connection tracking event types */
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index a0f66280ea50..889bc31785be 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -4769,8 +4769,8 @@ static struct bpf_test tests[] = {
BPF_LD_IMM64(R1, 3),
BPF_LD_IMM64(R2, 2),
BPF_JMP_REG(BPF_JGE, R1, R2, 2),
- BPF_LD_IMM64(R0, 0xffffffffffffffffUL),
- BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeUL),
+ BPF_LD_IMM64(R0, 0xffffffffffffffffULL),
+ BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeULL),
BPF_EXIT_INSN(),
},
INTERNAL,
@@ -4784,7 +4784,7 @@ static struct bpf_test tests[] = {
BPF_LD_IMM64(R1, 3),
BPF_LD_IMM64(R2, 2),
BPF_JMP_REG(BPF_JGE, R1, R2, 0),
- BPF_LD_IMM64(R0, 0xffffffffffffffffUL),
+ BPF_LD_IMM64(R0, 0xffffffffffffffffULL),
BPF_EXIT_INSN(),
},
INTERNAL,
@@ -4798,8 +4798,8 @@ static struct bpf_test tests[] = {
BPF_LD_IMM64(R1, 3),
BPF_LD_IMM64(R2, 2),
BPF_JMP_REG(BPF_JGE, R1, R2, 4),
- BPF_LD_IMM64(R0, 0xffffffffffffffffUL),
- BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeUL),
+ BPF_LD_IMM64(R0, 0xffffffffffffffffULL),
+ BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeULL),
BPF_EXIT_INSN(),
},
INTERNAL,
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 4e0b0c359325..e0bb624c3845 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -9,6 +9,7 @@
*/
#include <linux/module.h>
#include <net/sock.h>
+#include "../br_private.h"
#include <linux/netfilter.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_bridge/ebtables.h>
@@ -18,11 +19,30 @@ static unsigned int
ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ebt_nat_info *info = par->targinfo;
+ struct net_device *dev;
if (!skb_make_writable(skb, 0))
return EBT_DROP;
ether_addr_copy(eth_hdr(skb)->h_dest, info->mac);
+
+ if (is_multicast_ether_addr(info->mac)) {
+ if (is_broadcast_ether_addr(info->mac))
+ skb->pkt_type = PACKET_BROADCAST;
+ else
+ skb->pkt_type = PACKET_MULTICAST;
+ } else {
+ if (xt_hooknum(par) != NF_BR_BROUTING)
+ dev = br_port_get_rcu(xt_in(par))->br->dev;
+ else
+ dev = xt_in(par);
+
+ if (ether_addr_equal(info->mac, dev->dev_addr))
+ skb->pkt_type = PACKET_HOST;
+ else
+ skb->pkt_type = PACKET_OTHERHOST;
+ }
+
return info->target;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 6e67315ec368..bcb0f610ee42 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1054,7 +1054,7 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
return err;
}
- if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name))
+ if (nla_put_string(skb, IFLA_PHYS_PORT_NAME, name))
return -EMSGSIZE;
return 0;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9d943974de2b..bdffad875691 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -358,6 +358,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
rt->dst.dev->mtu);
return -EMSGSIZE;
}
+ if (length < sizeof(struct iphdr))
+ return -EINVAL;
+
if (flags&MSG_PROBE)
goto out;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 8f6373b0cd77..717be4de5324 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -523,6 +523,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
tcp_ecn_openreq_child(newtp, req);
+ newtp->fastopen_req = NULL;
newtp->fastopen_rsk = NULL;
newtp->syn_data_acked = 0;
newtp->rack.mstamp.v64 = 0;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b09ac38d8dc4..77a4bd526d6e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3328,7 +3328,8 @@ static int fixup_permanent_addr(struct inet6_dev *idev,
idev->dev, 0, 0);
}
- addrconf_dad_start(ifp);
+ if (ifp->state == INET6_IFADDR_STATE_PREDAD)
+ addrconf_dad_start(ifp);
return 0;
}
@@ -3683,7 +3684,7 @@ restart:
if (keep) {
/* set state to skip the notifier below */
state = INET6_IFADDR_STATE_DEAD;
- ifa->state = 0;
+ ifa->state = INET6_IFADDR_STATE_PREDAD;
if (!(ifa->flags & IFA_F_NODAD))
ifa->flags |= IFA_F_TENTATIVE;
@@ -6572,6 +6573,8 @@ int __init addrconf_init(void)
goto errlo;
}
+ ip6_route_init_special_entries();
+
for (i = 0; i < IN6_ADDR_HSIZE; i++)
INIT_HLIST_HEAD(&inet6_addr_lst[i]);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index bf3ad3e7b647..b2b4f031b3a1 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -235,7 +235,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
inside->icmp6.icmp6_cksum =
csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
skb->len - hdrlen, IPPROTO_ICMPV6,
- csum_partial(&inside->icmp6,
+ skb_checksum(skb, hdrlen,
skb->len - hdrlen, 0));
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0da6a12b5472..1f992d9e261d 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -632,6 +632,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu);
return -EMSGSIZE;
}
+ if (length < sizeof(struct ipv6hdr))
+ return -EINVAL;
if (flags&MSG_PROBE)
goto out;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a1bf426c959b..2f1136627dcb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4027,6 +4027,21 @@ static struct notifier_block ip6_route_dev_notifier = {
.priority = 0,
};
+void __init ip6_route_init_special_entries(void)
+{
+ /* Registering of the loopback is done before this portion of code,
+ * the loopback reference in rt6_info will not be taken, do it
+ * manually for init_net */
+ init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ #endif
+}
+
int __init ip6_route_init(void)
{
int ret;
@@ -4053,17 +4068,6 @@ int __init ip6_route_init(void)
ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
- /* Registering of the loopback is done before this portion of code,
- * the loopback reference in rt6_info will not be taken, do it
- * manually for init_net */
- init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
- init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
- #ifdef CONFIG_IPV6_MULTIPLE_TABLES
- init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
- init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
- init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
- init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
- #endif
ret = fib6_init();
if (ret)
goto out_register_subsys;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 668d9643f0cc..1fa3c2307b6e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3078,6 +3078,17 @@ nla_put_failure:
return skb->len;
}
+static bool ip_vs_is_af_valid(int af)
+{
+ if (af == AF_INET)
+ return true;
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6 && ipv6_mod_enabled())
+ return true;
+#endif
+ return false;
+}
+
static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
struct ip_vs_service_user_kern *usvc,
struct nlattr *nla, int full_entry,
@@ -3105,11 +3116,7 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
memset(usvc, 0, sizeof(*usvc));
usvc->af = nla_get_u16(nla_af);
-#ifdef CONFIG_IP_VS_IPV6
- if (usvc->af != AF_INET && usvc->af != AF_INET6)
-#else
- if (usvc->af != AF_INET)
-#endif
+ if (!ip_vs_is_af_valid(usvc->af))
return -EAFNOSUPPORT;
if (nla_fwmark) {
@@ -3612,6 +3619,11 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
if (udest.af == 0)
udest.af = svc->af;
+ if (!ip_vs_is_af_valid(udest.af)) {
+ ret = -EAFNOSUPPORT;
+ goto out;
+ }
+
if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
/* The synchronization protocol is incompatible
* with mixed family services
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f9245dbfe435..3c8f1ed2f555 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1853,7 +1853,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600);
-static unsigned int total_extension_size(void)
+static __always_inline unsigned int total_extension_size(void)
{
/* remember to add new extensions below */
BUILD_BUG_ON(NF_CT_EXT_NUM > 9);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 4b9dfe3eef62..3a60efa7799b 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -385,7 +385,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) };
unsigned int h = helper_hash(&me->tuple);
struct nf_conntrack_helper *cur;
- int ret = 0;
+ int ret = 0, i;
BUG_ON(me->expect_policy == NULL);
BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
@@ -395,10 +395,26 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
return -EINVAL;
mutex_lock(&nf_ct_helper_mutex);
- hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
- if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) {
- ret = -EEXIST;
- goto out;
+ for (i = 0; i < nf_ct_helper_hsize; i++) {
+ hlist_for_each_entry(cur, &nf_ct_helper_hash[i], hnode) {
+ if (!strcmp(cur->name, me->name) &&
+ (cur->tuple.src.l3num == NFPROTO_UNSPEC ||
+ cur->tuple.src.l3num == me->tuple.src.l3num) &&
+ cur->tuple.dst.protonum == me->tuple.dst.protonum) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+ }
+
+ /* avoid unpredictable behaviour for auto_assign_helper */
+ if (!(me->flags & NF_CT_HELPER_F_USERSPACE)) {
+ hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
+ if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple,
+ &mask)) {
+ ret = -EEXIST;
+ goto out;
+ }
}
}
hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 5f6f2f388928..dcf561b5c97a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -417,8 +417,7 @@ nla_put_failure:
return -1;
}
-static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
- const struct nf_conn *ct)
+static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct)
{
struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
struct nf_ct_seqadj *seq;
@@ -426,15 +425,20 @@ static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj)
return 0;
+ spin_lock_bh(&ct->lock);
seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1)
- return -1;
+ goto err;
seq = &seqadj->seq[IP_CT_DIR_REPLY];
if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1)
- return -1;
+ goto err;
+ spin_unlock_bh(&ct->lock);
return 0;
+err:
+ spin_unlock_bh(&ct->lock);
+ return -1;
}
static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
@@ -1417,6 +1421,24 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
}
#endif
+static void
+__ctnetlink_change_status(struct nf_conn *ct, unsigned long on,
+ unsigned long off)
+{
+ unsigned int bit;
+
+ /* Ignore these unchangable bits */
+ on &= ~IPS_UNCHANGEABLE_MASK;
+ off &= ~IPS_UNCHANGEABLE_MASK;
+
+ for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
+ if (on & (1 << bit))
+ set_bit(bit, &ct->status);
+ else if (off & (1 << bit))
+ clear_bit(bit, &ct->status);
+ }
+}
+
static int
ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
{
@@ -1436,10 +1458,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
/* ASSURED bit can only be set */
return -EBUSY;
- /* Be careful here, modifying NAT bits can screw up things,
- * so don't let users modify them directly if they don't pass
- * nf_nat_range. */
- ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
+ __ctnetlink_change_status(ct, status, 0);
return 0;
}
@@ -1508,23 +1527,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
return 0;
}
+ rcu_read_lock();
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper == NULL) {
-#ifdef CONFIG_MODULES
- spin_unlock_bh(&nf_conntrack_expect_lock);
-
- if (request_module("nfct-helper-%s", helpname) < 0) {
- spin_lock_bh(&nf_conntrack_expect_lock);
- return -EOPNOTSUPP;
- }
-
- spin_lock_bh(&nf_conntrack_expect_lock);
- helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
- nf_ct_protonum(ct));
- if (helper)
- return -EAGAIN;
-#endif
+ rcu_read_unlock();
return -EOPNOTSUPP;
}
@@ -1533,13 +1540,16 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
/* update private helper data if allowed. */
if (helper->from_nlattr)
helper->from_nlattr(helpinfo, ct);
- return 0;
+ err = 0;
} else
- return -EBUSY;
+ err = -EBUSY;
+ } else {
+ /* we cannot set a helper for an existing conntrack */
+ err = -EOPNOTSUPP;
}
- /* we cannot set a helper for an existing conntrack */
- return -EOPNOTSUPP;
+ rcu_read_unlock();
+ return err;
}
static int ctnetlink_change_timeout(struct nf_conn *ct,
@@ -1630,25 +1640,30 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
if (!seqadj)
return 0;
+ spin_lock_bh(&ct->lock);
if (cda[CTA_SEQ_ADJ_ORIG]) {
ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
cda[CTA_SEQ_ADJ_ORIG]);
if (ret < 0)
- return ret;
+ goto err;
- ct->status |= IPS_SEQ_ADJUST;
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
}
if (cda[CTA_SEQ_ADJ_REPLY]) {
ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
cda[CTA_SEQ_ADJ_REPLY]);
if (ret < 0)
- return ret;
+ goto err;
- ct->status |= IPS_SEQ_ADJUST;
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
}
+ spin_unlock_bh(&ct->lock);
return 0;
+err:
+ spin_unlock_bh(&ct->lock);
+ return ret;
}
static int
@@ -1959,9 +1974,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
err = -EEXIST;
ct = nf_ct_tuplehash_to_ctrack(h);
if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
- spin_lock_bh(&nf_conntrack_expect_lock);
err = ctnetlink_change_conntrack(ct, cda);
- spin_unlock_bh(&nf_conntrack_expect_lock);
if (err == 0) {
nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
(1 << IPCT_ASSURED) |
@@ -2294,10 +2307,10 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[])
/* This check is less strict than ctnetlink_change_status()
* because callers often flip IPS_EXPECTED bits when sending
* an NFQA_CT attribute to the kernel. So ignore the
- * unchangeable bits but do not error out.
+ * unchangeable bits but do not error out. Also user programs
+ * are allowed to clear the bits that they are allowed to change.
*/
- ct->status = (status & ~IPS_UNCHANGEABLE_MASK) |
- (ct->status & IPS_UNCHANGEABLE_MASK);
+ __ctnetlink_change_status(ct, status, ~status);
return 0;
}
@@ -2351,11 +2364,7 @@ ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct)
if (ret < 0)
return ret;
- spin_lock_bh(&nf_conntrack_expect_lock);
- ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
- spin_unlock_bh(&nf_conntrack_expect_lock);
-
- return ret;
+ return ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
}
static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 1c6482d2c4dc..559225029740 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3778,6 +3778,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = set->ops->insert(ctx->net, set, &elem, &ext2);
if (err) {
if (err == -EEXIST) {
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
+ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
+ nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
+ nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF))
+ return -EBUSY;
if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
memcmp(nft_set_ext_data(ext),
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 3948da380259..66221ad891a9 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -82,8 +82,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
timeout = priv->timeout ? : set->timeout;
*nft_set_ext_expiration(ext) = jiffies + timeout;
- } else if (sexpr == NULL)
- goto out;
+ }
if (sexpr != NULL)
sexpr->ops->eval(sexpr, regs, pkt);
@@ -92,7 +91,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
return;
}
-out:
+
if (!priv->invert)
regs->verdict.code = NFT_BREAK;
}
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 8ebbc2940f4c..b988162b5b15 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -257,6 +257,11 @@ static int nft_bitmap_init(const struct nft_set *set,
static void nft_bitmap_destroy(const struct nft_set *set)
{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_bitmap_elem *be, *n;
+
+ list_for_each_entry_safe(be, n, &priv->list, head)
+ nft_set_elem_destroy(set, be, true);
}
static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 14857afc9937..f134d384852f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1051,8 +1051,10 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
list_for_each_entry(t, &init_net.xt.tables[af], list) {
if (strcmp(t->name, name))
continue;
- if (!try_module_get(t->me))
+ if (!try_module_get(t->me)) {
+ mutex_unlock(&xt[af].mutex);
return NULL;
+ }
mutex_unlock(&xt[af].mutex);
if (t->table_init(net) != 0) {
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 3cbe1bcf6a74..bb7ad82dcd56 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -168,8 +168,10 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
goto err_put_timeout;
}
timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
- if (timeout_ext == NULL)
+ if (!timeout_ext) {
ret = -ENOMEM;
+ goto err_put_timeout;
+ }
rcu_read_unlock();
return ret;
@@ -201,6 +203,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
struct xt_ct_target_info_v1 *info)
{
struct nf_conntrack_zone zone;
+ struct nf_conn_help *help;
struct nf_conn *ct;
int ret = -EOPNOTSUPP;
@@ -249,7 +252,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
if (info->timeout[0]) {
ret = xt_ct_set_timeout(ct, par, info->timeout);
if (ret < 0)
- goto err3;
+ goto err4;
}
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
nf_conntrack_get(&ct->ct_general);
@@ -257,6 +260,10 @@ out:
info->ct = ct;
return 0;
+err4:
+ help = nfct_help(ct);
+ if (help)
+ module_put(help->helper->me);
err3:
nf_ct_tmpl_free(ct);
err2:
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 770bbec878f1..e75ef39669c5 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -152,7 +152,7 @@ static int socket_mt_enable_defrag(struct net *net, int family)
switch (family) {
case NFPROTO_IPV4:
return nf_defrag_ipv4_enable(net);
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
case NFPROTO_IPV6:
return nf_defrag_ipv6_enable(net);
#endif
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 42a95919df09..bf602e33c40a 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -516,10 +516,38 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
u16 proto, const struct sk_buff *skb)
{
struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_expect *exp;
if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
return NULL;
- return __nf_ct_expect_find(net, zone, &tuple);
+
+ exp = __nf_ct_expect_find(net, zone, &tuple);
+ if (exp) {
+ struct nf_conntrack_tuple_hash *h;
+
+ /* Delete existing conntrack entry, if it clashes with the
+ * expectation. This can happen since conntrack ALGs do not
+ * check for clashes between (new) expectations and existing
+ * conntrack entries. nf_conntrack_in() will check the
+ * expectations only if a conntrack entry can not be found,
+ * which can lead to OVS finding the expectation (here) in the
+ * init direction, but which will not be removed by the
+ * nf_conntrack_in() call, if a matching conntrack entry is
+ * found instead. In this case all init direction packets
+ * would be reported as new related packets, while reply
+ * direction packets would be reported as un-related
+ * established packets.
+ */
+ h = nf_conntrack_find_get(net, zone, &tuple);
+ if (h) {
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ nf_ct_delete(ct, 0, 0);
+ nf_conntrack_put(&ct->ct_general);
+ }
+ }
+
+ return exp;
}
/* This replicates logic from nf_conntrack_core.c that is not exported. */
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 2efb36c08f2a..dee469fed967 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -203,8 +203,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
*arg = (unsigned long) head;
rcu_assign_pointer(tp->root, new);
- if (head)
- call_rcu(&head->rcu, mall_destroy_rcu);
+ call_rcu(&head->rcu, mall_destroy_rcu);
return 0;
err_replace_hw_filter:
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 4221dc359453..74456b3eb89a 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -39,6 +39,9 @@ int event_fd[MAX_PROGS];
int prog_cnt;
int prog_array_fd = -1;
+struct bpf_map_data map_data[MAX_MAPS];
+int map_data_count = 0;
+
static int populate_prog_array(const char *event, int prog_fd)
{
int ind = atoi(event), err;
@@ -186,42 +189,45 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return 0;
}
-static int load_maps(struct bpf_map_def *maps, int nr_maps,
- const char **map_names, fixup_map_cb fixup_map)
+static int load_maps(struct bpf_map_data *maps, int nr_maps,
+ fixup_map_cb fixup_map)
{
int i;
- /*
- * Warning: Using "maps" pointing to ELF data_maps->d_buf as
- * an array of struct bpf_map_def is a wrong assumption about
- * the ELF maps section format.
- */
+
for (i = 0; i < nr_maps; i++) {
- if (fixup_map)
- fixup_map(&maps[i], map_names[i], i);
-
- if (maps[i].type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
- maps[i].type == BPF_MAP_TYPE_HASH_OF_MAPS) {
- int inner_map_fd = map_fd[maps[i].inner_map_idx];
-
- map_fd[i] = bpf_create_map_in_map(maps[i].type,
- maps[i].key_size,
- inner_map_fd,
- maps[i].max_entries,
- maps[i].map_flags);
+ if (fixup_map) {
+ fixup_map(&maps[i], i);
+ /* Allow userspace to assign map FD prior to creation */
+ if (maps[i].fd != -1) {
+ map_fd[i] = maps[i].fd;
+ continue;
+ }
+ }
+
+ if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+ maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+ int inner_map_fd = map_fd[maps[i].def.inner_map_idx];
+
+ map_fd[i] = bpf_create_map_in_map(maps[i].def.type,
+ maps[i].def.key_size,
+ inner_map_fd,
+ maps[i].def.max_entries,
+ maps[i].def.map_flags);
} else {
- map_fd[i] = bpf_create_map(maps[i].type,
- maps[i].key_size,
- maps[i].value_size,
- maps[i].max_entries,
- maps[i].map_flags);
+ map_fd[i] = bpf_create_map(maps[i].def.type,
+ maps[i].def.key_size,
+ maps[i].def.value_size,
+ maps[i].def.max_entries,
+ maps[i].def.map_flags);
}
if (map_fd[i] < 0) {
printf("failed to create a map: %d %s\n",
errno, strerror(errno));
return 1;
}
+ maps[i].fd = map_fd[i];
- if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
+ if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY)
prog_array_fd = map_fd[i];
}
return 0;
@@ -251,7 +257,8 @@ static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
}
static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
- GElf_Shdr *shdr, struct bpf_insn *insn)
+ GElf_Shdr *shdr, struct bpf_insn *insn,
+ struct bpf_map_data *maps, int nr_maps)
{
int i, nrels;
@@ -261,6 +268,8 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
GElf_Sym sym;
GElf_Rel rel;
unsigned int insn_idx;
+ bool match = false;
+ int j, map_idx;
gelf_getrel(data, i, &rel);
@@ -274,11 +283,21 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
return 1;
}
insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
- /*
- * Warning: Using sizeof(struct bpf_map_def) here is a
- * wrong assumption about ELF maps section format
- */
- insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)];
+
+ /* Match FD relocation against recorded map_data[] offset */
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+ if (maps[map_idx].elf_offset == sym.st_value) {
+ match = true;
+ break;
+ }
+ }
+ if (match) {
+ insn[insn_idx].imm = maps[map_idx].fd;
+ } else {
+ printf("invalid relo for insn[%d] no map_data match\n",
+ insn_idx);
+ return 1;
+ }
}
return 0;
@@ -297,40 +316,112 @@ static int cmp_symbols(const void *l, const void *r)
return 0;
}
-static int get_sorted_map_names(Elf *elf, Elf_Data *symbols, int maps_shndx,
- int strtabidx, char **map_names)
+static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
+ Elf *elf, Elf_Data *symbols, int strtabidx)
{
- GElf_Sym map_symbols[MAX_MAPS];
- int i, nr_maps = 0;
+ int map_sz_elf, map_sz_copy;
+ bool validate_zero = false;
+ Elf_Data *data_maps;
+ int i, nr_maps;
+ GElf_Sym *sym;
+ Elf_Scn *scn;
+ int copy_sz;
+
+ if (maps_shndx < 0)
+ return -EINVAL;
+ if (!symbols)
+ return -EINVAL;
+
+ /* Get data for maps section via elf index */
+ scn = elf_getscn(elf, maps_shndx);
+ if (scn)
+ data_maps = elf_getdata(scn, NULL);
+ if (!scn || !data_maps) {
+ printf("Failed to get Elf_Data from maps section %d\n",
+ maps_shndx);
+ return -EINVAL;
+ }
- for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
- assert(nr_maps < MAX_MAPS);
- if (!gelf_getsym(symbols, i, &map_symbols[nr_maps]))
+ /* For each map get corrosponding symbol table entry */
+ sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym));
+ for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+ assert(nr_maps < MAX_MAPS+1);
+ if (!gelf_getsym(symbols, i, &sym[nr_maps]))
continue;
- if (map_symbols[nr_maps].st_shndx != maps_shndx)
+ if (sym[nr_maps].st_shndx != maps_shndx)
continue;
+ /* Only increment iif maps section */
nr_maps++;
}
- qsort(map_symbols, nr_maps, sizeof(GElf_Sym), cmp_symbols);
+ /* Align to map_fd[] order, via sort on offset in sym.st_value */
+ qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols);
+
+ /* Keeping compatible with ELF maps section changes
+ * ------------------------------------------------
+ * The program size of struct bpf_map_def is known by loader
+ * code, but struct stored in ELF file can be different.
+ *
+ * Unfortunately sym[i].st_size is zero. To calculate the
+ * struct size stored in the ELF file, assume all struct have
+ * the same size, and simply divide with number of map
+ * symbols.
+ */
+ map_sz_elf = data_maps->d_size / nr_maps;
+ map_sz_copy = sizeof(struct bpf_map_def);
+ if (map_sz_elf < map_sz_copy) {
+ /*
+ * Backward compat, loading older ELF file with
+ * smaller struct, keeping remaining bytes zero.
+ */
+ map_sz_copy = map_sz_elf;
+ } else if (map_sz_elf > map_sz_copy) {
+ /*
+ * Forward compat, loading newer ELF file with larger
+ * struct with unknown features. Assume zero means
+ * feature not used. Thus, validate rest of struct
+ * data is zero.
+ */
+ validate_zero = true;
+ }
+ /* Memcpy relevant part of ELF maps data to loader maps */
for (i = 0; i < nr_maps; i++) {
- char *map_name;
-
- map_name = elf_strptr(elf, strtabidx, map_symbols[i].st_name);
- if (!map_name) {
- printf("cannot get map symbol\n");
- return -1;
- }
-
- map_names[i] = strdup(map_name);
- if (!map_names[i]) {
+ unsigned char *addr, *end;
+ struct bpf_map_def *def;
+ const char *map_name;
+ size_t offset;
+
+ map_name = elf_strptr(elf, strtabidx, sym[i].st_name);
+ maps[i].name = strdup(map_name);
+ if (!maps[i].name) {
printf("strdup(%s): %s(%d)\n", map_name,
strerror(errno), errno);
- return -1;
+ free(sym);
+ return -errno;
+ }
+
+ /* Symbol value is offset into ELF maps section data area */
+ offset = sym[i].st_value;
+ def = (struct bpf_map_def *)(data_maps->d_buf + offset);
+ maps[i].elf_offset = offset;
+ memset(&maps[i].def, 0, sizeof(struct bpf_map_def));
+ memcpy(&maps[i].def, def, map_sz_copy);
+
+ /* Verify no newer features were requested */
+ if (validate_zero) {
+ addr = (unsigned char*) def + map_sz_copy;
+ end = (unsigned char*) def + map_sz_elf;
+ for (; addr < end; addr++) {
+ if (*addr != 0) {
+ free(sym);
+ return -EFBIG;
+ }
+ }
}
}
+ free(sym);
return nr_maps;
}
@@ -341,7 +432,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
GElf_Ehdr ehdr;
GElf_Shdr shdr, shdr_prog;
Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL;
- char *shname, *shname_prog, *map_names[MAX_MAPS] = { NULL };
+ char *shname, *shname_prog;
+ int nr_maps = 0;
/* reset global variables */
kern_version = 0;
@@ -389,8 +481,12 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
}
memcpy(&kern_version, data->d_buf, sizeof(int));
} else if (strcmp(shname, "maps") == 0) {
+ int j;
+
maps_shndx = i;
data_maps = data;
+ for (j = 0; j < MAX_MAPS; j++)
+ map_data[j].fd = -1;
} else if (shdr.sh_type == SHT_SYMTAB) {
strtabidx = shdr.sh_link;
symbols = data;
@@ -405,27 +501,17 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
}
if (data_maps) {
- int nr_maps;
- int prog_elf_map_sz;
-
- nr_maps = get_sorted_map_names(elf, symbols, maps_shndx,
- strtabidx, map_names);
- if (nr_maps < 0)
- goto done;
-
- /* Deduce map struct size stored in ELF maps section */
- prog_elf_map_sz = data_maps->d_size / nr_maps;
- if (prog_elf_map_sz != sizeof(struct bpf_map_def)) {
- printf("Error: ELF maps sec wrong size (%d/%lu),"
- " old kern.o file?\n",
- prog_elf_map_sz, sizeof(struct bpf_map_def));
+ nr_maps = load_elf_maps_section(map_data, maps_shndx,
+ elf, symbols, strtabidx);
+ if (nr_maps < 0) {
+ printf("Error: Failed loading ELF maps (errno:%d):%s\n",
+ nr_maps, strerror(-nr_maps));
ret = 1;
goto done;
}
-
- if (load_maps(data_maps->d_buf, nr_maps,
- (const char **)map_names, fixup_map))
+ if (load_maps(map_data, nr_maps, fixup_map))
goto done;
+ map_data_count = nr_maps;
processed_sec[maps_shndx] = true;
}
@@ -453,7 +539,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
processed_sec[shdr.sh_info] = true;
processed_sec[i] = true;
- if (parse_relo_and_apply(data, symbols, &shdr, insns))
+ if (parse_relo_and_apply(data, symbols, &shdr, insns,
+ map_data, nr_maps))
continue;
if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
@@ -488,8 +575,6 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
ret = 0;
done:
- for (i = 0; i < MAX_MAPS; i++)
- free(map_names[i]);
close(fd);
return ret;
}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index 05822f83173a..ca0563d04744 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -15,15 +15,27 @@ struct bpf_map_def {
unsigned int inner_map_idx;
};
-typedef void (*fixup_map_cb)(struct bpf_map_def *map, const char *map_name,
- int idx);
+struct bpf_map_data {
+ int fd;
+ char *name;
+ size_t elf_offset;
+ struct bpf_map_def def;
+};
+
+typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx);
-extern int map_fd[MAX_MAPS];
extern int prog_fd[MAX_PROGS];
extern int event_fd[MAX_PROGS];
extern char bpf_log_buf[BPF_LOG_BUF_SIZE];
extern int prog_cnt;
+/* There is a one-to-one mapping between map_fd[] and map_data[].
+ * The map_data[] just contains more rich info on the given map.
+ */
+extern int map_fd[MAX_MAPS];
+extern struct bpf_map_data map_data[MAX_MAPS];
+extern int map_data_count;
+
/* parses elf file compiled by llvm .c->.o
* . parses 'maps' section and creates maps via BPF syscall
* . parses 'license' section and passes it to syscall
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 6ac778153315..1a8894b5ac51 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -320,21 +320,21 @@ static void fill_lpm_trie(void)
assert(!r);
}
-static void fixup_map(struct bpf_map_def *map, const char *name, int idx)
+static void fixup_map(struct bpf_map_data *map, int idx)
{
int i;
- if (!strcmp("inner_lru_hash_map", name)) {
+ if (!strcmp("inner_lru_hash_map", map->name)) {
inner_lru_hash_idx = idx;
- inner_lru_hash_size = map->max_entries;
+ inner_lru_hash_size = map->def.max_entries;
}
- if (!strcmp("array_of_lru_hashs", name)) {
+ if (!strcmp("array_of_lru_hashs", map->name)) {
if (inner_lru_hash_idx == -1) {
printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n");
exit(1);
}
- map->inner_map_idx = inner_lru_hash_idx;
+ map->def.inner_map_idx = inner_lru_hash_idx;
array_of_lru_hashs_idx = idx;
}
@@ -345,9 +345,9 @@ static void fixup_map(struct bpf_map_def *map, const char *name, int idx)
/* Only change the max_entries for the enabled test(s) */
for (i = 0; i < NR_TESTS; i++) {
- if (!strcmp(test_map_names[i], name) &&
+ if (!strcmp(test_map_names[i], map->name) &&
(check_test_flags(i))) {
- map->max_entries = num_map_entries;
+ map->def.max_entries = num_map_entries;
}
}
}
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index ded9804c5034..7fee0f1ba9a3 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -4,6 +4,7 @@
#include <signal.h>
#include <linux/bpf.h>
#include <string.h>
+#include <sys/resource.h>
#include "libbpf.h"
#include "bpf_load.h"
@@ -112,6 +113,7 @@ static void int_exit(int sig)
int main(int ac, char **argv)
{
+ struct rlimit r = {1024*1024, RLIM_INFINITY};
char filename[256];
long key, next_key, value;
FILE *f;
@@ -119,6 +121,11 @@ int main(int ac, char **argv)
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
signal(SIGINT, int_exit);
/* start 'ping' in the background to have some kfree_skb events */
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
index 8f7d199d5945..fe372239d505 100644
--- a/samples/bpf/tracex3_user.c
+++ b/samples/bpf/tracex3_user.c
@@ -11,6 +11,7 @@
#include <stdbool.h>
#include <string.h>
#include <linux/bpf.h>
+#include <sys/resource.h>
#include "libbpf.h"
#include "bpf_load.h"
@@ -112,11 +113,17 @@ static void print_hist(int fd)
int main(int ac, char **argv)
{
+ struct rlimit r = {1024*1024, RLIM_INFINITY};
char filename[256];
int i;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;
diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
index 03449f773cb1..22c644f1f4c3 100644
--- a/samples/bpf/tracex4_user.c
+++ b/samples/bpf/tracex4_user.c
@@ -12,6 +12,8 @@
#include <string.h>
#include <time.h>
#include <linux/bpf.h>
+#include <sys/resource.h>
+
#include "libbpf.h"
#include "bpf_load.h"
@@ -50,11 +52,17 @@ static void print_old_objects(int fd)
int main(int ac, char **argv)
{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
char filename[256];
int i;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
+ return 1;
+ }
+
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index d8d94b9bd76c..91edd0566237 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -13,7 +13,7 @@ LDLIBS += -lcap -lelf
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs
-TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o
+TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o
TEST_PROGS := test_kmod.sh
@@ -34,6 +34,6 @@ $(BPFOBJ): force
CLANG ?= clang
%.o: %.c
- $(CLANG) -I../../../include/uapi -I../../../../samples/bpf/ \
- -D__x86_64__ -Wno-compare-distinct-pointer-types \
+ $(CLANG) -I. -I../../../include/uapi -I../../../../samples/bpf/ \
+ -Wno-compare-distinct-pointer-types \
-O2 -target bpf -c $< -o $@
diff --git a/tools/testing/selftests/bpf/gnu/stubs.h b/tools/testing/selftests/bpf/gnu/stubs.h
new file mode 100644
index 000000000000..719225b16626
--- /dev/null
+++ b/tools/testing/selftests/bpf/gnu/stubs.h
@@ -0,0 +1 @@
+/* dummy .h to trick /usr/include/features.h to work with 'clang -target bpf' */
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 4ed049a0b14b..b59f5ed4ae40 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -268,6 +268,21 @@ out:
bpf_object__close(obj);
}
+static void test_tcp_estats(void)
+{
+ const char *file = "./test_tcp_estats.o";
+ int err, prog_fd;
+ struct bpf_object *obj;
+ __u32 duration = 0;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ CHECK(err, "", "err %d errno %d\n", err, errno);
+ if (err)
+ return;
+
+ bpf_object__close(obj);
+}
+
int main(void)
{
struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
@@ -277,6 +292,7 @@ int main(void)
test_pkt_access();
test_xdp();
test_l4lb();
+ test_tcp_estats();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return 0;
diff --git a/tools/testing/selftests/bpf/test_tcp_estats.c b/tools/testing/selftests/bpf/test_tcp_estats.c
new file mode 100644
index 000000000000..bee3bbecc0c4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_estats.c
@@ -0,0 +1,258 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+/* This program shows clang/llvm is able to generate code pattern
+ * like:
+ * _tcp_send_active_reset:
+ * 0: bf 16 00 00 00 00 00 00 r6 = r1
+ * ......
+ * 335: b7 01 00 00 0f 00 00 00 r1 = 15
+ * 336: 05 00 48 00 00 00 00 00 goto 72
+ *
+ * LBB0_3:
+ * 337: b7 01 00 00 01 00 00 00 r1 = 1
+ * 338: 63 1a d0 ff 00 00 00 00 *(u32 *)(r10 - 48) = r1
+ * 408: b7 01 00 00 03 00 00 00 r1 = 3
+ *
+ * LBB0_4:
+ * 409: 71 a2 fe ff 00 00 00 00 r2 = *(u8 *)(r10 - 2)
+ * 410: bf a7 00 00 00 00 00 00 r7 = r10
+ * 411: 07 07 00 00 b8 ff ff ff r7 += -72
+ * 412: bf 73 00 00 00 00 00 00 r3 = r7
+ * 413: 0f 13 00 00 00 00 00 00 r3 += r1
+ * 414: 73 23 2d 00 00 00 00 00 *(u8 *)(r3 + 45) = r2
+ *
+ * From the above code snippet, the code generated by the compiler
+ * is reasonable. The "r1" is assigned to different values in basic
+ * blocks "_tcp_send_active_reset" and "LBB0_3", and used in "LBB0_4".
+ * The verifier should be able to handle such code patterns.
+ */
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/ipv6.h>
+#include <linux/version.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+
+#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define TCP_ESTATS_MAGIC 0xBAADBEEF
+
+/* This test case needs "sock" and "pt_regs" data structure.
+ * Recursively, "sock" needs "sock_common" and "inet_sock".
+ * However, this is a unit test case only for
+ * verifier purpose without bpf program execution.
+ * We can safely mock much simpler data structures, basically
+ * only taking the necessary fields from kernel headers.
+ */
+typedef __u32 __bitwise __portpair;
+typedef __u64 __bitwise __addrpair;
+
+struct sock_common {
+ unsigned short skc_family;
+ union {
+ __addrpair skc_addrpair;
+ struct {
+ __be32 skc_daddr;
+ __be32 skc_rcv_saddr;
+ };
+ };
+ union {
+ __portpair skc_portpair;
+ struct {
+ __be16 skc_dport;
+ __u16 skc_num;
+ };
+ };
+ struct in6_addr skc_v6_daddr;
+ struct in6_addr skc_v6_rcv_saddr;
+};
+
+struct sock {
+ struct sock_common __sk_common;
+#define sk_family __sk_common.skc_family
+#define sk_v6_daddr __sk_common.skc_v6_daddr
+#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
+};
+
+struct inet_sock {
+ struct sock sk;
+#define inet_daddr sk.__sk_common.skc_daddr
+#define inet_dport sk.__sk_common.skc_dport
+ __be32 inet_saddr;
+ __be16 inet_sport;
+};
+
+struct pt_regs {
+ long di;
+};
+
+static inline struct inet_sock *inet_sk(const struct sock *sk)
+{
+ return (struct inet_sock *)sk;
+}
+
+/* Define various data structures for state recording.
+ * Some fields are not used due to test simplification.
+ */
+enum tcp_estats_addrtype {
+ TCP_ESTATS_ADDRTYPE_IPV4 = 1,
+ TCP_ESTATS_ADDRTYPE_IPV6 = 2
+};
+
+enum tcp_estats_event_type {
+ TCP_ESTATS_ESTABLISH,
+ TCP_ESTATS_PERIODIC,
+ TCP_ESTATS_TIMEOUT,
+ TCP_ESTATS_RETRANSMIT_TIMEOUT,
+ TCP_ESTATS_RETRANSMIT_OTHER,
+ TCP_ESTATS_SYN_RETRANSMIT,
+ TCP_ESTATS_SYNACK_RETRANSMIT,
+ TCP_ESTATS_TERM,
+ TCP_ESTATS_TX_RESET,
+ TCP_ESTATS_RX_RESET,
+ TCP_ESTATS_WRITE_TIMEOUT,
+ TCP_ESTATS_CONN_TIMEOUT,
+ TCP_ESTATS_ACK_LATENCY,
+ TCP_ESTATS_NEVENTS,
+};
+
+struct tcp_estats_event {
+ int pid;
+ int cpu;
+ unsigned long ts;
+ unsigned int magic;
+ enum tcp_estats_event_type event_type;
+};
+
+/* The below data structure is packed in order for
+ * llvm compiler to generate expected code.
+ */
+struct tcp_estats_conn_id {
+ unsigned int localaddressType;
+ struct {
+ unsigned char data[16];
+ } localaddress;
+ struct {
+ unsigned char data[16];
+ } remaddress;
+ unsigned short localport;
+ unsigned short remport;
+} __attribute__((__packed__));
+
+struct tcp_estats_basic_event {
+ struct tcp_estats_event event;
+ struct tcp_estats_conn_id conn_id;
+};
+
+struct bpf_map_def SEC("maps") ev_record_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct tcp_estats_basic_event),
+ .max_entries = 1024,
+};
+
+struct dummy_tracepoint_args {
+ unsigned long long pad;
+ struct sock *sock;
+};
+
+static __always_inline void tcp_estats_ev_init(struct tcp_estats_event *event,
+ enum tcp_estats_event_type type)
+{
+ event->magic = TCP_ESTATS_MAGIC;
+ event->ts = bpf_ktime_get_ns();
+ event->event_type = type;
+}
+
+static __always_inline void unaligned_u32_set(unsigned char *to, __u8 *from)
+{
+ to[0] = _(from[0]);
+ to[1] = _(from[1]);
+ to[2] = _(from[2]);
+ to[3] = _(from[3]);
+}
+
+static __always_inline void conn_id_ipv4_init(struct tcp_estats_conn_id *conn_id,
+ __be32 *saddr, __be32 *daddr)
+{
+ conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV4;
+
+ unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
+ unaligned_u32_set(conn_id->remaddress.data, (__u8 *)daddr);
+}
+
+static __always_inline void conn_id_ipv6_init(struct tcp_estats_conn_id *conn_id,
+ __be32 *saddr, __be32 *daddr)
+{
+ conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV6;
+
+ unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
+ unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32),
+ (__u8 *)(saddr + 1));
+ unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 2,
+ (__u8 *)(saddr + 2));
+ unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 3,
+ (__u8 *)(saddr + 3));
+
+ unaligned_u32_set(conn_id->remaddress.data,
+ (__u8 *)(daddr));
+ unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32),
+ (__u8 *)(daddr + 1));
+ unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 2,
+ (__u8 *)(daddr + 2));
+ unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 3,
+ (__u8 *)(daddr + 3));
+}
+
+static __always_inline void tcp_estats_conn_id_init(struct tcp_estats_conn_id *conn_id,
+ struct sock *sk)
+{
+ conn_id->localport = _(inet_sk(sk)->inet_sport);
+ conn_id->remport = _(inet_sk(sk)->inet_dport);
+
+ if (_(sk->sk_family) == AF_INET6)
+ conn_id_ipv6_init(conn_id,
+ sk->sk_v6_rcv_saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32);
+ else
+ conn_id_ipv4_init(conn_id,
+ &inet_sk(sk)->inet_saddr,
+ &inet_sk(sk)->inet_daddr);
+}
+
+static __always_inline void tcp_estats_init(struct sock *sk,
+ struct tcp_estats_event *event,
+ struct tcp_estats_conn_id *conn_id,
+ enum tcp_estats_event_type type)
+{
+ tcp_estats_ev_init(event, type);
+ tcp_estats_conn_id_init(conn_id, sk);
+}
+
+static __always_inline void send_basic_event(struct sock *sk,
+ enum tcp_estats_event_type type)
+{
+ struct tcp_estats_basic_event ev;
+ __u32 key = bpf_get_prandom_u32();
+
+ memset(&ev, 0, sizeof(ev));
+ tcp_estats_init(sk, &ev.event, &ev.conn_id, type);
+ bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY);
+}
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+ if (!arg->sock)
+ return 0;
+
+ send_basic_event(arg->sock, TCP_ESTATS_TX_RESET);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
OpenPOWER on IntegriCloud