summary refs log tree commit diff stats
path: root/drivers/net/vrf.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/vrf.c')
-rw-r--r-- drivers/net/vrf.c 233
1 files changed, 202 insertions, 31 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index d6988db1930d..022c0b5f9844 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -36,12 +36,14 @@
#include <net/addrconf.h>
#include <net/l3mdev.h>
#include <net/fib_rules.h>
+#include <net/netns/generic.h>
#define DRV_NAME "vrf"
#define DRV_VERSION "1.0"
#define FIB_RULE_PREF 1000 /* default preference for FIB rules */
-static bool add_fib_rules = true;
+
+static unsigned int vrf_net_id;
struct net_vrf {
struct rtable __rcu *rth;
@@ -104,6 +106,23 @@ static void vrf_get_stats64(struct net_device *dev,
}
}
+/* by default VRF devices do not have a qdisc and are expected
+ * to be created with only a single queue.
+ *
+ * Returns false as soon as the device reports more than one tx queue;
+ * otherwise returns true only when the qdisc attached to tx queue 0
+ * has no enqueue handler.
+ *
+ * NOTE(review): the qdisc pointer is fetched with rcu_access_pointer()
+ * and then dereferenced below - confirm that every caller provides the
+ * protection this relies on.
+ */
+static bool qdisc_tx_is_default(const struct net_device *dev)
+{
+ struct netdev_queue *txq;
+ struct Qdisc *qdisc;
+
+ if (dev->num_tx_queues > 1)
+ return false;
+
+ txq = netdev_get_tx_queue(dev, 0); /* only queue 0 is inspected */
+ qdisc = rcu_access_pointer(txq->qdisc);
+
+ return !qdisc->enqueue;
+}
+
/* Local traffic destined to local address. Reinsert the packet to rx
* path, similar to loopback handling.
*/
@@ -357,6 +376,29 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
return ret;
}
+static int vrf_finish_direct(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{ /* Final step of the direct output path. When the ptype_all list of
+ * the device the skb is attached to is non-empty and the skb has at
+ * least ETH_HLEN bytes of headroom, an Ethernet header is pushed
+ * temporarily, the skb is passed to dev_queue_xmit_nit() and the
+ * header is pulled off again. Always returns 1; net and sk are not
+ * used in this function.
+ */
+ struct net_device *vrf_dev = skb->dev;
+
+ if (!list_empty(&vrf_dev->ptype_all) &&
+ likely(skb_headroom(skb) >= ETH_HLEN)) {
+ struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+
+ ether_addr_copy(eth->h_source, vrf_dev->dev_addr); /* source: the device's own address */
+ eth_zero_addr(eth->h_dest); /* destination is left all-zero */
+ eth->h_proto = skb->protocol;
+
+ rcu_read_lock_bh();
+ dev_queue_xmit_nit(skb, vrf_dev);
+ rcu_read_unlock_bh();
+
+ skb_pull(skb, ETH_HLEN); /* undo the skb_push() above */
+ }
+
+ return 1;
+}
+
#if IS_ENABLED(CONFIG_IPV6)
/* modelled after ip6_finish_output2 */
static int vrf_finish_output6(struct net *net, struct sock *sk,
@@ -405,18 +447,13 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
* packet to go through device based features such as qdisc, netfilter
* hooks and packet sockets with skb->dev set to vrf device.
*/
-static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
- struct sock *sk,
- struct sk_buff *skb)
+static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev,
+ struct sk_buff *skb)
{
struct net_vrf *vrf = netdev_priv(vrf_dev);
struct dst_entry *dst = NULL;
struct rt6_info *rt6;
- /* don't divert link scope packets */
- if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
- return skb;
-
rcu_read_lock();
rt6 = rcu_dereference(vrf->rt6);
@@ -438,6 +475,55 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
return skb;
}
+static int vrf_output6_direct(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{ /* Mark the skb as IPv6 and hand it to NF_HOOK_COND for the IPv6
+ * POST_ROUTING hook, with skb->dev as the output device and
+ * vrf_finish_direct() as the continuation. The condition passed in is
+ * true only when IPSKB_REROUTED is not set in IPCB(skb)->flags.
+ *
+ * NOTE(review): this IPv6 path tests the IPCB()/IPSKB_REROUTED pair,
+ * the same one the IPv4 variant uses - confirm that this is intended
+ * rather than an IPv6-specific control block and flag.
+ */
+ skb->protocol = htons(ETH_P_IPV6);
+
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, skb->dev,
+ vrf_finish_direct,
+ !(IPCB(skb)->flags & IPSKB_REROUTED));
+}
+
+static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
+ struct sock *sk,
+ struct sk_buff *skb)
+{ /* Direct IPv6 output: attach the skb to the VRF device, run the IPv6
+ * LOCAL_OUT hook and, when that returns 1, vrf_output6_direct().
+ * Returns the skb when the final result is 1, NULL otherwise.
+ */
+ struct net *net = dev_net(vrf_dev);
+ int err;
+
+ skb->dev = vrf_dev;
+
+ err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
+ skb, NULL, vrf_dev, vrf_output6_direct);
+
+ if (likely(err == 1)) /* only continue to the output step on a result of 1 */
+ err = vrf_output6_direct(net, sk, skb);
+
+ /* on a result of 1 call nf_reset() on the skb (presumably this
+ * drops netfilter state attached to it - confirm); on any other
+ * result hand NULL back to the caller instead of the skb
+ */
+ if (likely(err == 1))
+ nf_reset(skb);
+ else
+ skb = NULL;
+
+ return skb;
+}
+
+static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+ struct sock *sk,
+ struct sk_buff *skb)
+{
+ /* IPv6 output entry point: picks between the direct and the
+ * redirect path. Don't divert link scope packets - when
+ * rt6_need_strict() is true for the destination address the skb
+ * is returned untouched.
+ */
+ if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
+ return skb;
+
+ if (qdisc_tx_is_default(vrf_dev)) /* default tx setup: take the direct path */
+ return vrf_ip6_out_direct(vrf_dev, sk, skb);
+
+ return vrf_ip6_out_redirect(vrf_dev, skb); /* otherwise: redirect path (sk is not passed on) */
+}
+
/* holding rtnl */
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
@@ -609,18 +695,13 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
* packet to go through device based features such as qdisc, netfilter
* hooks and packet sockets with skb->dev set to vrf device.
*/
-static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
- struct sock *sk,
- struct sk_buff *skb)
+static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev,
+ struct sk_buff *skb)
{
struct net_vrf *vrf = netdev_priv(vrf_dev);
struct dst_entry *dst = NULL;
struct rtable *rth;
- /* don't divert multicast */
- if (ipv4_is_multicast(ip_hdr(skb)->daddr))
- return skb;
-
rcu_read_lock();
rth = rcu_dereference(vrf->rth);
@@ -642,6 +723,55 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
return skb;
}
+static int vrf_output_direct(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{ /* Mark the skb as IPv4 and hand it to NF_HOOK_COND for the IPv4
+ * POST_ROUTING hook, with skb->dev as the output device and
+ * vrf_finish_direct() as the continuation. The condition passed in is
+ * true only when IPSKB_REROUTED is not set in IPCB(skb)->flags.
+ */
+ skb->protocol = htons(ETH_P_IP);
+
+ return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, skb->dev,
+ vrf_finish_direct,
+ !(IPCB(skb)->flags & IPSKB_REROUTED));
+}
+
+static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
+ struct sock *sk,
+ struct sk_buff *skb)
+{ /* Direct IPv4 output: attach the skb to the VRF device, run the IPv4
+ * LOCAL_OUT hook and, when that returns 1, vrf_output_direct().
+ * Returns the skb when the final result is 1, NULL otherwise.
+ */
+ struct net *net = dev_net(vrf_dev);
+ int err;
+
+ skb->dev = vrf_dev;
+
+ err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
+ skb, NULL, vrf_dev, vrf_output_direct);
+
+ if (likely(err == 1)) /* only continue to the output step on a result of 1 */
+ err = vrf_output_direct(net, sk, skb);
+
+ /* on a result of 1 call nf_reset() on the skb (presumably this
+ * drops netfilter state attached to it - confirm); on any other
+ * result hand NULL back to the caller instead of the skb
+ */
+ if (likely(err == 1))
+ nf_reset(skb);
+ else
+ skb = NULL;
+
+ return skb;
+}
+
+static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
+ struct sock *sk,
+ struct sk_buff *skb)
+{
+ /* IPv4 output entry point: picks between the direct and the
+ * redirect path. Don't divert multicast - an skb whose
+ * destination address is multicast is returned untouched.
+ */
+ if (ipv4_is_multicast(ip_hdr(skb)->daddr))
+ return skb;
+
+ if (qdisc_tx_is_default(vrf_dev)) /* default tx setup: take the direct path */
+ return vrf_ip_out_direct(vrf_dev, sk, skb);
+
+ return vrf_ip_out_redirect(vrf_dev, skb); /* otherwise: redirect path (sk is not passed on) */
+}
+
/* called with rcu lock held */
static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
struct sock *sk,
@@ -749,14 +879,24 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
{
int ret;
+ /* do not allow loopback device to be enslaved to a VRF.
+ * The vrf device acts as the loopback for the vrf.
+ */
+ if (port_dev == dev_net(dev)->loopback_dev)
+ return -EOPNOTSUPP;
+
+ port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL);
if (ret < 0)
- return ret;
+ goto err;
- port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
cycle_netdev(port_dev);
return 0;
+
+err:
+ port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE;
+ return ret;
}
static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
@@ -851,6 +991,7 @@ static u32 vrf_fib_table(const struct net_device *dev)
static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ kfree_skb(skb); /* added by this patch: the skb handed to this callback is freed here */
return 0; /* net and sk are not used */
}
@@ -860,7 +1001,7 @@ static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook,
{
struct net *net = dev_net(dev);
- if (NF_HOOK(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) < 0)
+ if (nf_hook(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) != 1)
skb = NULL; /* kfree_skb(skb) handled by nf code */
return skb;
@@ -978,9 +1119,11 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
skb->dev = vrf_dev;
skb->skb_iif = vrf_dev->ifindex;
- skb_push(skb, skb->mac_len);
- dev_queue_xmit_nit(skb, vrf_dev);
- skb_pull(skb, skb->mac_len);
+ if (!list_empty(&vrf_dev->ptype_all)) {
+ skb_push(skb, skb->mac_len);
+ dev_queue_xmit_nit(skb, vrf_dev);
+ skb_pull(skb, skb->mac_len);
+ }
IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
}
@@ -1021,9 +1164,11 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
vrf_rx_stats(vrf_dev, skb->len);
- skb_push(skb, skb->mac_len);
- dev_queue_xmit_nit(skb, vrf_dev);
- skb_pull(skb, skb->mac_len);
+ if (!list_empty(&vrf_dev->ptype_all)) {
+ skb_push(skb, skb->mac_len);
+ dev_queue_xmit_nit(skb, vrf_dev);
+ skb_pull(skb, skb->mac_len);
+ }
skb = vrf_rcv_nfhook(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, vrf_dev);
out:
@@ -1128,7 +1273,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
goto nla_put_failure;
/* rule only needs to appear once */
- nlh->nlmsg_flags &= NLM_F_EXCL;
+ nlh->nlmsg_flags |= NLM_F_EXCL;
frh = nlmsg_data(nlh);
memset(frh, 0, sizeof(*frh));
@@ -1146,11 +1291,11 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
/* fib_nl_{new,del}rule handling looks for net from skb->sk */
skb->sk = dev_net(dev)->rtnl;
if (add_it) {
- err = fib_nl_newrule(skb, nlh);
+ err = fib_nl_newrule(skb, nlh, NULL);
if (err == -EEXIST)
err = 0;
} else {
- err = fib_nl_delrule(skb, nlh);
+ err = fib_nl_delrule(skb, nlh, NULL);
if (err == -ENOENT)
err = 0;
}
@@ -1205,7 +1350,7 @@ static void vrf_setup(struct net_device *dev)
dev->netdev_ops = &vrf_netdev_ops;
dev->l3mdev_ops = &vrf_l3mdev_ops;
dev->ethtool_ops = &vrf_ethtool_ops;
- dev->destructor = free_netdev;
+ dev->needs_free_netdev = true;
/* Fill in device structure with ethernet-generic values. */
eth_hw_addr_random(dev);
@@ -1251,6 +1396,8 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct net_vrf *vrf = netdev_priv(dev);
+ bool *add_fib_rules;
+ struct net *net;
int err;
if (!data || !data[IFLA_VRF_TABLE])
@@ -1266,13 +1413,15 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
if (err)
goto out;
- if (add_fib_rules) {
+ net = dev_net(dev);
+ add_fib_rules = net_generic(net, vrf_net_id);
+ if (*add_fib_rules) {
err = vrf_add_fib_rules(dev);
if (err) {
unregister_netdevice(dev);
goto out;
}
- add_fib_rules = false;
+ *add_fib_rules = false;
}
out:
@@ -1355,16 +1504,38 @@ static struct notifier_block vrf_notifier_block __read_mostly = {
.notifier_call = vrf_device_event,
};
+/* Initialize per network namespace state: the bool that net_generic()
+ * returns for vrf_net_id in this namespace is set to true.
+ * Always returns 0.
+ */
+static int __net_init vrf_netns_init(struct net *net)
+{
+ bool *add_fib_rules = net_generic(net, vrf_net_id);
+
+ *add_fib_rules = true;
+
+ return 0;
+}
+
+static struct pernet_operations vrf_net_ops __net_initdata = {
+ .init = vrf_netns_init, /* sets the per-namespace bool to true */
+ .id = &vrf_net_id, /* id later passed to net_generic() to find that bool */
+ .size = sizeof(bool), /* per-namespace area requested: one bool */
+};
+
static int __init vrf_init_module(void)
{
int rc;
register_netdevice_notifier(&vrf_notifier_block);
- rc = rtnl_link_register(&vrf_link_ops);
+ rc = register_pernet_subsys(&vrf_net_ops);
if (rc < 0)
goto error;
+ rc = rtnl_link_register(&vrf_link_ops);
+ if (rc < 0) {
+ unregister_pernet_subsys(&vrf_net_ops);
+ goto error;
+ }
+
return 0;
error:
OpenPOWER on IntegriCloud