diff options
Diffstat (limited to 'drivers/net/vrf.c')
-rw-r--r-- | drivers/net/vrf.c | 233 |
1 files changed, 202 insertions, 31 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index d6988db1930d..022c0b5f9844 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -36,12 +36,14 @@ #include <net/addrconf.h> #include <net/l3mdev.h> #include <net/fib_rules.h> +#include <net/netns/generic.h> #define DRV_NAME "vrf" #define DRV_VERSION "1.0" #define FIB_RULE_PREF 1000 /* default preference for FIB rules */ -static bool add_fib_rules = true; + +static unsigned int vrf_net_id; struct net_vrf { struct rtable __rcu *rth; @@ -104,6 +106,23 @@ static void vrf_get_stats64(struct net_device *dev, } } +/* by default VRF devices do not have a qdisc and are expected + * to be created with only a single queue. + */ +static bool qdisc_tx_is_default(const struct net_device *dev) +{ + struct netdev_queue *txq; + struct Qdisc *qdisc; + + if (dev->num_tx_queues > 1) + return false; + + txq = netdev_get_tx_queue(dev, 0); + qdisc = rcu_access_pointer(txq->qdisc); + + return !qdisc->enqueue; +} + /* Local traffic destined to local address. Reinsert the packet to rx * path, similar to loopback handling. */ @@ -357,6 +376,29 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } +static int vrf_finish_direct(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct net_device *vrf_dev = skb->dev; + + if (!list_empty(&vrf_dev->ptype_all) && + likely(skb_headroom(skb) >= ETH_HLEN)) { + struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN); + + ether_addr_copy(eth->h_source, vrf_dev->dev_addr); + eth_zero_addr(eth->h_dest); + eth->h_proto = skb->protocol; + + rcu_read_lock_bh(); + dev_queue_xmit_nit(skb, vrf_dev); + rcu_read_unlock_bh(); + + skb_pull(skb, ETH_HLEN); + } + + return 1; +} + #if IS_ENABLED(CONFIG_IPV6) /* modelled after ip6_finish_output2 */ static int vrf_finish_output6(struct net *net, struct sock *sk, @@ -405,18 +447,13 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) * packet to go through device based features such as qdisc, netfilter * hooks and packet sockets with skb->dev set to vrf device. */ -static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, - struct sock *sk, - struct sk_buff *skb) +static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev, + struct sk_buff *skb) { struct net_vrf *vrf = netdev_priv(vrf_dev); struct dst_entry *dst = NULL; struct rt6_info *rt6; - /* don't divert link scope packets */ - if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) - return skb; - rcu_read_lock(); rt6 = rcu_dereference(vrf->rt6); @@ -438,6 +475,55 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, return skb; } +static int vrf_output6_direct(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + skb->protocol = htons(ETH_P_IPV6); + + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb->dev, + vrf_finish_direct, + !(IPCB(skb)->flags & IPSKB_REROUTED)); +} + +static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + struct net *net = dev_net(vrf_dev); + int err; + + skb->dev = vrf_dev; + + err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, + skb, NULL, vrf_dev, vrf_output6_direct); + + if (likely(err == 1)) + err = vrf_output6_direct(net, sk, skb); + + /* reset skb device */ + if (likely(err == 1)) + nf_reset(skb); + else + skb = NULL; + + return skb; +} + +static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + /* don't divert link scope packets */ + if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) + return skb; + + if (qdisc_tx_is_default(vrf_dev)) + return vrf_ip6_out_direct(vrf_dev, sk, skb); + + return vrf_ip6_out_redirect(vrf_dev, skb); +} + /* holding rtnl */ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { @@ -609,18 +695,13 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) * packet to go through device based features such as qdisc, netfilter * hooks and packet sockets with skb->dev set to vrf device. */ -static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, - struct sock *sk, - struct sk_buff *skb) +static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev, + struct sk_buff *skb) { struct net_vrf *vrf = netdev_priv(vrf_dev); struct dst_entry *dst = NULL; struct rtable *rth; - /* don't divert multicast */ - if (ipv4_is_multicast(ip_hdr(skb)->daddr)) - return skb; - rcu_read_lock(); rth = rcu_dereference(vrf->rth); @@ -642,6 +723,55 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, return skb; } +static int vrf_output_direct(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + skb->protocol = htons(ETH_P_IP); + + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb->dev, + vrf_finish_direct, + !(IPCB(skb)->flags & IPSKB_REROUTED)); +} + +static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + struct net *net = dev_net(vrf_dev); + int err; + + skb->dev = vrf_dev; + + err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, + skb, NULL, vrf_dev, vrf_output_direct); + + if (likely(err == 1)) + err = vrf_output_direct(net, sk, skb); + + /* reset skb device */ + if (likely(err == 1)) + nf_reset(skb); + else + skb = NULL; + + return skb; +} + +static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + /* don't divert multicast */ + if (ipv4_is_multicast(ip_hdr(skb)->daddr)) + return skb; + + if (qdisc_tx_is_default(vrf_dev)) + return vrf_ip_out_direct(vrf_dev, sk, skb); + + return vrf_ip_out_redirect(vrf_dev, skb); +} + /* called with rcu lock held */ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, struct sock *sk, @@ -749,14 +879,24 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) { int ret; + /* do not allow loopback device to be enslaved to a VRF. + * The vrf device acts as the loopback for the vrf. + */ + if (port_dev == dev_net(dev)->loopback_dev) + return -EOPNOTSUPP; + + port_dev->priv_flags |= IFF_L3MDEV_SLAVE; ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); if (ret < 0) - return ret; + goto err; - port_dev->priv_flags |= IFF_L3MDEV_SLAVE; cycle_netdev(port_dev); return 0; + +err: + port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; + return ret; } static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) @@ -851,6 +991,7 @@ static u32 vrf_fib_table(const struct net_device *dev) static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + kfree_skb(skb); return 0; } @@ -860,7 +1001,7 @@ static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook, { struct net *net = dev_net(dev); - if (NF_HOOK(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) < 0) + if (nf_hook(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) != 1) skb = NULL; /* kfree_skb(skb) handled by nf code */ return skb; @@ -978,9 +1119,11 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; - skb_push(skb, skb->mac_len); - dev_queue_xmit_nit(skb, vrf_dev); - skb_pull(skb, skb->mac_len); + if (!list_empty(&vrf_dev->ptype_all)) { + skb_push(skb, skb->mac_len); + dev_queue_xmit_nit(skb, vrf_dev); + skb_pull(skb, skb->mac_len); + } IP6CB(skb)->flags |= IP6SKB_L3SLAVE; } @@ -1021,9 +1164,11 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, vrf_rx_stats(vrf_dev, skb->len); - skb_push(skb, skb->mac_len); - dev_queue_xmit_nit(skb, vrf_dev); - skb_pull(skb, skb->mac_len); + if (!list_empty(&vrf_dev->ptype_all)) { + skb_push(skb, skb->mac_len); + dev_queue_xmit_nit(skb, vrf_dev); + skb_pull(skb, skb->mac_len); + } skb = vrf_rcv_nfhook(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, vrf_dev); out: @@ -1128,7 +1273,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) goto nla_put_failure; /* rule only needs to appear once */ - nlh->nlmsg_flags &= NLM_F_EXCL; + nlh->nlmsg_flags |= NLM_F_EXCL; frh = nlmsg_data(nlh); memset(frh, 0, sizeof(*frh)); @@ -1146,11 +1291,11 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) /* fib_nl_{new,del}rule handling looks for net from skb->sk */ skb->sk = dev_net(dev)->rtnl; if (add_it) { - err = fib_nl_newrule(skb, nlh); + err = fib_nl_newrule(skb, nlh, NULL); if (err == -EEXIST) err = 0; } else { - err = fib_nl_delrule(skb, nlh); + err = fib_nl_delrule(skb, nlh, NULL); if (err == -ENOENT) err = 0; } @@ -1205,7 +1350,7 @@ static void vrf_setup(struct net_device *dev) dev->netdev_ops = &vrf_netdev_ops; dev->l3mdev_ops = &vrf_l3mdev_ops; dev->ethtool_ops = &vrf_ethtool_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; /* Fill in device structure with ethernet-generic values. */ eth_hw_addr_random(dev); @@ -1251,6 +1396,8 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net_vrf *vrf = netdev_priv(dev); + bool *add_fib_rules; + struct net *net; int err; if (!data || !data[IFLA_VRF_TABLE]) @@ -1266,13 +1413,15 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, if (err) goto out; - if (add_fib_rules) { + net = dev_net(dev); + add_fib_rules = net_generic(net, vrf_net_id); + if (*add_fib_rules) { err = vrf_add_fib_rules(dev); if (err) { unregister_netdevice(dev); goto out; } - add_fib_rules = false; + *add_fib_rules = false; } out: @@ -1355,16 +1504,38 @@ static struct notifier_block vrf_notifier_block __read_mostly = { .notifier_call = vrf_device_event, }; +/* Initialize per network namespace state */ +static int __net_init vrf_netns_init(struct net *net) +{ + bool *add_fib_rules = net_generic(net, vrf_net_id); + + *add_fib_rules = true; + + return 0; +} + +static struct pernet_operations vrf_net_ops __net_initdata = { + .init = vrf_netns_init, + .id = &vrf_net_id, + .size = sizeof(bool), +}; + static int __init vrf_init_module(void) { int rc; register_netdevice_notifier(&vrf_notifier_block); - rc = rtnl_link_register(&vrf_link_ops); + rc = register_pernet_subsys(&vrf_net_ops); if (rc < 0) goto error; + rc = rtnl_link_register(&vrf_link_ops); + if (rc < 0) { + unregister_pernet_subsys(&vrf_net_ops); + goto error; + } + return 0; error: |