diff options
Diffstat (limited to 'drivers/net/vxlan.c')
| -rw-r--r-- | drivers/net/vxlan.c | 440 | 
1 files changed, 331 insertions, 109 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index a8c755dcab14..0e57e862c399 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -61,12 +61,6 @@  #define FDB_AGE_DEFAULT 300 /* 5 min */  #define FDB_AGE_INTERVAL (10 * HZ)	/* rescan interval */ -#define VXLAN_N_VID	(1u << 24) -#define VXLAN_VID_MASK	(VXLAN_N_VID - 1) -#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) - -#define VXLAN_FLAGS 0x08000000	/* struct vxlanhdr.vx_flags required value. */ -  /* UDP port for VXLAN traffic.   * The IANA assigned port is 4789, but the Linux default is 8472   * for compatibility with early adopters. @@ -269,15 +263,20 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)  	return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);  } -/* Find VXLAN socket based on network namespace, address family and UDP port */ -static struct vxlan_sock *vxlan_find_sock(struct net *net, -					  sa_family_t family, __be16 port) +/* Find VXLAN socket based on network namespace, address family and UDP port + * and enabled unshareable flags. + */ +static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, +					  __be16 port, u32 flags)  {  	struct vxlan_sock *vs; +	flags &= VXLAN_F_RCV_FLAGS; +  	hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {  		if (inet_sk(vs->sock->sk)->inet_sport == port && -		    inet_sk(vs->sock->sk)->sk.sk_family == family) +		    inet_sk(vs->sock->sk)->sk.sk_family == family && +		    vs->flags == flags)  			return vs;  	}  	return NULL; @@ -297,11 +296,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)  /* Look up VNI in a per net namespace table */  static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, -					sa_family_t family, __be16 port) +					sa_family_t family, __be16 port, +					u32 flags)  {  	struct vxlan_sock *vs; -	vs = vxlan_find_sock(net, family, port); +	vs = vxlan_find_sock(net, family, port, flags);  	if (!vs)  		return NULL; @@ -340,6 +340,11 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,  	ndm->ndm_flags = fdb->flags;  	ndm->ndm_type = RTN_UNICAST; +	if (!net_eq(dev_net(vxlan->dev), vxlan->net) && +	    nla_put_s32(skb, NDA_LINK_NETNSID, +			peernet2id(dev_net(vxlan->dev), vxlan->net))) +		goto nla_put_failure; +  	if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))  		goto nla_put_failure; @@ -364,7 +369,8 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,  	if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))  		goto nla_put_failure; -	return nlmsg_end(skb, nlh); +	nlmsg_end(skb, nlh); +	return 0;  nla_put_failure:  	nlmsg_cancel(skb, nlh); @@ -379,6 +385,7 @@ static inline size_t vxlan_nlmsg_size(void)  		+ nla_total_size(sizeof(__be16)) /* NDA_PORT */  		+ nla_total_size(sizeof(__be32)) /* NDA_VNI */  		+ nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */ +		+ nla_total_size(sizeof(__s32)) /* NDA_LINK_NETNSID */  		+ nla_total_size(sizeof(struct nda_cacheinfo));  } @@ -545,15 +552,51 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,  	return 1;  } -static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff *skb) +static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, +					  unsigned int off, +					  struct vxlanhdr *vh, size_t hdrlen, +					  u32 data) +{ +	size_t start, offset, plen; + +	if (skb->remcsum_offload) +		return vh; + +	if (!NAPI_GRO_CB(skb)->csum_valid) +		return NULL; + +	start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; +	offset = start + ((data & VXLAN_RCO_UDP) ? +			  offsetof(struct udphdr, check) : +			  offsetof(struct tcphdr, check)); + +	plen = hdrlen + offset + sizeof(u16); + +	/* Pull checksum that will be written */ +	if (skb_gro_header_hard(skb, off + plen)) { +		vh = skb_gro_header_slow(skb, off + plen, off); +		if (!vh) +			return NULL; +	} + +	skb_gro_remcsum_process(skb, (void *)vh + hdrlen, start, offset); + +	skb->remcsum_offload = 1; + +	return vh; +} + +static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, +					  struct sk_buff *skb, +					  struct udp_offload *uoff)  {  	struct sk_buff *p, **pp = NULL;  	struct vxlanhdr *vh, *vh2; -	struct ethhdr *eh, *eh2; -	unsigned int hlen, off_vx, off_eth; -	const struct packet_offload *ptype; -	__be16 type; +	unsigned int hlen, off_vx;  	int flush = 1; +	struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock, +					     udp_offloads); +	u32 flags;  	off_vx = skb_gro_offset(skb);  	hlen = off_vx + sizeof(*vh); @@ -563,15 +606,17 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff  		if (unlikely(!vh))  			goto out;  	} +  	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */  	skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr)); -	off_eth = skb_gro_offset(skb); -	hlen = off_eth + sizeof(*eh); -	eh   = skb_gro_header_fast(skb, off_eth); -	if (skb_gro_header_hard(skb, hlen)) { -		eh = skb_gro_header_slow(skb, hlen, off_eth); -		if (unlikely(!eh)) +	flags = ntohl(vh->vx_flags); + +	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { +		vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr), +				       ntohl(vh->vx_vni)); + +		if (!vh)  			goto out;  	} @@ -582,54 +627,27 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff  			continue;  		vh2 = (struct vxlanhdr *)(p->data + off_vx); -		eh2 = (struct ethhdr   *)(p->data + off_eth); -		if (vh->vx_vni != vh2->vx_vni || compare_ether_header(eh, eh2)) { +		if (vh->vx_flags != vh2->vx_flags || +		    vh->vx_vni != vh2->vx_vni) {  			NAPI_GRO_CB(p)->same_flow = 0;  			continue;  		}  	} -	type = eh->h_proto; - -	rcu_read_lock(); -	ptype = gro_find_receive_by_type(type); -	if (ptype == NULL) { -		flush = 1; -		goto out_unlock; -	} +	pp = eth_gro_receive(head, skb); -	skb_gro_pull(skb, sizeof(*eh)); /* pull inner eth header */ -	skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); -	pp = ptype->callbacks.gro_receive(head, skb); - -out_unlock: -	rcu_read_unlock();  out:  	NAPI_GRO_CB(skb)->flush |= flush;  	return pp;  } -static int vxlan_gro_complete(struct sk_buff *skb, int nhoff) +static int vxlan_gro_complete(struct sk_buff *skb, int nhoff, +			      struct udp_offload *uoff)  { -	struct ethhdr *eh; -	struct packet_offload *ptype; -	__be16 type; -	int vxlan_len  = sizeof(struct vxlanhdr) + sizeof(struct ethhdr); -	int err = -ENOSYS; -  	udp_tunnel_gro_complete(skb, nhoff); -	eh = (struct ethhdr *)(skb->data + nhoff + sizeof(struct vxlanhdr)); -	type = eh->h_proto; - -	rcu_read_lock(); -	ptype = gro_find_complete_by_type(type); -	if (ptype != NULL) -		err = ptype->callbacks.gro_complete(skb, nhoff + vxlan_len); - -	rcu_read_unlock(); -	return err; +	return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));  }  /* Notify netdevs that UDP port started listening */ @@ -991,7 +1009,7 @@ static bool vxlan_snoop(struct net_device *dev,  		if (net_ratelimit())  			netdev_info(dev,  				    "%pM migrated from %pIS to %pIS\n", -				    src_mac, &rdst->remote_ip, &src_ip); +				    src_mac, &rdst->remote_ip.sa, &src_ip->sa);  		rdst->remote_ip = *src_ip;  		f->updated = jiffies; @@ -1131,33 +1149,107 @@ static void vxlan_igmp_leave(struct work_struct *work)  	dev_put(vxlan->dev);  } +static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, +				      size_t hdrlen, u32 data) +{ +	size_t start, offset, plen; + +	if (skb->remcsum_offload) { +		/* Already processed in GRO path */ +		skb->remcsum_offload = 0; +		return vh; +	} + +	start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; +	offset = start + ((data & VXLAN_RCO_UDP) ? +			  offsetof(struct udphdr, check) : +			  offsetof(struct tcphdr, check)); + +	plen = hdrlen + offset + sizeof(u16); + +	if (!pskb_may_pull(skb, plen)) +		return NULL; + +	vh = (struct vxlanhdr *)(udp_hdr(skb) + 1); + +	skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset); + +	return vh; +} +  /* Callback from net/ipv4/udp.c to receive packets */  static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)  {  	struct vxlan_sock *vs;  	struct vxlanhdr *vxh; +	u32 flags, vni; +	struct vxlan_metadata md = {0};  	/* Need Vxlan and inner Ethernet header to be present */  	if (!pskb_may_pull(skb, VXLAN_HLEN))  		goto error; -	/* Return packets with reserved bits set */  	vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); -	if (vxh->vx_flags != htonl(VXLAN_FLAGS) || -	    (vxh->vx_vni & htonl(0xff))) { -		netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", -			   ntohl(vxh->vx_flags), ntohl(vxh->vx_vni)); -		goto error; +	flags = ntohl(vxh->vx_flags); +	vni = ntohl(vxh->vx_vni); + +	if (flags & VXLAN_HF_VNI) { +		flags &= ~VXLAN_HF_VNI; +	} else { +		/* VNI flag always required to be set */ +		goto bad_flags;  	}  	if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))  		goto drop; +	vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);  	vs = rcu_dereference_sk_user_data(sk);  	if (!vs)  		goto drop; -	vs->rcv(vs, skb, vxh->vx_vni); +	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { +		vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni); +		if (!vxh) +			goto drop; + +		flags &= ~VXLAN_HF_RCO; +		vni &= VXLAN_VID_MASK; +	} + +	/* For backwards compatibility, only allow reserved fields to be +	 * used by VXLAN extensions if explicitly requested. +	 */ +	if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) { +		struct vxlanhdr_gbp *gbp; + +		gbp = (struct vxlanhdr_gbp *)vxh; +		md.gbp = ntohs(gbp->policy_id); + +		if (gbp->dont_learn) +			md.gbp |= VXLAN_GBP_DONT_LEARN; + +		if (gbp->policy_applied) +			md.gbp |= VXLAN_GBP_POLICY_APPLIED; + +		flags &= ~VXLAN_GBP_USED_BITS; +	} + +	if (flags || (vni & ~VXLAN_VID_MASK)) { +		/* If there are any unprocessed flags remaining treat +		 * this as a malformed packet. This behavior diverges from +		 * VXLAN RFC (RFC7348) which stipulates that bits in reserved +		 * in reserved fields are to be ignored. The approach here +		 * maintains compatbility with previous stack code, and also +		 * is more robust and provides a little more security in +		 * adding extensions to VXLAN. +		 */ + +		goto bad_flags; +	} + +	md.vni = vxh->vx_vni; +	vs->rcv(vs, skb, &md);  	return 0;  drop: @@ -1165,13 +1257,17 @@ drop:  	kfree_skb(skb);  	return 0; +bad_flags: +	netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", +		   ntohl(vxh->vx_flags), ntohl(vxh->vx_vni)); +  error:  	/* Return non vxlan pkt */  	return 1;  } -static void vxlan_rcv(struct vxlan_sock *vs, -		      struct sk_buff *skb, __be32 vx_vni) +static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, +		      struct vxlan_metadata *md)  {  	struct iphdr *oip = NULL;  	struct ipv6hdr *oip6 = NULL; @@ -1182,7 +1278,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,  	int err = 0;  	union vxlan_addr *remote_ip; -	vni = ntohl(vx_vni) >> 8; +	vni = ntohl(md->vni) >> 8;  	/* Is this VNI defined? */  	vxlan = vxlan_vs_find_vni(vs, vni);  	if (!vxlan) @@ -1216,6 +1312,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,  		goto drop;  	skb_reset_network_header(skb); +	skb->mark = md->gbp;  	if (oip6)  		err = IP6_ECN_decapsulate(oip6, skb); @@ -1565,20 +1662,54 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)  	return false;  } +static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, +				struct vxlan_metadata *md) +{ +	struct vxlanhdr_gbp *gbp; + +	if (!md->gbp) +		return; + +	gbp = (struct vxlanhdr_gbp *)vxh; +	vxh->vx_flags |= htonl(VXLAN_HF_GBP); + +	if (md->gbp & VXLAN_GBP_DONT_LEARN) +		gbp->dont_learn = 1; + +	if (md->gbp & VXLAN_GBP_POLICY_APPLIED) +		gbp->policy_applied = 1; + +	gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); +} +  #if IS_ENABLED(CONFIG_IPV6) -static int vxlan6_xmit_skb(struct vxlan_sock *vs, -			   struct dst_entry *dst, struct sk_buff *skb, +static int vxlan6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb,  			   struct net_device *dev, struct in6_addr *saddr,  			   struct in6_addr *daddr, __u8 prio, __u8 ttl, -			   __be16 src_port, __be16 dst_port, __be32 vni, -			   bool xnet) +			   __be16 src_port, __be16 dst_port, +			   struct vxlan_metadata *md, bool xnet, u32 vxflags)  {  	struct vxlanhdr *vxh;  	int min_headroom;  	int err; -	bool udp_sum = !udp_get_no_check6_tx(vs->sock->sk); +	bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX); +	int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; +	u16 hdrlen = sizeof(struct vxlanhdr); + +	if ((vxflags & VXLAN_F_REMCSUM_TX) && +	    skb->ip_summed == CHECKSUM_PARTIAL) { +		int csum_start = skb_checksum_start_offset(skb); + +		if (csum_start <= VXLAN_MAX_REMCSUM_START && +		    !(csum_start & VXLAN_RCO_SHIFT_MASK) && +		    (skb->csum_offset == offsetof(struct udphdr, check) || +		     skb->csum_offset == offsetof(struct tcphdr, check))) { +			udp_sum = false; +			type |= SKB_GSO_TUNNEL_REMCSUM; +		} +	} -	skb = udp_tunnel_handle_offloads(skb, udp_sum); +	skb = iptunnel_handle_offloads(skb, udp_sum, type);  	if (IS_ERR(skb)) {  		err = -EINVAL;  		goto err; @@ -1588,7 +1719,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,  	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len  			+ VXLAN_HLEN + sizeof(struct ipv6hdr) -			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); +			+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);  	/* Need space for new headers (invalidates iph ptr) */  	err = skb_cow_head(skb, min_headroom); @@ -1604,13 +1735,33 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,  	}  	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); -	vxh->vx_flags = htonl(VXLAN_FLAGS); -	vxh->vx_vni = vni; +	vxh->vx_flags = htonl(VXLAN_HF_VNI); +	vxh->vx_vni = md->vni; + +	if (type & SKB_GSO_TUNNEL_REMCSUM) { +		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> +			   VXLAN_RCO_SHIFT; + +		if (skb->csum_offset == offsetof(struct udphdr, check)) +			data |= VXLAN_RCO_UDP; + +		vxh->vx_vni |= htonl(data); +		vxh->vx_flags |= htonl(VXLAN_HF_RCO); + +		if (!skb_is_gso(skb)) { +			skb->ip_summed = CHECKSUM_NONE; +			skb->encapsulation = 0; +		} +	} + +	if (vxflags & VXLAN_F_GBP) +		vxlan_build_gbp_hdr(vxh, vxflags, md);  	skb_set_inner_protocol(skb, htons(ETH_P_TEB)); -	udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio, -			     ttl, src_port, dst_port); +	udp_tunnel6_xmit_skb(dst, skb, dev, saddr, daddr, prio, +			     ttl, src_port, dst_port, +			     !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX));  	return 0;  err:  	dst_release(dst); @@ -1618,23 +1769,38 @@ err:  }  #endif -int vxlan_xmit_skb(struct vxlan_sock *vs, -		   struct rtable *rt, struct sk_buff *skb, +int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb,  		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, -		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet) +		   __be16 src_port, __be16 dst_port, +		   struct vxlan_metadata *md, bool xnet, u32 vxflags)  {  	struct vxlanhdr *vxh;  	int min_headroom;  	int err; -	bool udp_sum = !vs->sock->sk->sk_no_check_tx; +	bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM); +	int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; +	u16 hdrlen = sizeof(struct vxlanhdr); + +	if ((vxflags & VXLAN_F_REMCSUM_TX) && +	    skb->ip_summed == CHECKSUM_PARTIAL) { +		int csum_start = skb_checksum_start_offset(skb); + +		if (csum_start <= VXLAN_MAX_REMCSUM_START && +		    !(csum_start & VXLAN_RCO_SHIFT_MASK) && +		    (skb->csum_offset == offsetof(struct udphdr, check) || +		     skb->csum_offset == offsetof(struct tcphdr, check))) { +			udp_sum = false; +			type |= SKB_GSO_TUNNEL_REMCSUM; +		} +	} -	skb = udp_tunnel_handle_offloads(skb, udp_sum); +	skb = iptunnel_handle_offloads(skb, udp_sum, type);  	if (IS_ERR(skb))  		return PTR_ERR(skb);  	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len  			+ VXLAN_HLEN + sizeof(struct iphdr) -			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); +			+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);  	/* Need space for new headers (invalidates iph ptr) */  	err = skb_cow_head(skb, min_headroom); @@ -1648,13 +1814,33 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,  		return -ENOMEM;  	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); -	vxh->vx_flags = htonl(VXLAN_FLAGS); -	vxh->vx_vni = vni; +	vxh->vx_flags = htonl(VXLAN_HF_VNI); +	vxh->vx_vni = md->vni; + +	if (type & SKB_GSO_TUNNEL_REMCSUM) { +		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> +			   VXLAN_RCO_SHIFT; + +		if (skb->csum_offset == offsetof(struct udphdr, check)) +			data |= VXLAN_RCO_UDP; + +		vxh->vx_vni |= htonl(data); +		vxh->vx_flags |= htonl(VXLAN_HF_RCO); + +		if (!skb_is_gso(skb)) { +			skb->ip_summed = CHECKSUM_NONE; +			skb->encapsulation = 0; +		} +	} + +	if (vxflags & VXLAN_F_GBP) +		vxlan_build_gbp_hdr(vxh, vxflags, md);  	skb_set_inner_protocol(skb, htons(ETH_P_TEB)); -	return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos, -				   ttl, df, src_port, dst_port, xnet); +	return udp_tunnel_xmit_skb(rt, skb, src, dst, tos, +				   ttl, df, src_port, dst_port, xnet, +				   !(vxflags & VXLAN_F_UDP_CSUM));  }  EXPORT_SYMBOL_GPL(vxlan_xmit_skb); @@ -1711,6 +1897,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,  	const struct iphdr *old_iph;  	struct flowi4 fl4;  	union vxlan_addr *dst; +	struct vxlan_metadata md;  	__be16 src_port = 0, dst_port;  	u32 vni;  	__be16 df = 0; @@ -1772,7 +1959,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,  			ip_rt_put(rt);  			dst_vxlan = vxlan_find_vni(vxlan->net, vni, -						   dst->sa.sa_family, dst_port); +						   dst->sa.sa_family, dst_port, +						   vxlan->flags);  			if (!dst_vxlan)  				goto tx_error;  			vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -1781,12 +1969,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,  		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);  		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); - -		err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, -				     fl4.saddr, dst->sin.sin_addr.s_addr, -				     tos, ttl, df, src_port, dst_port, -				     htonl(vni << 8), -				     !net_eq(vxlan->net, dev_net(vxlan->dev))); +		md.vni = htonl(vni << 8); +		md.gbp = skb->mark; + +		err = vxlan_xmit_skb(rt, skb, fl4.saddr, +				     dst->sin.sin_addr.s_addr, tos, ttl, df, +				     src_port, dst_port, &md, +				     !net_eq(vxlan->net, dev_net(vxlan->dev)), +				     vxlan->flags);  		if (err < 0) {  			/* skb is already freed. */  			skb = NULL; @@ -1830,7 +2020,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,  			dst_release(ndst);  			dst_vxlan = vxlan_find_vni(vxlan->net, vni, -						   dst->sa.sa_family, dst_port); +						   dst->sa.sa_family, dst_port, +						   vxlan->flags);  			if (!dst_vxlan)  				goto tx_error;  			vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -1838,11 +2029,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,  		}  		ttl = ttl ? : ip6_dst_hoplimit(ndst); +		md.vni = htonl(vni << 8); +		md.gbp = skb->mark; -		err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb, -				      dev, &fl6.saddr, &fl6.daddr, 0, ttl, -				      src_port, dst_port, htonl(vni << 8), -				      !net_eq(vxlan->net, dev_net(vxlan->dev))); +		err = vxlan6_xmit_skb(ndst, skb, dev, &fl6.saddr, &fl6.daddr, +				      0, ttl, src_port, dst_port, &md, +				      !net_eq(vxlan->net, dev_net(vxlan->dev)), +				      vxlan->flags);  #endif  	} @@ -1998,7 +2191,7 @@ static int vxlan_init(struct net_device *dev)  	spin_lock(&vn->sock_lock);  	vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET, -			     vxlan->dst_port); +			     vxlan->dst_port, vxlan->flags);  	if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {  		/* If we have a socket with same port already, reuse it */  		vxlan_vs_add_dev(vs, vxlan); @@ -2242,6 +2435,9 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {  	[IFLA_VXLAN_UDP_CSUM]	= { .type = NLA_U8 },  	[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]	= { .type = NLA_U8 },  	[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]	= { .type = NLA_U8 }, +	[IFLA_VXLAN_REMCSUM_TX]	= { .type = NLA_U8 }, +	[IFLA_VXLAN_REMCSUM_RX]	= { .type = NLA_U8 }, +	[IFLA_VXLAN_GBP]	= { .type = NLA_FLAG, },  };  static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -2311,15 +2507,11 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,  	if (ipv6) {  		udp_conf.family = AF_INET6; -		udp_conf.use_udp6_tx_checksums = -		    !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);  		udp_conf.use_udp6_rx_checksums =  		    !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);  	} else {  		udp_conf.family = AF_INET;  		udp_conf.local_ip.s_addr = INADDR_ANY; -		udp_conf.use_udp_checksums = -		    !!(flags & VXLAN_F_UDP_CSUM);  	}  	udp_conf.local_udp_port = port; @@ -2363,6 +2555,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,  	atomic_set(&vs->refcnt, 1);  	vs->rcv = rcv;  	vs->data = data; +	vs->flags = (flags & VXLAN_F_RCV_FLAGS);  	/* Initialize the vxlan udp offloads structure */  	vs->udp_offloads.port = port; @@ -2401,7 +2594,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,  		return vs;  	spin_lock(&vn->sock_lock); -	vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); +	vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags);  	if (vs && ((vs->rcv != rcv) ||  		   !atomic_add_unless(&vs->refcnt, 1, 0)))  			vs = ERR_PTR(-EBUSY); @@ -2557,8 +2750,19 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,  	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))  		vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX; +	if (data[IFLA_VXLAN_REMCSUM_TX] && +	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX])) +		vxlan->flags |= VXLAN_F_REMCSUM_TX; + +	if (data[IFLA_VXLAN_REMCSUM_RX] && +	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX])) +		vxlan->flags |= VXLAN_F_REMCSUM_RX; + +	if (data[IFLA_VXLAN_GBP]) +		vxlan->flags |= VXLAN_F_GBP; +  	if (vxlan_find_vni(src_net, vni, use_ipv6 ? AF_INET6 : AF_INET, -			   vxlan->dst_port)) { +			   vxlan->dst_port, vxlan->flags)) {  		pr_info("duplicate VNI %u\n", vni);  		return -EEXIST;  	} @@ -2625,6 +2829,8 @@ static size_t vxlan_get_size(const struct net_device *dev)  		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */  		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */  		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */ +		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */ +		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */  		0;  } @@ -2690,18 +2896,33 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)  	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,  			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||  	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, -			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX))) +			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) || +	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX, +			!!(vxlan->flags & VXLAN_F_REMCSUM_TX)) || +	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX, +			!!(vxlan->flags & VXLAN_F_REMCSUM_RX)))  		goto nla_put_failure;  	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))  		goto nla_put_failure; +	if (vxlan->flags & VXLAN_F_GBP && +	    nla_put_flag(skb, IFLA_VXLAN_GBP)) +		goto nla_put_failure; +  	return 0;  nla_put_failure:  	return -EMSGSIZE;  } +static struct net *vxlan_get_link_net(const struct net_device *dev) +{ +	struct vxlan_dev *vxlan = netdev_priv(dev); + +	return vxlan->net; +} +  static struct rtnl_link_ops vxlan_link_ops __read_mostly = {  	.kind		= "vxlan",  	.maxtype	= IFLA_VXLAN_MAX, @@ -2713,6 +2934,7 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = {  	.dellink	= vxlan_dellink,  	.get_size	= vxlan_get_size,  	.fill_info	= vxlan_fill_info, +	.get_link_net	= vxlan_get_link_net,  };  static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,  | 

