summaryrefslogtreecommitdiffstats
path: root/net/openvswitch
diff options
context:
space:
mode:
Diffstat (limited to 'net/openvswitch')
-rw-r--r--net/openvswitch/actions.c33
-rw-r--r--net/openvswitch/conntrack.c52
-rw-r--r--net/openvswitch/datapath.c210
-rw-r--r--net/openvswitch/datapath.h14
-rw-r--r--net/openvswitch/flow.c193
-rw-r--r--net/openvswitch/flow.h15
-rw-r--r--net/openvswitch/flow_netlink.c121
-rw-r--r--net/openvswitch/flow_table.c381
-rw-r--r--net/openvswitch/flow_table.h19
-rw-r--r--net/openvswitch/vport-internal_dev.c13
-rw-r--r--net/openvswitch/vport.c5
11 files changed, 782 insertions, 274 deletions
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 3572e11b6f21..7fbfe2adfffa 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -161,14 +161,17 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr, int len);
static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_action_push_mpls *mpls)
+ __be32 mpls_lse, __be16 mpls_ethertype, __u16 mac_len)
{
int err;
- err = skb_mpls_push(skb, mpls->mpls_lse, mpls->mpls_ethertype);
+ err = skb_mpls_push(skb, mpls_lse, mpls_ethertype, mac_len, !!mac_len);
if (err)
return err;
+ if (!mac_len)
+ key->mac_proto = MAC_PROTO_NONE;
+
invalidate_flow_key(key);
return 0;
}
@@ -178,10 +181,14 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
{
int err;
- err = skb_mpls_pop(skb, ethertype);
+ err = skb_mpls_pop(skb, ethertype, skb->mac_len,
+ ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET);
if (err)
return err;
+ if (ethertype == htons(ETH_P_TEB))
+ key->mac_proto = MAC_PROTO_ETHERNET;
+
invalidate_flow_key(key);
return 0;
}
@@ -199,7 +206,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
if (err)
return err;
- flow_key->mpls.top_lse = lse;
+ flow_key->mpls.lse[0] = lse;
return 0;
}
@@ -1226,10 +1233,24 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
execute_hash(skb, key, a);
break;
- case OVS_ACTION_ATTR_PUSH_MPLS:
- err = push_mpls(skb, key, nla_data(a));
+ case OVS_ACTION_ATTR_PUSH_MPLS: {
+ struct ovs_action_push_mpls *mpls = nla_data(a);
+
+ err = push_mpls(skb, key, mpls->mpls_lse,
+ mpls->mpls_ethertype, skb->mac_len);
break;
+ }
+ case OVS_ACTION_ATTR_ADD_MPLS: {
+ struct ovs_action_add_mpls *mpls = nla_data(a);
+ __u16 mac_len = 0;
+
+ if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK)
+ mac_len = skb->mac_len;
+ err = push_mpls(skb, key, mpls->mpls_lse,
+ mpls->mpls_ethertype, mac_len);
+ break;
+ }
case OVS_ACTION_ATTR_POP_MPLS:
err = pop_mpls(skb, key, nla_get_be16(a));
break;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 848c6eb55064..e726159cfcfa 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -67,6 +67,7 @@ struct ovs_conntrack_info {
struct md_mark mark;
struct md_labels labels;
char timeout[CTNL_TIMEOUT_NAME_MAX];
+ struct nf_ct_timeout *nf_ct_timeout;
#if IS_ENABLED(CONFIG_NF_NAT)
struct nf_nat_range2 range; /* Only present for SRC NAT and DST NAT. */
#endif
@@ -524,6 +525,11 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
return -EPFNOSUPPORT;
}
+ /* The key extracted from the fragment that completed this datagram
+ * likely didn't have an L4 header, so regenerate it.
+ */
+ ovs_flow_key_update_l3l4(skb, key);
+
key->ip.frag = OVS_FRAG_TYPE_NONE;
skb_clear_hash(skb);
skb->ignore_df = 1;
@@ -697,6 +703,14 @@ static bool skb_nfct_cached(struct net *net,
if (help && rcu_access_pointer(help->helper) != info->helper)
return false;
}
+ if (info->nf_ct_timeout) {
+ struct nf_conn_timeout *timeout_ext;
+
+ timeout_ext = nf_ct_timeout_find(ct);
+ if (!timeout_ext || info->nf_ct_timeout !=
+ rcu_dereference(timeout_ext->timeout))
+ return false;
+ }
/* Force conntrack entry direction to the current packet? */
if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
/* Delete the conntrack entry if confirmed, else just release
@@ -889,6 +903,17 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
}
err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
+ if (err == NF_ACCEPT &&
+ ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
+ if (maniptype == NF_NAT_MANIP_SRC)
+ maniptype = NF_NAT_MANIP_DST;
+ else
+ maniptype = NF_NAT_MANIP_SRC;
+
+ err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
+ maniptype);
+ }
+
/* Mark NAT done if successful and update the flow key. */
if (err == NF_ACCEPT)
ovs_nat_update_key(key, skb, maniptype);
@@ -957,6 +982,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
+ bool add_helper = false;
+
/* Packets starting a new connection must be NATted before the
* helper, so that the helper knows about the NAT. We enforce
* this by delaying both NAT and helper calls for unconfirmed
@@ -974,16 +1001,17 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
}
/* Userspace may decide to perform a ct lookup without a helper
- * specified followed by a (recirculate and) commit with one.
- * Therefore, for unconfirmed connections which we will commit,
- * we need to attach the helper here.
+ * specified followed by a (recirculate and) commit with one,
+ * or attach a helper in a later commit. Therefore, for
+ * connections which we will commit, we may need to attach
+ * the helper here.
*/
- if (!nf_ct_is_confirmed(ct) && info->commit &&
- info->helper && !nfct_help(ct)) {
+ if (info->commit && info->helper && !nfct_help(ct)) {
int err = __nf_ct_try_assign_helper(ct, info->ct,
GFP_ATOMIC);
if (err)
return err;
+ add_helper = true;
/* helper installed, add seqadj if NAT is required */
if (info->nat && !nfct_seqadj(ct)) {
@@ -993,11 +1021,13 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
}
/* Call the helper only if:
- * - nf_conntrack_in() was executed above ("!cached") for a
- * confirmed connection, or
+ * - nf_conntrack_in() was executed above ("!cached") or a
+ * helper was just attached ("add_helper") for a confirmed
+ * connection, or
* - When committing an unconfirmed connection.
*/
- if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) &&
+ if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
+ info->commit) &&
ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
return -EINVAL;
}
@@ -1565,7 +1595,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
case OVS_CT_ATTR_TIMEOUT:
memcpy(info->timeout, nla_data(a), nla_len(a));
if (!memchr(info->timeout, '\0', nla_len(a))) {
- OVS_NLERR(log, "Invalid conntrack helper");
+ OVS_NLERR(log, "Invalid conntrack timeout");
return -EINVAL;
}
break;
@@ -1657,6 +1687,10 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
ct_info.timeout))
pr_info_ratelimited("Failed to associated timeout "
"policy `%s'\n", ct_info.timeout);
+ else
+ ct_info.nf_ct_timeout = rcu_dereference(
+ nf_ct_timeout_find(ct_info.ct)->timeout);
+
}
if (helper) {
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d01410e52097..659c2a790fe7 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -222,14 +222,15 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
struct dp_stats_percpu *stats;
u64 *stats_counter;
u32 n_mask_hit;
+ int error;
stats = this_cpu_ptr(dp->stats_percpu);
/* Look up flow. */
- flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
+ flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
+ &n_mask_hit);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
- int error;
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
@@ -246,7 +247,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
ovs_flow_stats_update(flow, key->tp.flags, skb);
sf_acts = rcu_dereference(flow->sf_acts);
- ovs_execute_actions(dp, skb, sf_acts, key);
+ error = ovs_execute_actions(dp, skb, sf_acts, key);
+ if (unlikely(error))
+ net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
+ ovs_dp_name(dp), error);
stats_counter = &stats->n_hit;
@@ -317,8 +321,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
}
/* Queue all of the segments. */
- skb = segs;
- do {
+ skb_list_walk_safe(segs, skb, nskb) {
if (gso_type & SKB_GSO_UDP && skb != segs)
key = &later_key;
@@ -326,17 +329,15 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
if (err)
break;
- } while ((skb = skb->next));
+ }
/* Free all of the segments. */
- skb = segs;
- do {
- nskb = skb->next;
+ skb_list_walk_safe(segs, skb, nskb) {
if (err)
kfree_skb(skb);
else
consume_skb(skb);
- } while ((skb = nskb));
+ }
return err;
}
@@ -346,7 +347,8 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
- + nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */
+ + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
+ + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
/* OVS_PACKET_ATTR_USERDATA */
if (upcall_info->userdata)
@@ -389,6 +391,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
size_t len;
unsigned int hlen;
int err, dp_ifindex;
+ u64 hash;
dp_ifindex = get_dpifindex(dp);
if (!dp_ifindex)
@@ -481,23 +484,30 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
}
/* Add OVS_PACKET_ATTR_MRU */
- if (upcall_info->mru) {
- if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
- upcall_info->mru)) {
- err = -ENOBUFS;
- goto out;
- }
- pad_packet(dp, user_skb);
+ if (upcall_info->mru &&
+ nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
+ err = -ENOBUFS;
+ goto out;
}
/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
- if (cutlen > 0) {
- if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
- skb->len)) {
- err = -ENOBUFS;
- goto out;
- }
- pad_packet(dp, user_skb);
+ if (cutlen > 0 &&
+ nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
+ err = -ENOBUFS;
+ goto out;
+ }
+
+ /* Add OVS_PACKET_ATTR_HASH */
+ hash = skb_get_hash_raw(skb);
+ if (skb->sw_hash)
+ hash |= OVS_PACKET_HASH_SW_BIT;
+
+ if (skb->l4_hash)
+ hash |= OVS_PACKET_HASH_L4_BIT;
+
+ if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
+ err = -ENOBUFS;
+ goto out;
}
/* Only reserve room for attribute header, packet data is added
@@ -539,6 +549,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct vport *input_vport;
u16 mru = 0;
+ u64 hash;
int len;
int err;
bool log = !a[OVS_PACKET_ATTR_PROBE];
@@ -564,6 +575,14 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
}
OVS_CB(packet)->mru = mru;
+ if (a[OVS_PACKET_ATTR_HASH]) {
+ hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
+
+ __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
+ !!(hash & OVS_PACKET_HASH_SW_BIT),
+ !!(hash & OVS_PACKET_HASH_L4_BIT));
+ }
+
/* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc();
err = PTR_ERR(flow);
@@ -701,9 +720,13 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
{
size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
- /* OVS_FLOW_ATTR_UFID */
+ /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
+ * see ovs_nla_put_identifier()
+ */
if (sfid && ovs_identifier_is_ufid(sfid))
len += nla_total_size(sfid->ufid_len);
+ else
+ len += nla_total_size(ovs_key_attr_size());
/* OVS_FLOW_ATTR_KEY */
if (!sfid || should_fill_key(sfid, ufid_flags))
@@ -879,7 +902,10 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
info->snd_portid, info->snd_seq, 0,
cmd, ufid_flags);
- BUG_ON(retval < 0);
+ if (WARN_ON_ONCE(retval < 0)) {
+ kfree_skb(skb);
+ skb = ERR_PTR(retval);
+ }
return skb;
}
@@ -1343,7 +1369,10 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
OVS_FLOW_CMD_DEL,
ufid_flags);
rcu_read_unlock();
- BUG_ON(err < 0);
+ if (WARN_ON_ONCE(err < 0)) {
+ kfree_skb(reply);
+ goto out_free;
+ }
ovs_notify(&dp_flow_genl_family, reply, info);
} else {
@@ -1351,6 +1380,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
}
}
+out_free:
ovs_flow_free(flow, true);
return 0;
unlock:
@@ -1542,10 +1572,59 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in
dp->user_features = 0;
}
-static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
+DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+
+static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
+{
+ u32 user_features = 0;
+
+ if (a[OVS_DP_ATTR_USER_FEATURES]) {
+ user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+
+ if (user_features & ~(OVS_DP_F_VPORT_PIDS |
+ OVS_DP_F_UNALIGNED |
+ OVS_DP_F_TC_RECIRC_SHARING))
+ return -EOPNOTSUPP;
+
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
+ return -EOPNOTSUPP;
+#endif
+ }
+
+ dp->user_features = user_features;
+
+ if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
+ static_branch_enable(&tc_recirc_sharing_support);
+ else
+ static_branch_disable(&tc_recirc_sharing_support);
+
+ return 0;
+}
+
+static int ovs_dp_stats_init(struct datapath *dp)
+{
+ dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
+ if (!dp->stats_percpu)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int ovs_dp_vport_init(struct datapath *dp)
{
- if (a[OVS_DP_ATTR_USER_FEATURES])
- dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+ int i;
+
+ dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!dp->ports)
+ return -ENOMEM;
+
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+ INIT_HLIST_HEAD(&dp->ports[i]);
+
+ return 0;
}
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
@@ -1556,7 +1635,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct vport *vport;
struct ovs_net *ovs_net;
- int err, i;
+ int err;
err = -EINVAL;
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
@@ -1569,35 +1648,26 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
- goto err_free_reply;
+ goto err_destroy_reply;
ovs_dp_set_net(dp, sock_net(skb->sk));
/* Allocate table. */
err = ovs_flow_tbl_init(&dp->table);
if (err)
- goto err_free_dp;
+ goto err_destroy_dp;
- dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
- if (!dp->stats_percpu) {
- err = -ENOMEM;
+ err = ovs_dp_stats_init(dp);
+ if (err)
goto err_destroy_table;
- }
-
- dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
- sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!dp->ports) {
- err = -ENOMEM;
- goto err_destroy_percpu;
- }
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
- INIT_HLIST_HEAD(&dp->ports[i]);
+ err = ovs_dp_vport_init(dp);
+ if (err)
+ goto err_destroy_stats;
err = ovs_meters_init(dp);
if (err)
- goto err_destroy_ports_array;
+ goto err_destroy_ports;
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
@@ -1607,7 +1677,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.port_no = OVSP_LOCAL;
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
- ovs_dp_change(dp, a);
+ err = ovs_dp_change(dp, a);
+ if (err)
+ goto err_destroy_meters;
/* So far only local changes have been made, now need the lock. */
ovs_lock();
@@ -1627,6 +1699,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_reset_user_features(skb, info);
}
+ ovs_unlock();
goto err_destroy_meters;
}
@@ -1643,17 +1716,16 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
return 0;
err_destroy_meters:
- ovs_unlock();
ovs_meters_exit(dp);
-err_destroy_ports_array:
+err_destroy_ports:
kfree(dp->ports);
-err_destroy_percpu:
+err_destroy_stats:
free_percpu(dp->stats_percpu);
err_destroy_table:
ovs_flow_tbl_destroy(&dp->table);
-err_free_dp:
+err_destroy_dp:
kfree(dp);
-err_free_reply:
+err_destroy_reply:
kfree_skb(reply);
err:
return err;
@@ -1733,7 +1805,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(dp))
goto err_unlock_free;
- ovs_dp_change(dp, info->attrs);
+ err = ovs_dp_change(dp, info->attrs);
+ if (err)
+ goto err_unlock_free;
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_DP_CMD_SET);
@@ -1850,7 +1924,7 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = {
/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
struct net *net, u32 portid, u32 seq,
- u32 flags, u8 cmd)
+ u32 flags, u8 cmd, gfp_t gfp)
{
struct ovs_header *ovs_header;
struct ovs_vport_stats vport_stats;
@@ -1871,7 +1945,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
goto nla_put_failure;
if (!net_eq(net, dev_net(vport->dev))) {
- int id = peernet2id_alloc(net, dev_net(vport->dev));
+ int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
goto nla_put_failure;
@@ -1912,11 +1986,12 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
struct sk_buff *skb;
int retval;
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!skb)
return ERR_PTR(-ENOMEM);
- retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
+ retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
+ GFP_KERNEL);
BUG_ON(retval < 0);
return skb;
@@ -2058,7 +2133,7 @@ restart:
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_NEW);
+ OVS_VPORT_CMD_NEW, GFP_KERNEL);
new_headroom = netdev_get_fwd_headroom(vport->dev);
@@ -2119,7 +2194,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_SET);
+ OVS_VPORT_CMD_SET, GFP_KERNEL);
BUG_ON(err < 0);
ovs_unlock();
@@ -2159,7 +2234,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_DEL);
+ OVS_VPORT_CMD_DEL, GFP_KERNEL);
BUG_ON(err < 0);
/* the vport deletion may trigger dp headroom update */
@@ -2206,7 +2281,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock_free;
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_GET);
+ OVS_VPORT_CMD_GET, GFP_ATOMIC);
BUG_ON(err < 0);
rcu_read_unlock();
@@ -2242,7 +2317,8 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI,
- OVS_VPORT_CMD_GET) < 0)
+ OVS_VPORT_CMD_GET,
+ GFP_ATOMIC) < 0)
goto out;
j++;
@@ -2263,7 +2339,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
- [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
[OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
@@ -2418,7 +2494,7 @@ static int __init dp_init(void)
{
int err;
- BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof_field(struct sk_buff, cb));
pr_info("Open vSwitch switching datapath\n");
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 751d34accdf9..e239a46c2f94 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -139,6 +139,18 @@ struct ovs_net {
bool xt_label;
};
+/**
+ * enum ovs_pkt_hash_types - hash info to include with a packet
+ * to send to userspace.
+ * @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software stack.
+ * @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash
+ * over transport ports.
+ */
+enum ovs_pkt_hash_types {
+ OVS_PACKET_HASH_SW_BIT = (1ULL << 32),
+ OVS_PACKET_HASH_L4_BIT = (1ULL << 33),
+};
+
extern unsigned int ovs_net_id;
void ovs_lock(void);
void ovs_unlock(void);
@@ -218,6 +230,8 @@ static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_family dp_vport_genl_family;
+DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index bc89e16e0505..9d375e74b607 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -523,78 +523,15 @@ static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
}
/**
- * key_extract - extracts a flow key from an Ethernet frame.
+ * key_extract_l3l4 - extracts L3/L4 header information.
* @skb: sk_buff that contains the frame, with skb->data pointing to the
- * Ethernet header
+ * L3 header
* @key: output flow key
*
- * The caller must ensure that skb->len >= ETH_HLEN.
- *
- * Returns 0 if successful, otherwise a negative errno value.
- *
- * Initializes @skb header fields as follows:
- *
- * - skb->mac_header: the L2 header.
- *
- * - skb->network_header: just past the L2 header, or just past the
- * VLAN header, to the first byte of the L2 payload.
- *
- * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
- * on output, then just past the IP header, if one is present and
- * of a correct length, otherwise the same as skb->network_header.
- * For other key->eth.type values it is left untouched.
- *
- * - skb->protocol: the type of the data starting at skb->network_header.
- * Equals to key->eth.type.
*/
-static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
+static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
{
int error;
- struct ethhdr *eth;
-
- /* Flags are always used as part of stats */
- key->tp.flags = 0;
-
- skb_reset_mac_header(skb);
-
- /* Link layer. */
- clear_vlan(key);
- if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
- if (unlikely(eth_type_vlan(skb->protocol)))
- return -EINVAL;
-
- skb_reset_network_header(skb);
- key->eth.type = skb->protocol;
- } else {
- eth = eth_hdr(skb);
- ether_addr_copy(key->eth.src, eth->h_source);
- ether_addr_copy(key->eth.dst, eth->h_dest);
-
- __skb_pull(skb, 2 * ETH_ALEN);
- /* We are going to push all headers that we pull, so no need to
- * update skb->csum here.
- */
-
- if (unlikely(parse_vlan(skb, key)))
- return -ENOMEM;
-
- key->eth.type = parse_ethertype(skb);
- if (unlikely(key->eth.type == htons(0)))
- return -ENOMEM;
-
- /* Multiple tagged packets need to retain TPID to satisfy
- * skb_vlan_pop(), which will later shift the ethertype into
- * skb->protocol.
- */
- if (key->eth.cvlan.tci & htons(VLAN_CFI_MASK))
- skb->protocol = key->eth.cvlan.tpid;
- else
- skb->protocol = key->eth.type;
-
- skb_reset_network_header(skb);
- __skb_push(skb, skb->data - skb_mac_header(skb));
- }
- skb_reset_mac_len(skb);
/* Network layer. */
if (key->eth.type == htons(ETH_P_IP)) {
@@ -623,6 +560,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
offset = nh->frag_off & htons(IP_OFFSET);
if (offset) {
key->ip.frag = OVS_FRAG_TYPE_LATER;
+ memset(&key->tp, 0, sizeof(key->tp));
return 0;
}
if (nh->frag_off & htons(IP_MF) ||
@@ -699,27 +637,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
memset(&key->ipv4, 0, sizeof(key->ipv4));
}
} else if (eth_p_mpls(key->eth.type)) {
- size_t stack_len = MPLS_HLEN;
+ u8 label_count = 1;
+ memset(&key->mpls, 0, sizeof(key->mpls));
skb_set_inner_network_header(skb, skb->mac_len);
while (1) {
__be32 lse;
- error = check_header(skb, skb->mac_len + stack_len);
+ error = check_header(skb, skb->mac_len +
+ label_count * MPLS_HLEN);
if (unlikely(error))
return 0;
memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
- if (stack_len == MPLS_HLEN)
- memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
+ if (label_count <= MPLS_LABEL_DEPTH)
+ memcpy(&key->mpls.lse[label_count - 1], &lse,
+ MPLS_HLEN);
- skb_set_inner_network_header(skb, skb->mac_len + stack_len);
+ skb_set_inner_network_header(skb, skb->mac_len +
+ label_count * MPLS_HLEN);
if (lse & htonl(MPLS_LS_S_MASK))
break;
- stack_len += MPLS_HLEN;
+ label_count++;
}
+ if (label_count > MPLS_LABEL_DEPTH)
+ label_count = MPLS_LABEL_DEPTH;
+
+ key->mpls.num_labels_mask = GENMASK(label_count - 1, 0);
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
@@ -740,8 +686,10 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
return error;
}
- if (key->ip.frag == OVS_FRAG_TYPE_LATER)
+ if (key->ip.frag == OVS_FRAG_TYPE_LATER) {
+ memset(&key->tp, 0, sizeof(key->tp));
return 0;
+ }
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
key->ip.frag = OVS_FRAG_TYPE_FIRST;
@@ -788,6 +736,92 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
return 0;
}
+/**
+ * key_extract - extracts a flow key from an Ethernet frame.
+ * @skb: sk_buff that contains the frame, with skb->data pointing to the
+ * Ethernet header
+ * @key: output flow key
+ *
+ * The caller must ensure that skb->len >= ETH_HLEN.
+ *
+ * Returns 0 if successful, otherwise a negative errno value.
+ *
+ * Initializes @skb header fields as follows:
+ *
+ * - skb->mac_header: the L2 header.
+ *
+ * - skb->network_header: just past the L2 header, or just past the
+ * VLAN header, to the first byte of the L2 payload.
+ *
+ * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
+ * on output, then just past the IP header, if one is present and
+ * of a correct length, otherwise the same as skb->network_header.
+ * For other key->eth.type values it is left untouched.
+ *
+ * - skb->protocol: the type of the data starting at skb->network_header.
+ * Equals to key->eth.type.
+ */
+static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ struct ethhdr *eth;
+
+ /* Flags are always used as part of stats */
+ key->tp.flags = 0;
+
+ skb_reset_mac_header(skb);
+
+ /* Link layer. */
+ clear_vlan(key);
+ if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
+ if (unlikely(eth_type_vlan(skb->protocol)))
+ return -EINVAL;
+
+ skb_reset_network_header(skb);
+ key->eth.type = skb->protocol;
+ } else {
+ eth = eth_hdr(skb);
+ ether_addr_copy(key->eth.src, eth->h_source);
+ ether_addr_copy(key->eth.dst, eth->h_dest);
+
+ __skb_pull(skb, 2 * ETH_ALEN);
+ /* We are going to push all headers that we pull, so no need to
+ * update skb->csum here.
+ */
+
+ if (unlikely(parse_vlan(skb, key)))
+ return -ENOMEM;
+
+ key->eth.type = parse_ethertype(skb);
+ if (unlikely(key->eth.type == htons(0)))
+ return -ENOMEM;
+
+ /* Multiple tagged packets need to retain TPID to satisfy
+ * skb_vlan_pop(), which will later shift the ethertype into
+ * skb->protocol.
+ */
+ if (key->eth.cvlan.tci & htons(VLAN_CFI_MASK))
+ skb->protocol = key->eth.cvlan.tpid;
+ else
+ skb->protocol = key->eth.type;
+
+ skb_reset_network_header(skb);
+ __skb_push(skb, skb->data - skb_mac_header(skb));
+ }
+
+ skb_reset_mac_len(skb);
+
+ /* Fill out L3/L4 key info, if any */
+ return key_extract_l3l4(skb, key);
+}
+
+/* In the case of conntrack fragment handling it expects L3 headers,
+ * add a helper.
+ */
+int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ return key_extract_l3l4(skb, key);
+}
+
int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
{
int res;
@@ -816,6 +850,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key)
{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ struct tc_skb_ext *tc_ext;
+#endif
int res, err;
/* Extract metadata from packet. */
@@ -848,7 +885,17 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
if (res < 0)
return res;
key->mac_proto = res;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ if (static_branch_unlikely(&tc_recirc_sharing_support)) {
+ tc_ext = skb_ext_find(skb, TC_SKB_EXT);
+ key->recirc_id = tc_ext ? tc_ext->chain : 0;
+ } else {
+ key->recirc_id = 0;
+ }
+#else
key->recirc_id = 0;
+#endif
err = key_extract(skb, key);
if (!err)
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index a5506e2d4b7a..758a8c77f736 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -30,13 +30,14 @@ enum sw_flow_mac_proto {
MAC_PROTO_ETHERNET,
};
#define SW_FLOW_KEY_INVALID 0x80
+#define MPLS_LABEL_DEPTH 3
/* Store options at the end of the array if they are less than the
* maximum size. This allows us to get the benefits of variable length
* matching for small options.
*/
#define TUN_METADATA_OFFSET(opt_len) \
- (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len)
+ (sizeof_field(struct sw_flow_key, tun_opts) - opt_len)
#define TUN_METADATA_OPTS(flow_key, opt_len) \
((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
@@ -51,7 +52,7 @@ struct vlan_head {
#define OVS_SW_FLOW_KEY_METADATA_SIZE \
(offsetof(struct sw_flow_key, recirc_id) + \
- FIELD_SIZEOF(struct sw_flow_key, recirc_id))
+ sizeof_field(struct sw_flow_key, recirc_id))
struct ovs_key_nsh {
struct ovs_nsh_key_base base;
@@ -85,9 +86,6 @@ struct sw_flow_key {
*/
union {
struct {
- __be32 top_lse; /* top label stack entry */
- } mpls;
- struct {
u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
u8 tos; /* IP ToS. */
u8 ttl; /* IP TTL/hop limit. */
@@ -135,6 +133,11 @@ struct sw_flow_key {
} nd;
};
} ipv6;
+ struct {
+ u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */
+ __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */
+ } mpls;
+
struct ovs_key_nsh nsh; /* network service header */
};
struct {
@@ -166,7 +169,6 @@ struct sw_flow_key_range {
struct sw_flow_mask {
int ref_count;
struct rcu_head rcu;
- struct list_head list;
struct sw_flow_key_range range;
struct sw_flow_key key;
};
@@ -270,6 +272,7 @@ void ovs_flow_stats_clear(struct sw_flow *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
+int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key);
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb,
struct sw_flow_key *key);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index d7559c64795d..7da4230627f5 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -79,6 +79,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
case OVS_ACTION_ATTR_SET_MASKED:
case OVS_ACTION_ATTR_METER:
case OVS_ACTION_ATTR_CHECK_PKT_LEN:
+ case OVS_ACTION_ATTR_ADD_MPLS:
default:
return true;
}
@@ -424,7 +425,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) },
[OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
.next = ovs_tunnel_key_lens, },
- [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
+ [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE },
[OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) },
[OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
[OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) },
@@ -1628,10 +1629,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
const struct ovs_key_mpls *mpls_key;
+ u32 hdr_len;
+ u32 label_count, label_count_mask, i;
mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
- SW_FLOW_KEY_PUT(match, mpls.top_lse,
- mpls_key->mpls_lse, is_mask);
+ hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
+ label_count = hdr_len / sizeof(struct ovs_key_mpls);
+
+ if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
+ hdr_len % sizeof(struct ovs_key_mpls))
+ return -EINVAL;
+
+ label_count_mask = GENMASK(label_count - 1, 0);
+
+ for (i = 0 ; i < label_count; i++)
+ SW_FLOW_KEY_PUT(match, mpls.lse[i],
+ mpls_key[i].mpls_lse, is_mask);
+
+ SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
+ label_count_mask, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
}
@@ -2114,13 +2130,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
} else if (eth_p_mpls(swkey->eth.type)) {
+ u8 i, num_labels;
struct ovs_key_mpls *mpls_key;
- nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
+ num_labels = hweight_long(output->mpls.num_labels_mask);
+ nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
+ num_labels * sizeof(*mpls_key));
if (!nla)
goto nla_put_failure;
+
mpls_key = nla_data(nla);
- mpls_key->mpls_lse = output->mpls.top_lse;
+ for (i = 0; i < num_labels; i++)
+ mpls_key[i].mpls_lse = output->mpls.lse[i];
}
if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -2406,13 +2427,14 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci, bool log);
+ __be16 eth_type, __be16 vlan_tci,
+ u32 mpls_label_count, bool log);
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- bool log, bool last)
+ u32 mpls_label_count, bool log, bool last)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
@@ -2463,7 +2485,7 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
return err;
err = __ovs_nla_copy_actions(net, actions, key, sfa,
- eth_type, vlan_tci, log);
+ eth_type, vlan_tci, mpls_label_count, log);
if (err)
return err;
@@ -2478,7 +2500,7 @@ static int validate_and_copy_clone(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- bool log, bool last)
+ u32 mpls_label_count, bool log, bool last)
{
int start, err;
u32 exec;
@@ -2498,7 +2520,7 @@ static int validate_and_copy_clone(struct net *net,
return err;
err = __ovs_nla_copy_actions(net, attr, key, sfa,
- eth_type, vlan_tci, log);
+ eth_type, vlan_tci, mpls_label_count, log);
if (err)
return err;
@@ -2864,6 +2886,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
+ u32 mpls_label_count,
bool log, bool last)
{
const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
@@ -2912,7 +2935,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
- eth_type, vlan_tci, log);
+ eth_type, vlan_tci, mpls_label_count, log);
if (err)
return err;
@@ -2925,7 +2948,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
- eth_type, vlan_tci, log);
+ eth_type, vlan_tci, mpls_label_count, log);
if (err)
return err;
@@ -2952,7 +2975,8 @@ static int copy_action(const struct nlattr *from,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci, bool log)
+ __be16 eth_type, __be16 vlan_tci,
+ u32 mpls_label_count, bool log)
{
u8 mac_proto = ovs_key_mac_proto(key);
const struct nlattr *a;
@@ -2982,6 +3006,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_METER] = sizeof(u32),
[OVS_ACTION_ATTR_CLONE] = (u32)-1,
[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
+ [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -3049,6 +3074,33 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
case OVS_ACTION_ATTR_RECIRC:
break;
+ case OVS_ACTION_ATTR_ADD_MPLS: {
+ const struct ovs_action_add_mpls *mpls = nla_data(a);
+
+ if (!eth_p_mpls(mpls->mpls_ethertype))
+ return -EINVAL;
+
+ if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK) {
+ if (vlan_tci & htons(VLAN_CFI_MASK) ||
+ (eth_type != htons(ETH_P_IP) &&
+ eth_type != htons(ETH_P_IPV6) &&
+ eth_type != htons(ETH_P_ARP) &&
+ eth_type != htons(ETH_P_RARP) &&
+ !eth_p_mpls(eth_type)))
+ return -EINVAL;
+ mpls_label_count++;
+ } else {
+ if (mac_proto == MAC_PROTO_ETHERNET) {
+ mpls_label_count = 1;
+ mac_proto = MAC_PROTO_NONE;
+ } else {
+ mpls_label_count++;
+ }
+ }
+ eth_type = mpls->mpls_ethertype;
+ break;
+ }
+
case OVS_ACTION_ATTR_PUSH_MPLS: {
const struct ovs_action_push_mpls *mpls = nla_data(a);
@@ -3065,25 +3117,41 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
!eth_p_mpls(eth_type)))
return -EINVAL;
eth_type = mpls->mpls_ethertype;
+ mpls_label_count++;
break;
}
- case OVS_ACTION_ATTR_POP_MPLS:
+ case OVS_ACTION_ATTR_POP_MPLS: {
+ __be16 proto;
if (vlan_tci & htons(VLAN_CFI_MASK) ||
!eth_p_mpls(eth_type))
return -EINVAL;
- /* Disallow subsequent L2.5+ set and mpls_pop actions
- * as there is no check here to ensure that the new
- * eth_type is valid and thus set actions could
- * write off the end of the packet or otherwise
- * corrupt it.
+ /* Disallow subsequent L2.5+ set actions and mpls_pop
+ * actions once the last MPLS label in the packet is
+ * is popped as there is no check here to ensure that
+ * the new eth type is valid and thus set actions could
+ * write off the end of the packet or otherwise corrupt
+ * it.
*
* Support for these actions is planned using packet
* recirculation.
*/
- eth_type = htons(0);
+ proto = nla_get_be16(a);
+
+ if (proto == htons(ETH_P_TEB) &&
+ mac_proto != MAC_PROTO_NONE)
+ return -EINVAL;
+
+ mpls_label_count--;
+
+ if (!eth_p_mpls(proto) || !mpls_label_count)
+ eth_type = htons(0);
+ else
+ eth_type = proto;
+
break;
+ }
case OVS_ACTION_ATTR_SET:
err = validate_set(a, key, sfa,
@@ -3106,6 +3174,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_sample(net, a, key, sfa,
eth_type, vlan_tci,
+ mpls_label_count,
log, last);
if (err)
return err;
@@ -3176,6 +3245,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_clone(net, a, key, sfa,
eth_type, vlan_tci,
+ mpls_label_count,
log, last);
if (err)
return err;
@@ -3188,8 +3258,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_check_pkt_len(net, a, key, sfa,
eth_type,
- vlan_tci, log,
- last);
+ vlan_tci,
+ mpls_label_count,
+ log, last);
if (err)
return err;
skip_copy = true;
@@ -3219,14 +3290,18 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
struct sw_flow_actions **sfa, bool log)
{
int err;
+ u32 mpls_label_count = 0;
*sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
+ if (eth_p_mpls(key->eth.type))
+ mpls_label_count = hweight_long(key->mpls.num_labels_mask);
+
(*sfa)->orig_len = nla_len(attr);
err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
- key->eth.vlan.tci, log);
+ key->eth.vlan.tci, mpls_label_count, log);
if (err)
ovs_nla_free_flow_actions(*sfa);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index cf3582c5ed70..5904e93e5765 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -34,8 +34,13 @@
#include <net/ndisc.h>
#define TBL_MIN_BUCKETS 1024
+#define MASK_ARRAY_SIZE_MIN 16
#define REHASH_INTERVAL (10 * 60 * HZ)
+#define MC_HASH_SHIFT 8
+#define MC_HASH_ENTRIES (1u << MC_HASH_SHIFT)
+#define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
+
static struct kmem_cache *flow_cache;
struct kmem_cache *flow_stats_cache __read_mostly;
@@ -164,14 +169,133 @@ static struct table_instance *table_instance_alloc(int new_size)
return ti;
}
+static struct mask_array *tbl_mask_array_alloc(int size)
+{
+ struct mask_array *new;
+
+ size = max(MASK_ARRAY_SIZE_MIN, size);
+ new = kzalloc(sizeof(struct mask_array) +
+ sizeof(struct sw_flow_mask *) * size, GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ new->count = 0;
+ new->max = size;
+
+ return new;
+}
+
+static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
+{
+ struct mask_array *old;
+ struct mask_array *new;
+
+ new = tbl_mask_array_alloc(size);
+ if (!new)
+ return -ENOMEM;
+
+ old = ovsl_dereference(tbl->mask_array);
+ if (old) {
+ int i;
+
+ for (i = 0; i < old->max; i++) {
+ if (ovsl_dereference(old->masks[i]))
+ new->masks[new->count++] = old->masks[i];
+ }
+ }
+
+ rcu_assign_pointer(tbl->mask_array, new);
+ kfree_rcu(old, rcu);
+
+ return 0;
+}
+
+static int tbl_mask_array_add_mask(struct flow_table *tbl,
+ struct sw_flow_mask *new)
+{
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int err, ma_count = READ_ONCE(ma->count);
+
+ if (ma_count >= ma->max) {
+ err = tbl_mask_array_realloc(tbl, ma->max +
+ MASK_ARRAY_SIZE_MIN);
+ if (err)
+ return err;
+
+ ma = ovsl_dereference(tbl->mask_array);
+ }
+
+ BUG_ON(ovsl_dereference(ma->masks[ma_count]));
+
+ rcu_assign_pointer(ma->masks[ma_count], new);
+ WRITE_ONCE(ma->count, ma_count +1);
+
+ return 0;
+}
+
+static void tbl_mask_array_del_mask(struct flow_table *tbl,
+ struct sw_flow_mask *mask)
+{
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int i, ma_count = READ_ONCE(ma->count);
+
+ /* Remove the deleted mask pointers from the array */
+ for (i = 0; i < ma_count; i++) {
+ if (mask == ovsl_dereference(ma->masks[i]))
+ goto found;
+ }
+
+ BUG();
+ return;
+
+found:
+ WRITE_ONCE(ma->count, ma_count -1);
+
+ rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
+ RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
+
+ kfree_rcu(mask, rcu);
+
+ /* Shrink the mask array if necessary. */
+ if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
+ ma_count <= (ma->max / 3))
+ tbl_mask_array_realloc(tbl, ma->max / 2);
+}
+
+/* Remove 'mask' from the mask list, if it is not needed any more. */
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+ if (mask) {
+ /* ovs-lock is required to protect mask-refcount and
+ * mask list.
+ */
+ ASSERT_OVSL();
+ BUG_ON(!mask->ref_count);
+ mask->ref_count--;
+
+ if (!mask->ref_count)
+ tbl_mask_array_del_mask(tbl, mask);
+ }
+}
+
int ovs_flow_tbl_init(struct flow_table *table)
{
struct table_instance *ti, *ufid_ti;
+ struct mask_array *ma;
- ti = table_instance_alloc(TBL_MIN_BUCKETS);
+ table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
+ MC_HASH_ENTRIES,
+ __alignof__(struct mask_cache_entry));
+ if (!table->mask_cache)
+ return -ENOMEM;
+ ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
+ if (!ma)
+ goto free_mask_cache;
+
+ ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ti)
- return -ENOMEM;
+ goto free_mask_array;
ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ufid_ti)
@@ -179,7 +303,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
rcu_assign_pointer(table->ti, ti);
rcu_assign_pointer(table->ufid_ti, ufid_ti);
- INIT_LIST_HEAD(&table->mask_list);
+ rcu_assign_pointer(table->mask_array, ma);
table->last_rehash = jiffies;
table->count = 0;
table->ufid_count = 0;
@@ -187,6 +311,10 @@ int ovs_flow_tbl_init(struct flow_table *table)
free_ti:
__table_instance_destroy(ti);
+free_mask_array:
+ kfree(ma);
+free_mask_cache:
+ free_percpu(table->mask_cache);
return -ENOMEM;
}
@@ -197,7 +325,28 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
__table_instance_destroy(ti);
}
-static void table_instance_destroy(struct table_instance *ti,
+static void table_instance_flow_free(struct flow_table *table,
+ struct table_instance *ti,
+ struct table_instance *ufid_ti,
+ struct sw_flow *flow,
+ bool count)
+{
+ hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
+ if (count)
+ table->count--;
+
+ if (ovs_identifier_is_ufid(&flow->id)) {
+ hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
+
+ if (count)
+ table->ufid_count--;
+ }
+
+ flow_mask_remove(table, flow->mask);
+}
+
+static void table_instance_destroy(struct flow_table *table,
+ struct table_instance *ti,
struct table_instance *ufid_ti,
bool deferred)
{
@@ -214,13 +363,12 @@ static void table_instance_destroy(struct table_instance *ti,
struct sw_flow *flow;
struct hlist_head *head = &ti->buckets[i];
struct hlist_node *n;
- int ver = ti->node_ver;
- int ufid_ver = ufid_ti->node_ver;
- hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
- hlist_del_rcu(&flow->flow_table.node[ver]);
- if (ovs_identifier_is_ufid(&flow->id))
- hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
+ hlist_for_each_entry_safe(flow, n, head,
+ flow_table.node[ti->node_ver]) {
+
+ table_instance_flow_free(table, ti, ufid_ti,
+ flow, false);
ovs_flow_free(flow, deferred);
}
}
@@ -243,7 +391,9 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
struct table_instance *ti = rcu_dereference_raw(table->ti);
struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
- table_instance_destroy(ti, ufid_ti, false);
+ free_percpu(table->mask_cache);
+ kfree_rcu(rcu_dereference_raw(table->mask_array), rcu);
+ table_instance_destroy(table, ti, ufid_ti, false);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -359,7 +509,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
flow_table->count = 0;
flow_table->ufid_count = 0;
- table_instance_destroy(old_ti, old_ufid_ti, true);
+ table_instance_destroy(flow_table, old_ti, old_ufid_ti, true);
return 0;
err_free_ti:
@@ -370,13 +520,10 @@ err_free_ti:
static u32 flow_hash(const struct sw_flow_key *key,
const struct sw_flow_key_range *range)
{
- int key_start = range->start;
- int key_end = range->end;
- const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
- int hash_u32s = (key_end - key_start) >> 2;
+ const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);
/* Make sure number of hash bytes are multiple of u32. */
- BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+ int hash_u32s = range_n_bytes(range) >> 2;
return jhash2(hash_key, hash_u32s, 0);
}
@@ -425,7 +572,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
const struct sw_flow_key *unmasked,
- const struct sw_flow_mask *mask)
+ const struct sw_flow_mask *mask,
+ u32 *n_mask_hit)
{
struct sw_flow *flow;
struct hlist_head *head;
@@ -435,6 +583,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
ovs_flow_mask_key(&masked_key, unmasked, false, mask);
hash = flow_hash(&masked_key, &mask->range);
head = find_bucket(ti, hash);
+ (*n_mask_hit)++;
+
hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
if (flow->mask == mask && flow->flow_table.hash == hash &&
flow_cmp_masked_key(flow, &masked_key, &mask->range))
@@ -443,46 +593,147 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
return NULL;
}
-struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
- const struct sw_flow_key *key,
- u32 *n_mask_hit)
+/* Flow lookup does full lookup on flow table. It starts with
+ * mask from index passed in *index.
+ */
+static struct sw_flow *flow_lookup(struct flow_table *tbl,
+ struct table_instance *ti,
+ struct mask_array *ma,
+ const struct sw_flow_key *key,
+ u32 *n_mask_hit,
+ u32 *index)
{
- struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ struct sw_flow *flow;
struct sw_flow_mask *mask;
+ int i;
+
+ if (likely(*index < ma->max)) {
+ mask = rcu_dereference_ovsl(ma->masks[*index]);
+ if (mask) {
+ flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+ if (flow)
+ return flow;
+ }
+ }
+
+ for (i = 0; i < ma->max; i++) {
+
+ if (i == *index)
+ continue;
+
+ mask = rcu_dereference_ovsl(ma->masks[i]);
+ if (unlikely(!mask))
+ break;
+
+ flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+ if (flow) { /* Found */
+ *index = i;
+ return flow;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * mask_cache maps flow to probable mask. This cache is not tightly
+ * coupled cache, It means updates to mask list can result in inconsistent
+ * cache entry in mask cache.
+ * This is per cpu cache and is divided in MC_HASH_SEGS segments.
+ * In case of a hash collision the entry is hashed in next segment.
+ * */
+struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
+ const struct sw_flow_key *key,
+ u32 skb_hash,
+ u32 *n_mask_hit)
+{
+ struct mask_array *ma = rcu_dereference(tbl->mask_array);
+ struct table_instance *ti = rcu_dereference(tbl->ti);
+ struct mask_cache_entry *entries, *ce;
struct sw_flow *flow;
+ u32 hash;
+ int seg;
*n_mask_hit = 0;
- list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
- (*n_mask_hit)++;
- flow = masked_flow_lookup(ti, key, mask);
- if (flow) /* Found */
+ if (unlikely(!skb_hash)) {
+ u32 mask_index = 0;
+
+ return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
+ }
+
+ /* Pre and post recirulation flows usually have the same skb_hash
+ * value. To avoid hash collisions, rehash the 'skb_hash' with
+ * 'recirc_id'. */
+ if (key->recirc_id)
+ skb_hash = jhash_1word(skb_hash, key->recirc_id);
+
+ ce = NULL;
+ hash = skb_hash;
+ entries = this_cpu_ptr(tbl->mask_cache);
+
+ /* Find the cache entry 'ce' to operate on. */
+ for (seg = 0; seg < MC_HASH_SEGS; seg++) {
+ int index = hash & (MC_HASH_ENTRIES - 1);
+ struct mask_cache_entry *e;
+
+ e = &entries[index];
+ if (e->skb_hash == skb_hash) {
+ flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
+ &e->mask_index);
+ if (!flow)
+ e->skb_hash = 0;
return flow;
+ }
+
+ if (!ce || e->skb_hash < ce->skb_hash)
+ ce = e; /* A better replacement cache candidate. */
+
+ hash >>= MC_HASH_SHIFT;
}
- return NULL;
+
+ /* Cache miss, do full lookup. */
+ flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
+ if (flow)
+ ce->skb_hash = skb_hash;
+
+ return flow;
}
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
const struct sw_flow_key *key)
{
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
u32 __always_unused n_mask_hit;
+ u32 index = 0;
- return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit);
+ return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
}
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
const struct sw_flow_match *match)
{
- struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
- struct sw_flow_mask *mask;
- struct sw_flow *flow;
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int i;
/* Always called under ovs-mutex. */
- list_for_each_entry(mask, &tbl->mask_list, list) {
- flow = masked_flow_lookup(ti, match->key, mask);
+ for (i = 0; i < ma->max; i++) {
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ u32 __always_unused n_mask_hit;
+ struct sw_flow_mask *mask;
+ struct sw_flow *flow;
+
+ mask = ovsl_dereference(ma->masks[i]);
+ if (!mask)
+ continue;
+
+ flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
if (flow && ovs_identifier_is_key(&flow->id) &&
- ovs_flow_cmp_unmasked_key(flow, match))
+ ovs_flow_cmp_unmasked_key(flow, match)) {
return flow;
+ }
}
+
return NULL;
}
@@ -528,13 +779,8 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
int ovs_flow_tbl_num_masks(const struct flow_table *table)
{
- struct sw_flow_mask *mask;
- int num = 0;
-
- list_for_each_entry(mask, &table->mask_list, list)
- num++;
-
- return num;
+ struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
+ return READ_ONCE(ma->count);
}
static struct table_instance *table_instance_expand(struct table_instance *ti,
@@ -543,24 +789,6 @@ static struct table_instance *table_instance_expand(struct table_instance *ti,
return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
}
-/* Remove 'mask' from the mask list, if it is not needed any more. */
-static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
-{
- if (mask) {
- /* ovs-lock is required to protect mask-refcount and
- * mask list.
- */
- ASSERT_OVSL();
- BUG_ON(!mask->ref_count);
- mask->ref_count--;
-
- if (!mask->ref_count) {
- list_del_rcu(&mask->list);
- kfree_rcu(mask, rcu);
- }
- }
-}
-
/* Must be called with OVS mutex held. */
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
@@ -568,17 +796,7 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
BUG_ON(table->count == 0);
- hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
- table->count--;
- if (ovs_identifier_is_ufid(&flow->id)) {
- hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
- table->ufid_count--;
- }
-
- /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
- * accessible as long as the RCU read lock is held.
- */
- flow_mask_remove(table, flow->mask);
+ table_instance_flow_free(table, ti, ufid_ti, flow, true);
}
static struct sw_flow_mask *mask_alloc(void)
@@ -606,13 +824,16 @@ static bool mask_equal(const struct sw_flow_mask *a,
static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
const struct sw_flow_mask *mask)
{
- struct list_head *ml;
+ struct mask_array *ma;
+ int i;
+
+ ma = ovsl_dereference(tbl->mask_array);
+ for (i = 0; i < ma->max; i++) {
+ struct sw_flow_mask *t;
+ t = ovsl_dereference(ma->masks[i]);
- list_for_each(ml, &tbl->mask_list) {
- struct sw_flow_mask *m;
- m = container_of(ml, struct sw_flow_mask, list);
- if (mask_equal(mask, m))
- return m;
+ if (t && mask_equal(mask, t))
+ return t;
}
return NULL;
@@ -623,6 +844,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
const struct sw_flow_mask *new)
{
struct sw_flow_mask *mask;
+
mask = flow_mask_find(tbl, new);
if (!mask) {
/* Allocate a new mask if none exsits. */
@@ -631,7 +853,12 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
return -ENOMEM;
mask->key = new->key;
mask->range = new->range;
- list_add_rcu(&mask->list, &tbl->mask_list);
+
+ /* Add mask to mask-list. */
+ if (tbl_mask_array_add_mask(tbl, mask)) {
+ kfree(mask);
+ return -ENOMEM;
+ }
} else {
BUG_ON(!mask->ref_count);
mask->ref_count++;
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index bc52045b63ff..8a5cea6ae111 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -22,6 +22,17 @@
#include "flow.h"
+struct mask_cache_entry {
+ u32 skb_hash;
+ u32 mask_index;
+};
+
+struct mask_array {
+ struct rcu_head rcu;
+ int count, max;
+ struct sw_flow_mask __rcu *masks[];
+};
+
struct table_instance {
struct hlist_head *buckets;
unsigned int n_buckets;
@@ -34,7 +45,8 @@ struct table_instance {
struct flow_table {
struct table_instance __rcu *ti;
struct table_instance __rcu *ufid_ti;
- struct list_head mask_list;
+ struct mask_cache_entry __percpu *mask_cache;
+ struct mask_array __rcu *mask_array;
unsigned long last_rehash;
unsigned int count;
unsigned int ufid_count;
@@ -60,8 +72,9 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table);
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
u32 *bucket, u32 *idx);
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
- const struct sw_flow_key *,
- u32 *n_mask_hit);
+ const struct sw_flow_key *,
+ u32 skb_hash,
+ u32 *n_mask_hit);
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *);
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index d2437b5b2f6a..58a7b8312c28 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -137,7 +137,7 @@ static void do_setup(struct net_device *netdev)
netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
IFF_NO_QUEUE;
netdev->needs_free_netdev = true;
- netdev->priv_destructor = internal_dev_destructor;
+ netdev->priv_destructor = NULL;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
netdev->rtnl_link_ops = &internal_dev_link_ops;
@@ -159,7 +159,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
struct internal_dev *internal_dev;
struct net_device *dev;
int err;
- bool free_vport = true;
vport = ovs_vport_alloc(0, &ovs_internal_vport_ops, parms);
if (IS_ERR(vport)) {
@@ -190,10 +189,9 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
rtnl_lock();
err = register_netdevice(vport->dev);
- if (err) {
- free_vport = false;
+ if (err)
goto error_unlock;
- }
+ vport->dev->priv_destructor = internal_dev_destructor;
dev_set_promiscuity(vport->dev, 1);
rtnl_unlock();
@@ -207,8 +205,7 @@ error_unlock:
error_free_netdev:
free_netdev(dev);
error_free_vport:
- if (free_vport)
- ovs_vport_free(vport);
+ ovs_vport_free(vport);
error:
return ERR_PTR(err);
}
@@ -237,7 +234,7 @@ static netdev_tx_t internal_dev_recv(struct sk_buff *skb)
}
skb_dst_drop(skb);
- nf_reset(skb);
+ nf_reset_ct(skb);
secpath_reset(skb);
skb->pkt_type = PACKET_HOST;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 3fc38d16c456..5da9392b03d6 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -403,8 +403,9 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
ids = rcu_dereference(vport->upcall_portids);
- if (ids->n_ids == 1 && ids->ids[0] == 0)
- return 0;
+ /* If there is only one portid, select it in the fast-path. */
+ if (ids->n_ids == 1)
+ return ids->ids[0];
hash = skb_get_hash(skb);
ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);
OpenPOWER on IntegriCloud