Diffstat (limited to 'net')
-rw-r--r--  net/9p/client.c                              |  11
-rw-r--r--  net/caif/chnl_net.c                          |   2
-rw-r--r--  net/ceph/Makefile                            |   1
-rw-r--r--  net/ceph/ceph_common.c                       |   8
-rw-r--r--  net/ceph/crypto.c                            |   6
-rw-r--r--  net/ceph/debugfs.c                           |  17
-rw-r--r--  net/ceph/messenger.c                         | 188
-rw-r--r--  net/ceph/mon_client.c                        |   2
-rw-r--r--  net/ceph/osd_client.c                        |  67
-rw-r--r--  net/ceph/osdmap.c                            |  71
-rw-r--r--  net/ceph/striper.c                           | 261
-rw-r--r--  net/core/dev.c                               |   2
-rw-r--r--  net/core/dev_addr_lists.c                    |   2
-rw-r--r--  net/core/neighbour.c                         |  40
-rw-r--r--  net/core/sysctl_net_core.c                   |   1
-rw-r--r--  net/core/utils.c                             |  23
-rw-r--r--  net/dns_resolver/dns_key.c                   |  12
-rw-r--r--  net/ife/ife.c                                |  38
-rw-r--r--  net/ipv4/ip_gre.c                            |   6
-rw-r--r--  net/ipv4/ip_output.c                         |   8
-rw-r--r--  net/ipv4/netfilter/Makefile                  |   5
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic_main.c  |   2
-rw-r--r--  net/ipv4/route.c                             |   1
-rw-r--r--  net/ipv4/tcp.c                               |   8
-rw-r--r--  net/ipv4/tcp_input.c                         |   7
-rw-r--r--  net/ipv6/route.c                             |   2
-rw-r--r--  net/ipv6/seg6_iptunnel.c                     |   2
-rw-r--r--  net/l2tp/l2tp_core.c                         | 265
-rw-r--r--  net/l2tp/l2tp_core.h                         |   7
-rw-r--r--  net/l2tp/l2tp_debugfs.c                      |  18
-rw-r--r--  net/l2tp/l2tp_netlink.c                      |  33
-rw-r--r--  net/l2tp/l2tp_ppp.c                          |  36
-rw-r--r--  net/llc/af_llc.c                             |  14
-rw-r--r--  net/llc/llc_c_ac.c                           |   9
-rw-r--r--  net/llc/llc_conn.c                           |  22
-rw-r--r--  net/netlabel/netlabel_unlabeled.c            |  10
-rw-r--r--  net/packet/af_packet.c                       |  23
-rw-r--r--  net/qrtr/qrtr.c                              |   1
-rw-r--r--  net/rds/send.c                               |  15
-rw-r--r--  net/sched/act_ife.c                          |   9
-rw-r--r--  net/sctp/chunk.c                             |  10
-rw-r--r--  net/sctp/ipv6.c                              | 105
-rw-r--r--  net/sctp/output.c                            |  34
-rw-r--r--  net/sctp/protocol.c                          |  43
-rw-r--r--  net/sctp/sm_make_chunk.c                     |  12
-rw-r--r--  net/sctp/sm_statefuns.c                      |  18
-rw-r--r--  net/sctp/socket.c                            |  70
-rw-r--r--  net/smc/af_smc.c                             |  10
-rw-r--r--  net/socket.c                                 |   2
-rw-r--r--  net/strparser/strparser.c                    |   9
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_crypto.c        |   3
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_seal.c          |   5
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_unseal.c        |   4
-rw-r--r--  net/sunrpc/cache.c                           |  32
-rw-r--r--  net/sunrpc/clnt.c                            |   8
-rw-r--r--  net/sunrpc/rpc_pipe.c                        |   1
-rw-r--r--  net/sunrpc/sched.c                           |  10
-rw-r--r--  net/sunrpc/stats.c                           |  16
-rw-r--r--  net/sunrpc/sunrpc.h                          |   6
-rw-r--r--  net/sunrpc/svc.c                             | 118
-rw-r--r--  net/sunrpc/svc_xprt.c                        |  34
-rw-r--r--  net/sunrpc/svcsock.c                         |   8
-rw-r--r--  net/sunrpc/xdr.c                             |  82
-rw-r--r--  net/sunrpc/xprt.c                            |  34
-rw-r--r--  net/sunrpc/xprtrdma/backchannel.c            |   7
-rw-r--r--  net/sunrpc/xprtrdma/fmr_ops.c                |  13
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c               |  53
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c               |  32
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma.c               |   4
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c      |  33
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c     |  38
-rw-r--r--  net/sunrpc/xprtrdma/transport.c              |  43
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c                  |  44
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h              |   4
-rw-r--r--  net/sunrpc/xprtsock.c                        |   4
-rw-r--r--  net/tipc/monitor.c                           |   2
-rw-r--r--  net/tipc/name_table.c                        |  34
-rw-r--r--  net/tipc/name_table.h                        |   2
-rw-r--r--  net/tipc/net.c                               |   2
-rw-r--r--  net/tipc/netlink.c                           |   5
-rw-r--r--  net/tipc/node.c                              |  11
-rw-r--r--  net/tipc/socket.c                            |   4
-rw-r--r--  net/tipc/subscr.c                            |   5
-rw-r--r--  net/tls/tls_sw.c                             |  10
-rw-r--r--  net/vmw_vsock/af_vsock.c                     |   6
85 files changed, 1572 insertions, 713 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index b433aff5ff13..21e6df1cc70f 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -190,7 +190,9 @@ static int parse_opts(char *opts, struct p9_client *clnt)
p9_debug(P9_DEBUG_ERROR,
"problem allocating copy of trans arg\n");
goto free_and_return;
- }
+ }
+
+ v9fs_put_trans(clnt->trans_mod);
clnt->trans_mod = v9fs_get_trans_by_name(s);
if (clnt->trans_mod == NULL) {
pr_info("Could not find request transport: %s\n",
@@ -226,6 +228,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
}
free_and_return:
+ v9fs_put_trans(clnt->trans_mod);
kfree(tmp_options);
return ret;
}
@@ -769,7 +772,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
if (err < 0) {
if (err != -ERESTARTSYS && err != -EFAULT)
c->status = Disconnected;
- goto reterr;
+ goto recalc_sigpending;
}
again:
/* Wait for the response */
@@ -804,6 +807,7 @@ again:
if (req->status == REQ_STATUS_RCVD)
err = 0;
}
+recalc_sigpending:
if (sigpending) {
spin_lock_irqsave(&current->sighand->siglock, flags);
recalc_sigpending();
@@ -867,7 +871,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
if (err == -EIO)
c->status = Disconnected;
if (err != -ERESTARTSYS)
- goto reterr;
+ goto recalc_sigpending;
}
if (req->status == REQ_STATUS_ERROR) {
p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
@@ -885,6 +889,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
if (req->status == REQ_STATUS_RCVD)
err = 0;
}
+recalc_sigpending:
if (sigpending) {
spin_lock_irqsave(&current->sighand->siglock, flags);
recalc_sigpending();
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 53ecda10b790..13e2ae6be620 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -174,7 +174,7 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
flow == CAIF_CTRLCMD_DEINIT_RSP ? "CLOSE/DEINIT" :
flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "OPEN_FAIL" :
flow == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ?
- "REMOTE_SHUTDOWN" : "UKNOWN CTRL COMMAND");
+ "REMOTE_SHUTDOWN" : "UNKNOWN CTRL COMMAND");
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index b4bded4b5396..12bf49772d24 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -8,6 +8,7 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
mon_client.o \
cls_lock_client.o \
osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
+ striper.o \
debugfs.o \
auth.o auth_none.o \
crypto.o armor.o \
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 4adf07826f4a..584fdbef2088 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -72,6 +72,7 @@ const char *ceph_msg_type_name(int type)
case CEPH_MSG_MON_GET_VERSION: return "mon_get_version";
case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply";
case CEPH_MSG_MDS_MAP: return "mds_map";
+ case CEPH_MSG_FS_MAP_USER: return "fs_map_user";
case CEPH_MSG_CLIENT_SESSION: return "client_session";
case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
case CEPH_MSG_CLIENT_REQUEST: return "client_request";
@@ -79,8 +80,13 @@ const char *ceph_msg_type_name(int type)
case CEPH_MSG_CLIENT_REPLY: return "client_reply";
case CEPH_MSG_CLIENT_CAPS: return "client_caps";
case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
+ case CEPH_MSG_CLIENT_QUOTA: return "client_quota";
case CEPH_MSG_CLIENT_SNAP: return "client_snap";
case CEPH_MSG_CLIENT_LEASE: return "client_lease";
+ case CEPH_MSG_POOLOP_REPLY: return "poolop_reply";
+ case CEPH_MSG_POOLOP: return "poolop";
+ case CEPH_MSG_MON_COMMAND: return "mon_command";
+ case CEPH_MSG_MON_COMMAND_ACK: return "mon_command_ack";
case CEPH_MSG_OSD_MAP: return "osd_map";
case CEPH_MSG_OSD_OP: return "osd_op";
case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
@@ -217,7 +223,7 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid)
if (i == 16)
err = 0;
- dout("parse_fsid ret %d got fsid %pU", err, fsid);
+ dout("parse_fsid ret %d got fsid %pU\n", err, fsid);
return err;
}
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index bf9d079cbafd..02172c408ff2 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -347,10 +347,12 @@ struct key_type key_type_ceph = {
.destroy = ceph_key_destroy,
};
-int ceph_crypto_init(void) {
+int __init ceph_crypto_init(void)
+{
return register_key_type(&key_type_ceph);
}
-void ceph_crypto_shutdown(void) {
+void ceph_crypto_shutdown(void)
+{
unregister_key_type(&key_type_ceph);
}
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 1eef6806aa1a..02952605d121 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -389,7 +389,7 @@ CEPH_DEFINE_SHOW_FUNC(monc_show)
CEPH_DEFINE_SHOW_FUNC(osdc_show)
CEPH_DEFINE_SHOW_FUNC(client_options_show)
-int ceph_debugfs_init(void)
+int __init ceph_debugfs_init(void)
{
ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
if (!ceph_debugfs_dir)
@@ -418,7 +418,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
goto out;
client->monc.debugfs_file = debugfs_create_file("monc",
- 0600,
+ 0400,
client->debugfs_dir,
client,
&monc_show_fops);
@@ -426,7 +426,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
goto out;
client->osdc.debugfs_file = debugfs_create_file("osdc",
- 0600,
+ 0400,
client->debugfs_dir,
client,
&osdc_show_fops);
@@ -434,7 +434,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
goto out;
client->debugfs_monmap = debugfs_create_file("monmap",
- 0600,
+ 0400,
client->debugfs_dir,
client,
&monmap_show_fops);
@@ -442,7 +442,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
goto out;
client->debugfs_osdmap = debugfs_create_file("osdmap",
- 0600,
+ 0400,
client->debugfs_dir,
client,
&osdmap_show_fops);
@@ -450,7 +450,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
goto out;
client->debugfs_options = debugfs_create_file("client_options",
- 0600,
+ 0400,
client->debugfs_dir,
client,
&client_options_show_fops);
@@ -477,7 +477,7 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client)
#else /* CONFIG_DEBUG_FS */
-int ceph_debugfs_init(void)
+int __init ceph_debugfs_init(void)
{
return 0;
}
@@ -496,6 +496,3 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client)
}
#endif /* CONFIG_DEBUG_FS */
-
-EXPORT_SYMBOL(ceph_debugfs_init);
-EXPORT_SYMBOL(ceph_debugfs_cleanup);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 8a4d3758030b..fcb40c12b1f8 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -277,7 +277,7 @@ static void _ceph_msgr_exit(void)
ceph_msgr_slab_exit();
}
-int ceph_msgr_init(void)
+int __init ceph_msgr_init(void)
{
if (ceph_msgr_slab_init())
return -ENOMEM;
@@ -299,7 +299,6 @@ int ceph_msgr_init(void)
return -ENOMEM;
}
-EXPORT_SYMBOL(ceph_msgr_init);
void ceph_msgr_exit(void)
{
@@ -307,7 +306,6 @@ void ceph_msgr_exit(void)
_ceph_msgr_exit();
}
-EXPORT_SYMBOL(ceph_msgr_exit);
void ceph_msgr_flush(void)
{
@@ -839,93 +837,112 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
size_t length)
{
struct ceph_msg_data *data = cursor->data;
- struct bio *bio;
+ struct ceph_bio_iter *it = &cursor->bio_iter;
- BUG_ON(data->type != CEPH_MSG_DATA_BIO);
+ cursor->resid = min_t(size_t, length, data->bio_length);
+ *it = data->bio_pos;
+ if (cursor->resid < it->iter.bi_size)
+ it->iter.bi_size = cursor->resid;
- bio = data->bio;
- BUG_ON(!bio);
-
- cursor->resid = min(length, data->bio_length);
- cursor->bio = bio;
- cursor->bvec_iter = bio->bi_iter;
- cursor->last_piece =
- cursor->resid <= bio_iter_len(bio, cursor->bvec_iter);
+ BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
+ cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
}
static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
size_t *page_offset,
size_t *length)
{
- struct ceph_msg_data *data = cursor->data;
- struct bio *bio;
- struct bio_vec bio_vec;
-
- BUG_ON(data->type != CEPH_MSG_DATA_BIO);
-
- bio = cursor->bio;
- BUG_ON(!bio);
-
- bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
-
- *page_offset = (size_t) bio_vec.bv_offset;
- BUG_ON(*page_offset >= PAGE_SIZE);
- if (cursor->last_piece) /* pagelist offset is always 0 */
- *length = cursor->resid;
- else
- *length = (size_t) bio_vec.bv_len;
- BUG_ON(*length > cursor->resid);
- BUG_ON(*page_offset + *length > PAGE_SIZE);
+ struct bio_vec bv = bio_iter_iovec(cursor->bio_iter.bio,
+ cursor->bio_iter.iter);
- return bio_vec.bv_page;
+ *page_offset = bv.bv_offset;
+ *length = bv.bv_len;
+ return bv.bv_page;
}
static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
size_t bytes)
{
- struct bio *bio;
- struct bio_vec bio_vec;
+ struct ceph_bio_iter *it = &cursor->bio_iter;
- BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
+ BUG_ON(bytes > cursor->resid);
+ BUG_ON(bytes > bio_iter_len(it->bio, it->iter));
+ cursor->resid -= bytes;
+ bio_advance_iter(it->bio, &it->iter, bytes);
- bio = cursor->bio;
- BUG_ON(!bio);
+ if (!cursor->resid) {
+ BUG_ON(!cursor->last_piece);
+ return false; /* no more data */
+ }
- bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
+ if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done))
+ return false; /* more bytes to process in this segment */
- /* Advance the cursor offset */
+ if (!it->iter.bi_size) {
+ it->bio = it->bio->bi_next;
+ it->iter = it->bio->bi_iter;
+ if (cursor->resid < it->iter.bi_size)
+ it->iter.bi_size = cursor->resid;
+ }
- BUG_ON(cursor->resid < bytes);
- cursor->resid -= bytes;
+ BUG_ON(cursor->last_piece);
+ BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
+ cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
+ return true;
+}
+#endif /* CONFIG_BLOCK */
- bio_advance_iter(bio, &cursor->bvec_iter, bytes);
+static void ceph_msg_data_bvecs_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
+{
+ struct ceph_msg_data *data = cursor->data;
+ struct bio_vec *bvecs = data->bvec_pos.bvecs;
- if (bytes < bio_vec.bv_len)
- return false; /* more bytes to process in this segment */
+ cursor->resid = min_t(size_t, length, data->bvec_pos.iter.bi_size);
+ cursor->bvec_iter = data->bvec_pos.iter;
+ cursor->bvec_iter.bi_size = cursor->resid;
- /* Move on to the next segment, and possibly the next bio */
+ BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
+ cursor->last_piece =
+ cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
+}
- if (!cursor->bvec_iter.bi_size) {
- bio = bio->bi_next;
- cursor->bio = bio;
- if (bio)
- cursor->bvec_iter = bio->bi_iter;
- else
- memset(&cursor->bvec_iter, 0,
- sizeof(cursor->bvec_iter));
- }
+static struct page *ceph_msg_data_bvecs_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset,
+ size_t *length)
+{
+ struct bio_vec bv = bvec_iter_bvec(cursor->data->bvec_pos.bvecs,
+ cursor->bvec_iter);
+
+ *page_offset = bv.bv_offset;
+ *length = bv.bv_len;
+ return bv.bv_page;
+}
+
+static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs;
+
+ BUG_ON(bytes > cursor->resid);
+ BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter));
+ cursor->resid -= bytes;
+ bvec_iter_advance(bvecs, &cursor->bvec_iter, bytes);
- if (!cursor->last_piece) {
- BUG_ON(!cursor->resid);
- BUG_ON(!bio);
- /* A short read is OK, so use <= rather than == */
- if (cursor->resid <= bio_iter_len(bio, cursor->bvec_iter))
- cursor->last_piece = true;
+ if (!cursor->resid) {
+ BUG_ON(!cursor->last_piece);
+ return false; /* no more data */
}
+ if (!bytes || cursor->bvec_iter.bi_bvec_done)
+ return false; /* more bytes to process in this segment */
+
+ BUG_ON(cursor->last_piece);
+ BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
+ cursor->last_piece =
+ cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
return true;
}
-#endif /* CONFIG_BLOCK */
/*
* For a page array, a piece comes from the first page in the array
@@ -1110,6 +1127,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
ceph_msg_data_bio_cursor_init(cursor, length);
break;
#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_BVECS:
+ ceph_msg_data_bvecs_cursor_init(cursor, length);
+ break;
case CEPH_MSG_DATA_NONE:
default:
/* BUG(); */
@@ -1158,14 +1178,19 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
page = ceph_msg_data_bio_next(cursor, page_offset, length);
break;
#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_BVECS:
+ page = ceph_msg_data_bvecs_next(cursor, page_offset, length);
+ break;
case CEPH_MSG_DATA_NONE:
default:
page = NULL;
break;
}
+
BUG_ON(!page);
BUG_ON(*page_offset + *length > PAGE_SIZE);
BUG_ON(!*length);
+ BUG_ON(*length > cursor->resid);
if (last_piece)
*last_piece = cursor->last_piece;
@@ -1194,6 +1219,9 @@ static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
new_piece = ceph_msg_data_bio_advance(cursor, bytes);
break;
#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_BVECS:
+ new_piece = ceph_msg_data_bvecs_advance(cursor, bytes);
+ break;
case CEPH_MSG_DATA_NONE:
default:
BUG();
@@ -1575,13 +1603,18 @@ static int write_partial_message_data(struct ceph_connection *con)
* been revoked, so use the zero page.
*/
crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
- while (cursor->resid) {
+ while (cursor->total_resid) {
struct page *page;
size_t page_offset;
size_t length;
bool last_piece;
int ret;
+ if (!cursor->resid) {
+ ceph_msg_data_advance(cursor, 0);
+ continue;
+ }
+
page = ceph_msg_data_next(cursor, &page_offset, &length,
&last_piece);
ret = ceph_tcp_sendpage(con->sock, page, page_offset,
@@ -2297,7 +2330,12 @@ static int read_partial_msg_data(struct ceph_connection *con)
if (do_datacrc)
crc = con->in_data_crc;
- while (cursor->resid) {
+ while (cursor->total_resid) {
+ if (!cursor->resid) {
+ ceph_msg_data_advance(cursor, 0);
+ continue;
+ }
+
page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
if (ret <= 0) {
@@ -3262,16 +3300,14 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
#ifdef CONFIG_BLOCK
-void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
- size_t length)
+void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
+ u32 length)
{
struct ceph_msg_data *data;
- BUG_ON(!bio);
-
data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
BUG_ON(!data);
- data->bio = bio;
+ data->bio_pos = *bio_pos;
data->bio_length = length;
list_add_tail(&data->links, &msg->data);
@@ -3280,6 +3316,20 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
EXPORT_SYMBOL(ceph_msg_data_add_bio);
#endif /* CONFIG_BLOCK */
+void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
+ struct ceph_bvec_iter *bvec_pos)
+{
+ struct ceph_msg_data *data;
+
+ data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS);
+ BUG_ON(!data);
+ data->bvec_pos = *bvec_pos;
+
+ list_add_tail(&data->links, &msg->data);
+ msg->data_length += bvec_pos->iter.bi_size;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
+
/*
* construct a new message with given type, size
* the new msg has a ref count of 1.
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 1547107f4854..b3dac24412d3 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -60,7 +60,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
num_mon = ceph_decode_32(&p);
ceph_decode_need(&p, end, num_mon*sizeof(m->mon_inst[0]), bad);
- if (num_mon >= CEPH_MAX_MON)
+ if (num_mon > CEPH_MAX_MON)
goto bad;
m = kmalloc(sizeof(*m) + sizeof(m->mon_inst[0])*num_mon, GFP_NOFS);
if (m == NULL)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 2814dba5902d..ea2a6c9fb7ce 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -20,6 +20,7 @@
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/pagelist.h>
+#include <linux/ceph/striper.h>
#define OSD_OPREPLY_FRONT_LEN 512
@@ -103,13 +104,12 @@ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
u64 *objnum, u64 *objoff, u64 *objlen)
{
u64 orig_len = *plen;
- int r;
+ u32 xlen;
/* object extent? */
- r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
- objoff, objlen);
- if (r < 0)
- return r;
+ ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
+ objoff, &xlen);
+ *objlen = xlen;
if (*objlen < orig_len) {
*plen = *objlen;
dout(" skipping last %llu, final file extent %llu~%llu\n",
@@ -117,7 +117,6 @@ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
}
dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen);
-
return 0;
}
@@ -148,14 +147,22 @@ static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
#ifdef CONFIG_BLOCK
static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
- struct bio *bio, size_t bio_length)
+ struct ceph_bio_iter *bio_pos,
+ u32 bio_length)
{
osd_data->type = CEPH_OSD_DATA_TYPE_BIO;
- osd_data->bio = bio;
+ osd_data->bio_pos = *bio_pos;
osd_data->bio_length = bio_length;
}
#endif /* CONFIG_BLOCK */
+static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
+ struct ceph_bvec_iter *bvec_pos)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_BVECS;
+ osd_data->bvec_pos = *bvec_pos;
+}
+
#define osd_req_op_data(oreq, whch, typ, fld) \
({ \
struct ceph_osd_request *__oreq = (oreq); \
@@ -218,16 +225,29 @@ EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
#ifdef CONFIG_BLOCK
void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
- unsigned int which, struct bio *bio, size_t bio_length)
+ unsigned int which,
+ struct ceph_bio_iter *bio_pos,
+ u32 bio_length)
{
struct ceph_osd_data *osd_data;
osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
- ceph_osd_data_bio_init(osd_data, bio, bio_length);
+ ceph_osd_data_bio_init(osd_data, bio_pos, bio_length);
}
EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
#endif /* CONFIG_BLOCK */
+void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
+ unsigned int which,
+ struct ceph_bvec_iter *bvec_pos)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_data_bvecs_init(osd_data, bvec_pos);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos);
+
static void osd_req_op_cls_request_info_pagelist(
struct ceph_osd_request *osd_req,
unsigned int which, struct ceph_pagelist *pagelist)
@@ -265,6 +285,23 @@ void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req,
}
EXPORT_SYMBOL(osd_req_op_cls_request_data_pages);
+void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
+ unsigned int which,
+ struct bio_vec *bvecs, u32 bytes)
+{
+ struct ceph_osd_data *osd_data;
+ struct ceph_bvec_iter it = {
+ .bvecs = bvecs,
+ .iter = { .bi_size = bytes },
+ };
+
+ osd_data = osd_req_op_data(osd_req, which, cls, request_data);
+ ceph_osd_data_bvecs_init(osd_data, &it);
+ osd_req->r_ops[which].cls.indata_len += bytes;
+ osd_req->r_ops[which].indata_len += bytes;
+}
+EXPORT_SYMBOL(osd_req_op_cls_request_data_bvecs);
+
void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req,
unsigned int which, struct page **pages, u64 length,
u32 alignment, bool pages_from_pool, bool own_pages)
@@ -290,6 +327,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
case CEPH_OSD_DATA_TYPE_BIO:
return (u64)osd_data->bio_length;
#endif /* CONFIG_BLOCK */
+ case CEPH_OSD_DATA_TYPE_BVECS:
+ return osd_data->bvec_pos.iter.bi_size;
default:
WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
return 0;
@@ -828,8 +867,10 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
#ifdef CONFIG_BLOCK
} else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) {
- ceph_msg_data_add_bio(msg, osd_data->bio, length);
+ ceph_msg_data_add_bio(msg, &osd_data->bio_pos, length);
#endif
+ } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) {
+ ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos);
} else {
BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
}
@@ -5065,7 +5106,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
}
EXPORT_SYMBOL(ceph_osdc_writepages);
-int ceph_osdc_setup(void)
+int __init ceph_osdc_setup(void)
{
size_t size = sizeof(struct ceph_osd_request) +
CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op);
@@ -5076,7 +5117,6 @@ int ceph_osdc_setup(void)
return ceph_osd_request_cache ? 0 : -ENOMEM;
}
-EXPORT_SYMBOL(ceph_osdc_setup);
void ceph_osdc_cleanup(void)
{
@@ -5084,7 +5124,6 @@ void ceph_osdc_cleanup(void)
kmem_cache_destroy(ceph_osd_request_cache);
ceph_osd_request_cache = NULL;
}
-EXPORT_SYMBOL(ceph_osdc_cleanup);
/*
* handle incoming message
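Note: a hedged sketch of handing a caller-owned bio_vec array to the new bvec-position API (the struct initializer mirrors osd_req_op_cls_request_data_bvecs() above; the surrounding function is hypothetical):

/* Hedged sketch: describe a bio_vec array to an OSD extent op via the
 * new struct ceph_bvec_iter. Assumes the caller has already set up an
 * extent op at index 0 of the request. */
static void set_extent_data_bvecs(struct ceph_osd_request *req,
				  struct bio_vec *bvecs, u32 length)
{
	struct ceph_bvec_iter it = {
		.bvecs = bvecs,			/* the segments themselves */
		.iter = { .bi_size = length },	/* total bytes described */
	};

	osd_req_op_extent_osd_data_bvec_pos(req, 0, &it);
}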
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 0da27c66349a..9645ffd6acfb 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -4,7 +4,6 @@
#include <linux/module.h>
#include <linux/slab.h>
-#include <asm/div64.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/osdmap.h>
@@ -2141,76 +2140,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting,
}
/*
- * calculate file layout from given offset, length.
- * fill in correct oid, logical length, and object extent
- * offset, length.
- *
- * for now, we write only a single su, until we can
- * pass a stride back to the caller.
- */
-int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
- u64 off, u64 len,
- u64 *ono,
- u64 *oxoff, u64 *oxlen)
-{
- u32 osize = layout->object_size;
- u32 su = layout->stripe_unit;
- u32 sc = layout->stripe_count;
- u32 bl, stripeno, stripepos, objsetno;
- u32 su_per_object;
- u64 t, su_offset;
-
- dout("mapping %llu~%llu osize %u fl_su %u\n", off, len,
- osize, su);
- if (su == 0 || sc == 0)
- goto invalid;
- su_per_object = osize / su;
- if (su_per_object == 0)
- goto invalid;
- dout("osize %u / su %u = su_per_object %u\n", osize, su,
- su_per_object);
-
- if ((su & ~PAGE_MASK) != 0)
- goto invalid;
-
- /* bl = *off / su; */
- t = off;
- do_div(t, su);
- bl = t;
- dout("off %llu / su %u = bl %u\n", off, su, bl);
-
- stripeno = bl / sc;
- stripepos = bl % sc;
- objsetno = stripeno / su_per_object;
-
- *ono = objsetno * sc + stripepos;
- dout("objset %u * sc %u = ono %u\n", objsetno, sc, (unsigned int)*ono);
-
- /* *oxoff = *off % layout->fl_stripe_unit; # offset in su */
- t = off;
- su_offset = do_div(t, su);
- *oxoff = su_offset + (stripeno % su_per_object) * su;
-
- /*
- * Calculate the length of the extent being written to the selected
- * object. This is the minimum of the full length requested (len) or
- * the remainder of the current stripe being written to.
- */
- *oxlen = min_t(u64, len, su - su_offset);
-
- dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
- return 0;
-
-invalid:
- dout(" invalid layout\n");
- *ono = 0;
- *oxoff = 0;
- *oxlen = 0;
- return -EINVAL;
-}
-EXPORT_SYMBOL(ceph_calc_file_object_mapping);
-
-/*
* Map an object into a PG.
*
* Should only be called with target_oid and target_oloc (as opposed to
diff --git a/net/ceph/striper.c b/net/ceph/striper.c
new file mode 100644
index 000000000000..c36462dc86b7
--- /dev/null
+++ b/net/ceph/striper.c
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/math64.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/striper.h>
+#include <linux/ceph/types.h>
+
+/*
+ * Map a file extent to a stripe unit within an object.
+ * Fill in objno, offset into object, and object extent length (i.e. the
+ * number of bytes mapped, less than or equal to @l->stripe_unit).
+ *
+ * Example for stripe_count = 3, stripes_per_object = 4:
+ *
+ * blockno | 0 3 6 9 | 1 4 7 10 | 2 5 8 11 | 12 15 18 21 | 13 16 19
+ * stripeno | 0 1 2 3 | 0 1 2 3 | 0 1 2 3 | 4 5 6 7 | 4 5 6
+ * stripepos | 0 | 1 | 2 | 0 | 1
+ * objno | 0 | 1 | 2 | 3 | 4
+ * objsetno | 0 | 1
+ */
+void ceph_calc_file_object_mapping(struct ceph_file_layout *l,
+ u64 off, u64 len,
+ u64 *objno, u64 *objoff, u32 *xlen)
+{
+ u32 stripes_per_object = l->object_size / l->stripe_unit;
+ u64 blockno; /* which su in the file (i.e. globally) */
+ u32 blockoff; /* offset into su */
+ u64 stripeno; /* which stripe */
+ u32 stripepos; /* which su in the stripe,
+ which object in the object set */
+ u64 objsetno; /* which object set */
+ u32 objsetpos; /* which stripe in the object set */
+
+ blockno = div_u64_rem(off, l->stripe_unit, &blockoff);
+ stripeno = div_u64_rem(blockno, l->stripe_count, &stripepos);
+ objsetno = div_u64_rem(stripeno, stripes_per_object, &objsetpos);
+
+ *objno = objsetno * l->stripe_count + stripepos;
+ *objoff = objsetpos * l->stripe_unit + blockoff;
+ *xlen = min_t(u64, len, l->stripe_unit - blockoff);
+}
+EXPORT_SYMBOL(ceph_calc_file_object_mapping);
+
+/*
+ * Return the last extent with given objno (@object_extents is sorted
+ * by objno). If not found, return NULL and set @add_pos so that the
+ * new extent can be added with list_add(add_pos, new_ex).
+ */
+static struct ceph_object_extent *
+lookup_last(struct list_head *object_extents, u64 objno,
+ struct list_head **add_pos)
+{
+ struct list_head *pos;
+
+ list_for_each_prev(pos, object_extents) {
+ struct ceph_object_extent *ex =
+ list_entry(pos, typeof(*ex), oe_item);
+
+ if (ex->oe_objno == objno)
+ return ex;
+
+ if (ex->oe_objno < objno)
+ break;
+ }
+
+ *add_pos = pos;
+ return NULL;
+}
+
+static struct ceph_object_extent *
+lookup_containing(struct list_head *object_extents, u64 objno,
+ u64 objoff, u32 xlen)
+{
+ struct ceph_object_extent *ex;
+
+ list_for_each_entry(ex, object_extents, oe_item) {
+ if (ex->oe_objno == objno &&
+ ex->oe_off <= objoff &&
+ ex->oe_off + ex->oe_len >= objoff + xlen) /* paranoia */
+ return ex;
+
+ if (ex->oe_objno > objno)
+ break;
+ }
+
+ return NULL;
+}
+
+/*
+ * Map a file extent to a sorted list of object extents.
+ *
+ * We want only one (or as few as possible) object extents per object.
+ * Adjacent object extents will be merged together, each returned object
+ * extent may reverse map to multiple different file extents.
+ *
+ * Call @alloc_fn for each new object extent and @action_fn for each
+ * mapped stripe unit, whether it was merged into an already allocated
+ * object extent or started a new object extent.
+ *
+ * Newly allocated object extents are added to @object_extents.
+ * To keep @object_extents sorted, successive calls to this function
+ * must map successive file extents (i.e. the list of file extents that
+ * are mapped using the same @object_extents must be sorted).
+ *
+ * The caller is responsible for @object_extents.
+ */
+int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
+ struct list_head *object_extents,
+ struct ceph_object_extent *alloc_fn(void *arg),
+ void *alloc_arg,
+ ceph_object_extent_fn_t action_fn,
+ void *action_arg)
+{
+ struct ceph_object_extent *last_ex, *ex;
+
+ while (len) {
+ struct list_head *add_pos = NULL;
+ u64 objno, objoff;
+ u32 xlen;
+
+ ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
+ &xlen);
+
+ last_ex = lookup_last(object_extents, objno, &add_pos);
+ if (!last_ex || last_ex->oe_off + last_ex->oe_len != objoff) {
+ ex = alloc_fn(alloc_arg);
+ if (!ex)
+ return -ENOMEM;
+
+ ex->oe_objno = objno;
+ ex->oe_off = objoff;
+ ex->oe_len = xlen;
+ if (action_fn)
+ action_fn(ex, xlen, action_arg);
+
+ if (!last_ex)
+ list_add(&ex->oe_item, add_pos);
+ else
+ list_add(&ex->oe_item, &last_ex->oe_item);
+ } else {
+ last_ex->oe_len += xlen;
+ if (action_fn)
+ action_fn(last_ex, xlen, action_arg);
+ }
+
+ off += xlen;
+ len -= xlen;
+ }
+
+ for (last_ex = list_first_entry(object_extents, typeof(*ex), oe_item),
+ ex = list_next_entry(last_ex, oe_item);
+ &ex->oe_item != object_extents;
+ last_ex = ex, ex = list_next_entry(ex, oe_item)) {
+ if (last_ex->oe_objno > ex->oe_objno ||
+ (last_ex->oe_objno == ex->oe_objno &&
+ last_ex->oe_off + last_ex->oe_len >= ex->oe_off)) {
+ WARN(1, "%s: object_extents list not sorted!\n",
+ __func__);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(ceph_file_to_extents);
+
+/*
+ * A stripped down, non-allocating version of ceph_file_to_extents(),
+ * for when @object_extents is already populated.
+ */
+int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
+ struct list_head *object_extents,
+ ceph_object_extent_fn_t action_fn,
+ void *action_arg)
+{
+ while (len) {
+ struct ceph_object_extent *ex;
+ u64 objno, objoff;
+ u32 xlen;
+
+ ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
+ &xlen);
+
+ ex = lookup_containing(object_extents, objno, objoff, xlen);
+ if (!ex) {
+ WARN(1, "%s: objno %llu %llu~%u not found!\n",
+ __func__, objno, objoff, xlen);
+ return -EINVAL;
+ }
+
+ action_fn(ex, xlen, action_arg);
+
+ off += xlen;
+ len -= xlen;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(ceph_iterate_extents);
+
+/*
+ * Reverse map an object extent to a sorted list of file extents.
+ *
+ * On success, the caller is responsible for:
+ *
+ * kfree(file_extents)
+ */
+int ceph_extent_to_file(struct ceph_file_layout *l,
+ u64 objno, u64 objoff, u64 objlen,
+ struct ceph_file_extent **file_extents,
+ u32 *num_file_extents)
+{
+ u32 stripes_per_object = l->object_size / l->stripe_unit;
+ u64 blockno; /* which su */
+ u32 blockoff; /* offset into su */
+ u64 stripeno; /* which stripe */
+ u32 stripepos; /* which su in the stripe,
+ which object in the object set */
+ u64 objsetno; /* which object set */
+ u32 i = 0;
+
+ if (!objlen) {
+ *file_extents = NULL;
+ *num_file_extents = 0;
+ return 0;
+ }
+
+ *num_file_extents = DIV_ROUND_UP_ULL(objoff + objlen, l->stripe_unit) -
+ DIV_ROUND_DOWN_ULL(objoff, l->stripe_unit);
+ *file_extents = kmalloc_array(*num_file_extents, sizeof(**file_extents),
+ GFP_NOIO);
+ if (!*file_extents)
+ return -ENOMEM;
+
+ div_u64_rem(objoff, l->stripe_unit, &blockoff);
+ while (objlen) {
+ u64 off, len;
+
+ objsetno = div_u64_rem(objno, l->stripe_count, &stripepos);
+ stripeno = div_u64(objoff, l->stripe_unit) +
+ objsetno * stripes_per_object;
+ blockno = stripeno * l->stripe_count + stripepos;
+ off = blockno * l->stripe_unit + blockoff;
+ len = min_t(u64, objlen, l->stripe_unit - blockoff);
+
+ (*file_extents)[i].fe_off = off;
+ (*file_extents)[i].fe_len = len;
+
+ blockoff = 0;
+ objoff += len;
+ objlen -= len;
+ i++;
+ }
+
+ BUG_ON(i != *num_file_extents);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_extent_to_file);
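Note: as a worked example of the mapping above, a standalone userspace sketch (not kernel code) for the layout in the comment table: stripe_unit 4096, stripe_count 3, object_size 16384 (stripes_per_object = 4). An offset in block number 5 lands in objno 2, matching the table.

/* Standalone sketch of the arithmetic in ceph_calc_file_object_mapping().
 * Illustration only; the kernel version uses div_u64_rem(). */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t su = 4096, sc = 3, spo = 4;	  /* stripe unit/count, stripes per object */
	uint64_t off = 5 * su + 100, len = 8000;  /* 100 bytes into blockno 5 */

	uint64_t blockno = off / su, blockoff = off % su;
	uint64_t stripeno = blockno / sc, stripepos = blockno % sc;
	uint64_t objsetno = stripeno / spo, objsetpos = stripeno % spo;

	uint64_t objno = objsetno * sc + stripepos;
	uint64_t objoff = objsetpos * su + blockoff;
	uint64_t xlen = len < su - blockoff ? len : su - blockoff;

	/* prints: objno 2 objoff 4196 xlen 3996 */
	printf("objno %llu objoff %llu xlen %llu\n",
	       (unsigned long long)objno, (unsigned long long)objoff,
	       (unsigned long long)xlen);
	return 0;
}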
diff --git a/net/core/dev.c b/net/core/dev.c
index 969462ebb296..af0558b00c6c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2969,7 +2969,7 @@ netdev_features_t passthru_features_check(struct sk_buff *skb,
}
EXPORT_SYMBOL(passthru_features_check);
-static netdev_features_t dflt_features_check(const struct sk_buff *skb,
+static netdev_features_t dflt_features_check(struct sk_buff *skb,
struct net_device *dev,
netdev_features_t features)
{
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index e3e6a3e2ca22..d884d8f5f0e5 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -839,7 +839,7 @@ void dev_mc_flush(struct net_device *dev)
EXPORT_SYMBOL(dev_mc_flush);
/**
- * dev_mc_flush - Init multicast address list
+ * dev_mc_init - Init multicast address list
* @dev: device
*
* Init multicast address list.
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 7b7a14abba28..ce519861be59 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -55,7 +55,8 @@ static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
-static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
+static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
+ struct net_device *dev);
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
@@ -291,8 +292,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
write_lock_bh(&tbl->lock);
neigh_flush_dev(tbl, dev);
- pneigh_ifdown(tbl, dev);
- write_unlock_bh(&tbl->lock);
+ pneigh_ifdown_and_unlock(tbl, dev);
del_timer_sync(&tbl->proxy_timer);
pneigh_queue_purge(&tbl->proxy_queue);
@@ -681,9 +681,10 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
return -ENOENT;
}
-static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
+ struct net_device *dev)
{
- struct pneigh_entry *n, **np;
+ struct pneigh_entry *n, **np, *freelist = NULL;
u32 h;
for (h = 0; h <= PNEIGH_HASHMASK; h++) {
@@ -691,16 +692,23 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
while ((n = *np) != NULL) {
if (!dev || n->dev == dev) {
*np = n->next;
- if (tbl->pdestructor)
- tbl->pdestructor(n);
- if (n->dev)
- dev_put(n->dev);
- kfree(n);
+ n->next = freelist;
+ freelist = n;
continue;
}
np = &n->next;
}
}
+ write_unlock_bh(&tbl->lock);
+ while ((n = freelist)) {
+ freelist = n->next;
+ n->next = NULL;
+ if (tbl->pdestructor)
+ tbl->pdestructor(n);
+ if (n->dev)
+ dev_put(n->dev);
+ kfree(n);
+ }
return -ENOENT;
}
@@ -2323,12 +2331,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
if (!err) {
- if (tb[NDA_IFINDEX])
+ if (tb[NDA_IFINDEX]) {
+ if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
+ return -EINVAL;
filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
-
- if (tb[NDA_MASTER])
+ }
+ if (tb[NDA_MASTER]) {
+ if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
+ return -EINVAL;
filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
-
+ }
if (filter_idx || filter_master_idx)
flags |= NLM_F_DUMP_FILTERED;
}
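Note: the pneigh change above moves entry teardown out of the table lock: entries are unlinked onto a private freelist under tbl->lock, and pdestructor/dev_put/kfree run only after the lock is dropped. A minimal sketch of the pattern (hypothetical entry type and table, not the kernel's pneigh structures):

/* Sketch of the unlink-under-lock, free-after-unlock pattern used by
 * pneigh_ifdown_and_unlock() above. */
struct entry {
	struct entry *next;
};

static struct entry *table;
static DEFINE_SPINLOCK(table_lock);

static void flush_table(void)
{
	struct entry *n, **np, *freelist = NULL;

	spin_lock_bh(&table_lock);
	np = &table;
	while ((n = *np) != NULL) {
		*np = n->next;		/* unlink while holding the lock */
		n->next = freelist;	/* chain onto a private freelist */
		freelist = n;
	}
	spin_unlock_bh(&table_lock);

	while ((n = freelist)) {	/* teardown runs without the lock */
		freelist = n->next;
		kfree(n);
	}
}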
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b3b609f0eeb5..b1a2c5e38530 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -15,7 +15,6 @@
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
-#include <linux/kmemleak.h>
#include <net/ip.h>
#include <net/sock.h>
diff --git a/net/core/utils.c b/net/core/utils.c
index 93066bd0305a..d47863b07a60 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -403,6 +403,29 @@ int inet_pton_with_scope(struct net *net, __kernel_sa_family_t af,
}
EXPORT_SYMBOL(inet_pton_with_scope);
+bool inet_addr_is_any(struct sockaddr *addr)
+{
+ if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)addr;
+ const struct sockaddr_in6 in6_any =
+ { .sin6_addr = IN6ADDR_ANY_INIT };
+
+ if (!memcmp(in6->sin6_addr.s6_addr,
+ in6_any.sin6_addr.s6_addr, 16))
+ return true;
+ } else if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *in = (struct sockaddr_in *)addr;
+
+ if (in->sin_addr.s_addr == htonl(INADDR_ANY))
+ return true;
+ } else {
+ pr_warn("unexpected address family %u\n", addr->sa_family);
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(inet_addr_is_any);
+
void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
__be32 from, __be32 to, bool pseudohdr)
{
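Note: a hedged usage sketch for the new helper (the caller below is hypothetical; the helper itself only recognizes AF_INET and AF_INET6 wildcards and warns on anything else):

/* Hypothetical example: reject a configuration bound to the wildcard
 * address, using the new inet_addr_is_any() helper. */
static int check_bind_addr(struct sockaddr_storage *ss)
{
	if (inet_addr_is_any((struct sockaddr *)ss))
		return -EINVAL;	/* 0.0.0.0 / :: not allowed here */

	return 0;
}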
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 8396705deffc..40c851693f77 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -91,9 +91,9 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
next_opt = memchr(opt, '#', end - opt) ?: end;
opt_len = next_opt - opt;
- if (!opt_len) {
- printk(KERN_WARNING
- "Empty option to dns_resolver key\n");
+ if (opt_len <= 0 || opt_len > 128) {
+ pr_warn_ratelimited("Invalid option length (%d) for dns_resolver key\n",
+ opt_len);
return -EINVAL;
}
@@ -127,10 +127,8 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
}
bad_option_value:
- printk(KERN_WARNING
- "Option '%*.*s' to dns_resolver key:"
- " bad/missing value\n",
- opt_nlen, opt_nlen, opt);
+ pr_warn_ratelimited("Option '%*.*s' to dns_resolver key: bad/missing value\n",
+ opt_nlen, opt_nlen, opt);
return -EINVAL;
} while (opt = next_opt + 1, opt < end);
}
diff --git a/net/ife/ife.c b/net/ife/ife.c
index 7d1ec76e7f43..13bbf8cb6a39 100644
--- a/net/ife/ife.c
+++ b/net/ife/ife.c
@@ -69,6 +69,9 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen)
int total_pull;
u16 ifehdrln;
+ if (!pskb_may_pull(skb, skb->dev->hard_header_len + IFE_METAHDRLEN))
+ return NULL;
+
ifehdr = (struct ifeheadr *) (skb->data + skb->dev->hard_header_len);
ifehdrln = ntohs(ifehdr->metalen);
total_pull = skb->dev->hard_header_len + ifehdrln;
@@ -92,12 +95,43 @@ struct meta_tlvhdr {
__be16 len;
};
+static bool __ife_tlv_meta_valid(const unsigned char *skbdata,
+ const unsigned char *ifehdr_end)
+{
+ const struct meta_tlvhdr *tlv;
+ u16 tlvlen;
+
+ if (unlikely(skbdata + sizeof(*tlv) > ifehdr_end))
+ return false;
+
+ tlv = (const struct meta_tlvhdr *)skbdata;
+ tlvlen = ntohs(tlv->len);
+
+ /* tlv length field is inc header, check on minimum */
+ if (tlvlen < NLA_HDRLEN)
+ return false;
+
+ /* overflow by NLA_ALIGN check */
+ if (NLA_ALIGN(tlvlen) < tlvlen)
+ return false;
+
+ if (unlikely(skbdata + NLA_ALIGN(tlvlen) > ifehdr_end))
+ return false;
+
+ return true;
+}
+
/* Caller takes care of presenting data in network order
*/
-void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, u16 *totlen)
+void *ife_tlv_meta_decode(void *skbdata, const void *ifehdr_end, u16 *attrtype,
+ u16 *dlen, u16 *totlen)
{
- struct meta_tlvhdr *tlv = (struct meta_tlvhdr *) skbdata;
+ struct meta_tlvhdr *tlv;
+
+ if (!__ife_tlv_meta_valid(skbdata, ifehdr_end))
+ return NULL;
+ tlv = (struct meta_tlvhdr *)skbdata;
*dlen = ntohs(tlv->len) - NLA_HDRLEN;
*attrtype = ntohs(tlv->type);
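Note: with the added ifehdr_end bound, a metadata walk can fail cleanly on truncated input; a hedged sketch of such a caller loop (hypothetical, modelled on how the hardened decoder is meant to be driven):

/* Hypothetical walk over IFE metadata TLVs: ife_tlv_meta_decode() now
 * returns NULL instead of reading past the end of the metadata area. */
static int walk_ife_meta(void *ifehdr, u16 metalen)
{
	void *p = ifehdr;
	void *end = ifehdr + metalen;

	while (p < end) {
		u16 attrtype, dlen, totlen;
		void *data;

		data = ife_tlv_meta_decode(p, end, &attrtype, &dlen, &totlen);
		if (!data)
			return -EINVAL;	/* truncated or malformed TLV */

		/* consume (attrtype, data, dlen) here */
		p += totlen;	/* totlen covers the whole aligned TLV */
	}
	return 0;
}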
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a8772a978224..9c169bb2444d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -781,8 +781,14 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
tunnel->encap.type == TUNNEL_ENCAP_NONE) {
dev->features |= NETIF_F_GSO_SOFTWARE;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ } else {
+ dev->features &= ~NETIF_F_GSO_SOFTWARE;
+ dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
}
dev->features |= NETIF_F_LLTX;
+ } else {
+ dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
+ dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
}
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4c11b810a447..83c73bab2c3d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1109,6 +1109,10 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
struct ip_options_rcu *opt;
struct rtable *rt;
+ rt = *rtp;
+ if (unlikely(!rt))
+ return -EFAULT;
+
/*
* setup for corking.
*/
@@ -1124,9 +1128,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
cork->flags |= IPCORK_OPT;
cork->addr = ipc->addr;
}
- rt = *rtp;
- if (unlikely(!rt))
- return -EFAULT;
+
/*
* We steal reference to this route, caller should not release it
*/
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 7523ddb2566b..0e5edd0c7926 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -28,10 +28,9 @@ obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o
obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
-nf_nat_snmp_basic-y := nf_nat_snmp_basic-asn1.o nf_nat_snmp_basic_main.o
-$(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic-asn1.h
+nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o
+$(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
-clean-files := nf_nat_snmp_basic-asn1.c nf_nat_snmp_basic-asn1.h
obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
index b6e277093e7e..ac110c1d55b5 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
@@ -54,7 +54,7 @@
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <linux/netfilter/nf_conntrack_snmp.h>
-#include "nf_nat_snmp_basic-asn1.h"
+#include "nf_nat_snmp_basic.asn1.h"
MODULE_LICENSE("GPL");
MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 59bc6ab1a4eb..ccb25d80f679 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -108,7 +108,6 @@
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
-#include <linux/kmemleak.h>
#endif
#include <net/secure_seq.h>
#include <net/ip_tunnels.h>
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bccc4c270087..9ce1c726185e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2368,6 +2368,7 @@ void tcp_write_queue_purge(struct sock *sk)
INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
sk_mem_reclaim(sk);
tcp_clear_all_retrans_hints(tcp_sk(sk));
+ tcp_sk(sk)->packets_out = 0;
}
int tcp_disconnect(struct sock *sk, int flags)
@@ -2417,7 +2418,6 @@ int tcp_disconnect(struct sock *sk, int flags)
icsk->icsk_backoff = 0;
tp->snd_cwnd = 2;
icsk->icsk_probes_out = 0;
- tp->packets_out = 0;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_cnt = 0;
tp->window_clamp = 0;
@@ -2813,8 +2813,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
#ifdef CONFIG_TCP_MD5SIG
case TCP_MD5SIG:
case TCP_MD5SIG_EXT:
- /* Read the IP->Key mappings from userspace */
- err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
+ if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
+ err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
+ else
+ err = -EINVAL;
break;
#endif
case TCP_USER_TIMEOUT:
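Note: the new guard relies on the TCPF_* convention, where TCPF_CLOSE == (1 << TCP_CLOSE) and so on, so a single AND tests membership in a set of states. A minimal illustration (assuming only the standard TCPF_* definitions):

/* Illustration of the TCPF_* mask idiom used in the TCP_MD5SIG check. */
static bool md5_setsockopt_allowed(int sk_state)
{
	return (1 << sk_state) & (TCPF_CLOSE | TCPF_LISTEN);
}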
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 367def6ddeda..e51c644484dc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3868,11 +3868,8 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
int length = (th->doff << 2) - sizeof(*th);
const u8 *ptr = (const u8 *)(th + 1);
- /* If the TCP option is too short, we can short cut */
- if (length < TCPOLEN_MD5SIG)
- return NULL;
-
- while (length > 0) {
+ /* If not enough data remaining, we can short cut */
+ while (length >= TCPOLEN_MD5SIG) {
int opcode = *ptr++;
int opsize;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 49b954d6d0fa..cde7d8251377 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3975,6 +3975,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
+ [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
[RTA_OIF] = { .type = NLA_U32 },
[RTA_IIF] = { .type = NLA_U32 },
[RTA_PRIORITY] = { .type = NLA_U32 },
@@ -3986,6 +3987,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_EXPIRES] = { .type = NLA_U32 },
[RTA_UID] = { .type = NLA_U32 },
[RTA_MARK] = { .type = NLA_U32 },
+ [RTA_TABLE] = { .type = NLA_U32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index f343e6f0fc95..5fe139484919 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -136,7 +136,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
isrh->nexthdr = proto;
hdr->daddr = isrh->segments[isrh->first_segment];
- set_tun_src(net, ip6_dst_idev(dst)->dev, &hdr->daddr, &hdr->saddr);
+ set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(isrh)) {
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 14b67dfacc4b..40261cb68e83 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -183,6 +183,26 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
+struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
+{
+ const struct l2tp_net *pn = l2tp_pernet(net);
+ struct l2tp_tunnel *tunnel;
+ int count = 0;
+
+ rcu_read_lock_bh();
+ list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
+ if (++count > nth) {
+ l2tp_tunnel_inc_refcount(tunnel);
+ rcu_read_unlock_bh();
+ return tunnel;
+ }
+ }
+ rcu_read_unlock_bh();
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_get_nth);
+
/* Lookup a session. A new reference is held on the returned session. */
struct l2tp_session *l2tp_session_get(const struct net *net,
struct l2tp_tunnel *tunnel,
@@ -335,46 +355,6 @@ err_tlock:
}
EXPORT_SYMBOL_GPL(l2tp_session_register);
-/* Lookup a tunnel by id
- */
-struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id)
-{
- struct l2tp_tunnel *tunnel;
- struct l2tp_net *pn = l2tp_pernet(net);
-
- rcu_read_lock_bh();
- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (tunnel->tunnel_id == tunnel_id) {
- rcu_read_unlock_bh();
- return tunnel;
- }
- }
- rcu_read_unlock_bh();
-
- return NULL;
-}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_find);
-
-struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth)
-{
- struct l2tp_net *pn = l2tp_pernet(net);
- struct l2tp_tunnel *tunnel;
- int count = 0;
-
- rcu_read_lock_bh();
- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (++count > nth) {
- rcu_read_unlock_bh();
- return tunnel;
- }
- }
-
- rcu_read_unlock_bh();
-
- return NULL;
-}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_find_nth);
-
/*****************************************************************************
* Receive data handling
*****************************************************************************/
@@ -1436,74 +1416,11 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
{
struct l2tp_tunnel *tunnel = NULL;
int err;
- struct socket *sock = NULL;
- struct sock *sk = NULL;
- struct l2tp_net *pn;
enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
- /* Get the tunnel socket from the fd, which was opened by
- * the userspace L2TP daemon. If not specified, create a
- * kernel socket.
- */
- if (fd < 0) {
- err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id,
- cfg, &sock);
- if (err < 0)
- goto err;
- } else {
- sock = sockfd_lookup(fd, &err);
- if (!sock) {
- pr_err("tunl %u: sockfd_lookup(fd=%d) returned %d\n",
- tunnel_id, fd, err);
- err = -EBADF;
- goto err;
- }
-
- /* Reject namespace mismatches */
- if (!net_eq(sock_net(sock->sk), net)) {
- pr_err("tunl %u: netns mismatch\n", tunnel_id);
- err = -EINVAL;
- goto err;
- }
- }
-
- sk = sock->sk;
-
if (cfg != NULL)
encap = cfg->encap;
- /* Quick sanity checks */
- err = -EPROTONOSUPPORT;
- if (sk->sk_type != SOCK_DGRAM) {
- pr_debug("tunl %hu: fd %d wrong socket type\n",
- tunnel_id, fd);
- goto err;
- }
- switch (encap) {
- case L2TP_ENCAPTYPE_UDP:
- if (sk->sk_protocol != IPPROTO_UDP) {
- pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
- tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
- goto err;
- }
- break;
- case L2TP_ENCAPTYPE_IP:
- if (sk->sk_protocol != IPPROTO_L2TP) {
- pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
- tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
- goto err;
- }
- break;
- }
-
- /* Check if this socket has already been prepped */
- tunnel = l2tp_tunnel(sk);
- if (tunnel != NULL) {
- /* This socket has already been prepped */
- err = -EBUSY;
- goto err;
- }
-
tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
if (tunnel == NULL) {
err = -ENOMEM;
@@ -1520,72 +1437,126 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
rwlock_init(&tunnel->hlist_lock);
tunnel->acpt_newsess = true;
- /* The net we belong to */
- tunnel->l2tp_net = net;
- pn = l2tp_pernet(net);
-
if (cfg != NULL)
tunnel->debug = cfg->debug;
- /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
tunnel->encap = encap;
- if (encap == L2TP_ENCAPTYPE_UDP) {
- struct udp_tunnel_sock_cfg udp_cfg = { };
-
- udp_cfg.sk_user_data = tunnel;
- udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP;
- udp_cfg.encap_rcv = l2tp_udp_encap_recv;
- udp_cfg.encap_destroy = l2tp_udp_encap_destroy;
-
- setup_udp_tunnel_sock(net, sock, &udp_cfg);
- } else {
- sk->sk_user_data = tunnel;
- }
- /* Bump the reference count. The tunnel context is deleted
- * only when this drops to zero. A reference is also held on
- * the tunnel socket to ensure that it is not released while
- * the tunnel is extant. Must be done before sk_destruct is
- * set.
- */
refcount_set(&tunnel->ref_count, 1);
- sock_hold(sk);
- tunnel->sock = sk;
tunnel->fd = fd;
- /* Hook on the tunnel socket destructor so that we can cleanup
- * if the tunnel socket goes away.
- */
- tunnel->old_sk_destruct = sk->sk_destruct;
- sk->sk_destruct = &l2tp_tunnel_destruct;
- lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock");
-
- sk->sk_allocation = GFP_ATOMIC;
-
/* Init delete workqueue struct */
INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work);
- /* Add tunnel to our list */
INIT_LIST_HEAD(&tunnel->list);
- spin_lock_bh(&pn->l2tp_tunnel_list_lock);
- list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
err = 0;
err:
if (tunnelp)
*tunnelp = tunnel;
- /* If tunnel's socket was created by the kernel, it doesn't
- * have a file.
- */
- if (sock && sock->file)
- sockfd_put(sock);
-
return err;
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
+static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
+ enum l2tp_encap_type encap)
+{
+ if (!net_eq(sock_net(sk), net))
+ return -EINVAL;
+
+ if (sk->sk_type != SOCK_DGRAM)
+ return -EPROTONOSUPPORT;
+
+ if ((encap == L2TP_ENCAPTYPE_UDP && sk->sk_protocol != IPPROTO_UDP) ||
+ (encap == L2TP_ENCAPTYPE_IP && sk->sk_protocol != IPPROTO_L2TP))
+ return -EPROTONOSUPPORT;
+
+ if (sk->sk_user_data)
+ return -EBUSY;
+
+ return 0;
+}
+
+int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
+ struct l2tp_tunnel_cfg *cfg)
+{
+ struct l2tp_tunnel *tunnel_walk;
+ struct l2tp_net *pn;
+ struct socket *sock;
+ struct sock *sk;
+ int ret;
+
+ if (tunnel->fd < 0) {
+ ret = l2tp_tunnel_sock_create(net, tunnel->tunnel_id,
+ tunnel->peer_tunnel_id, cfg,
+ &sock);
+ if (ret < 0)
+ goto err;
+ } else {
+ sock = sockfd_lookup(tunnel->fd, &ret);
+ if (!sock)
+ goto err;
+
+ ret = l2tp_validate_socket(sock->sk, net, tunnel->encap);
+ if (ret < 0)
+ goto err_sock;
+ }
+
+ sk = sock->sk;
+
+ sock_hold(sk);
+ tunnel->sock = sk;
+ tunnel->l2tp_net = net;
+
+ pn = l2tp_pernet(net);
+
+ spin_lock_bh(&pn->l2tp_tunnel_list_lock);
+ list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) {
+ if (tunnel_walk->tunnel_id == tunnel->tunnel_id) {
+ spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+
+ ret = -EEXIST;
+ goto err_sock;
+ }
+ }
+ list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
+ spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+
+ if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+ struct udp_tunnel_sock_cfg udp_cfg = {
+ .sk_user_data = tunnel,
+ .encap_type = UDP_ENCAP_L2TPINUDP,
+ .encap_rcv = l2tp_udp_encap_recv,
+ .encap_destroy = l2tp_udp_encap_destroy,
+ };
+
+ setup_udp_tunnel_sock(net, sock, &udp_cfg);
+ } else {
+ sk->sk_user_data = tunnel;
+ }
+
+ tunnel->old_sk_destruct = sk->sk_destruct;
+ sk->sk_destruct = &l2tp_tunnel_destruct;
+ lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class,
+ "l2tp_sock");
+ sk->sk_allocation = GFP_ATOMIC;
+
+ if (tunnel->fd >= 0)
+ sockfd_put(sock);
+
+ return 0;
+
+err_sock:
+ if (tunnel->fd < 0)
+ sock_release(sock);
+ else
+ sockfd_put(sock);
+err:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_register);
+
/* This function is used by the netlink TUNNEL_DELETE command.
*/
void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
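Note: tunnel setup is now a two-step sequence, as the netlink and pppol2tp call sites in this patch show: l2tp_tunnel_create() only allocates and initialises the context, and l2tp_tunnel_register() validates the socket and publishes the tunnel on the per-net list. A condensed sketch of the sequence (error handling as in the patch; the wrapper function is hypothetical):

/* Condensed sketch of the new create-then-register sequence. */
static int setup_tunnel(struct net *net, int fd, int version, u32 tunnel_id,
			u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg)
{
	struct l2tp_tunnel *tunnel;
	int err;

	err = l2tp_tunnel_create(net, fd, version, tunnel_id,
				 peer_tunnel_id, cfg, &tunnel);
	if (err < 0)
		return err;

	l2tp_tunnel_inc_refcount(tunnel);
	err = l2tp_tunnel_register(tunnel, net, cfg);
	if (err < 0) {
		kfree(tunnel);	/* never registered: plain kfree, as in the patch */
		return err;
	}

	/* ... use tunnel ... */
	l2tp_tunnel_dec_refcount(tunnel);
	return 0;
}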
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 2718d0b284d0..c199020f8a8a 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -212,6 +212,8 @@ static inline void *l2tp_session_priv(struct l2tp_session *session)
}
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
+struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth);
+
void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
struct l2tp_session *l2tp_session_get(const struct net *net,
@@ -220,12 +222,13 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth);
struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
const char *ifname);
-struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id);
-struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth);
int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
struct l2tp_tunnel **tunnelp);
+int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
+ struct l2tp_tunnel_cfg *cfg);
+
void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
struct l2tp_session *l2tp_session_create(int priv_size,
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 72e713da4733..7f1e842ef05a 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -47,7 +47,11 @@ struct l2tp_dfs_seq_data {
static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
{
- pd->tunnel = l2tp_tunnel_find_nth(pd->net, pd->tunnel_idx);
+ /* Drop reference taken during previous invocation */
+ if (pd->tunnel)
+ l2tp_tunnel_dec_refcount(pd->tunnel);
+
+ pd->tunnel = l2tp_tunnel_get_nth(pd->net, pd->tunnel_idx);
pd->tunnel_idx++;
}
@@ -96,7 +100,17 @@ static void *l2tp_dfs_seq_next(struct seq_file *m, void *v, loff_t *pos)
static void l2tp_dfs_seq_stop(struct seq_file *p, void *v)
{
- /* nothing to do */
+ struct l2tp_dfs_seq_data *pd = v;
+
+ if (!pd || pd == SEQ_START_TOKEN)
+ return;
+
+ /* Drop reference taken by last invocation of l2tp_dfs_next_tunnel() */
+ if (pd->tunnel) {
+ l2tp_tunnel_dec_refcount(pd->tunnel);
+ pd->tunnel = NULL;
+ pd->session = NULL;
+ }
}
static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
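
The debugfs fix follows the standard pattern for refcounted seq_file cursors: every ->next drops the reference taken by the previous step, and ->stop drops whatever is still held, since the core may stop iteration at any position. In outline (a generic sketch; object_get_nth()/object_put() stand in for any refcounted lookup):

    #include <linux/seq_file.h>

    struct object;                                  /* hypothetical refcounted type */
    extern struct object *object_get_nth(int idx);  /* takes a ref, NULL at end */
    extern void object_put(struct object *obj);     /* drops a ref */

    struct cursor {
            struct object *obj;
            int idx;
    };

    static void cursor_advance(struct cursor *c)
    {
            if (c->obj)
                    object_put(c->obj);             /* ref from previous call */
            c->obj = object_get_nth(c->idx++);      /* new ref, or NULL at end */
    }

    static void cursor_stop(struct seq_file *m, void *v)
    {
            struct cursor *c = v;

            if (!c || c == SEQ_START_TOKEN)
                    return;
            if (c->obj) {
                    object_put(c->obj);
                    c->obj = NULL;
            }
    }
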
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index e7ea9c4b89ff..6616c9fd292f 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -236,12 +236,6 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
if (info->attrs[L2TP_ATTR_DEBUG])
cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel != NULL) {
- ret = -EEXIST;
- goto out;
- }
-
ret = -EINVAL;
switch (cfg.encap) {
case L2TP_ENCAPTYPE_UDP:
@@ -251,9 +245,19 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
break;
}
- if (ret >= 0)
- ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
- tunnel, L2TP_CMD_TUNNEL_CREATE);
+ if (ret < 0)
+ goto out;
+
+ l2tp_tunnel_inc_refcount(tunnel);
+ ret = l2tp_tunnel_register(tunnel, net, &cfg);
+ if (ret < 0) {
+ kfree(tunnel);
+ goto out;
+ }
+ ret = l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel,
+ L2TP_CMD_TUNNEL_CREATE);
+ l2tp_tunnel_dec_refcount(tunnel);
+
out:
return ret;
}
@@ -483,14 +487,17 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback
struct net *net = sock_net(skb->sk);
for (;;) {
- tunnel = l2tp_tunnel_find_nth(net, ti);
+ tunnel = l2tp_tunnel_get_nth(net, ti);
if (tunnel == NULL)
goto out;
if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- tunnel, L2TP_CMD_TUNNEL_GET) < 0)
+ tunnel, L2TP_CMD_TUNNEL_GET) < 0) {
+ l2tp_tunnel_dec_refcount(tunnel);
goto out;
+ }
+ l2tp_tunnel_dec_refcount(tunnel);
ti++;
}
@@ -844,7 +851,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
for (;;) {
if (tunnel == NULL) {
- tunnel = l2tp_tunnel_find_nth(net, ti);
+ tunnel = l2tp_tunnel_get_nth(net, ti);
if (tunnel == NULL)
goto out;
}
@@ -852,6 +859,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
session = l2tp_session_get_nth(tunnel, si);
if (session == NULL) {
ti++;
+ l2tp_tunnel_dec_refcount(tunnel);
tunnel = NULL;
si = 0;
continue;
@@ -861,6 +869,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
cb->nlh->nlmsg_seq, NLM_F_MULTI,
session, L2TP_CMD_SESSION_GET) < 0) {
l2tp_session_dec_refcount(session);
+ l2tp_tunnel_dec_refcount(tunnel);
break;
}
l2tp_session_dec_refcount(session);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index d6deca11da19..1404bc1c1bb7 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -698,6 +698,15 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel);
if (error < 0)
goto end;
+
+ l2tp_tunnel_inc_refcount(tunnel);
+ error = l2tp_tunnel_register(tunnel, sock_net(sk),
+ &tcfg);
+ if (error < 0) {
+ kfree(tunnel);
+ goto end;
+ }
+ drop_tunnel = true;
}
} else {
/* Error if we can't find the tunnel */
@@ -1542,16 +1551,19 @@ struct pppol2tp_seq_data {
static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
{
+ /* Drop reference taken during previous invocation */
+ if (pd->tunnel)
+ l2tp_tunnel_dec_refcount(pd->tunnel);
+
for (;;) {
- pd->tunnel = l2tp_tunnel_find_nth(net, pd->tunnel_idx);
+ pd->tunnel = l2tp_tunnel_get_nth(net, pd->tunnel_idx);
pd->tunnel_idx++;
- if (pd->tunnel == NULL)
- break;
+ /* Only accept L2TPv2 tunnels */
+ if (!pd->tunnel || pd->tunnel->version == 2)
+ return;
- /* Ignore L2TPv3 tunnels */
- if (pd->tunnel->version < 3)
- break;
+ l2tp_tunnel_dec_refcount(pd->tunnel);
}
}
@@ -1600,7 +1612,17 @@ static void *pppol2tp_seq_next(struct seq_file *m, void *v, loff_t *pos)
static void pppol2tp_seq_stop(struct seq_file *p, void *v)
{
- /* nothing to do */
+ struct pppol2tp_seq_data *pd = v;
+
+ if (!pd || pd == SEQ_START_TOKEN)
+ return;
+
+ /* Drop reference taken by last invocation of pppol2tp_next_tunnel() */
+ if (pd->tunnel) {
+ l2tp_tunnel_dec_refcount(pd->tunnel);
+ pd->tunnel = NULL;
+ pd->session = NULL;
+ }
}
static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 01dcc0823d1f..cb80ebb38311 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -199,9 +199,19 @@ static int llc_ui_release(struct socket *sock)
llc->laddr.lsap, llc->daddr.lsap);
if (!llc_send_disc(sk))
llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo);
- if (!sock_flag(sk, SOCK_ZAPPED))
+ if (!sock_flag(sk, SOCK_ZAPPED)) {
+ struct llc_sap *sap = llc->sap;
+
+ /* Hold this for release_sock(), so that llc_backlog_rcv()
+ * could still use it.
+ */
+ llc_sap_hold(sap);
llc_sap_remove_socket(llc->sap, sk);
- release_sock(sk);
+ release_sock(sk);
+ llc_sap_put(sap);
+ } else {
+ release_sock(sk);
+ }
if (llc->dev)
dev_put(llc->dev);
sock_put(sk);
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index 163121192aca..4d78375f9872 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -1099,14 +1099,7 @@ int llc_conn_ac_inc_tx_win_size(struct sock *sk, struct sk_buff *skb)
int llc_conn_ac_stop_all_timers(struct sock *sk, struct sk_buff *skb)
{
- struct llc_sock *llc = llc_sk(sk);
-
- del_timer(&llc->pf_cycle_timer.timer);
- del_timer(&llc->ack_timer.timer);
- del_timer(&llc->rej_sent_timer.timer);
- del_timer(&llc->busy_state_timer.timer);
- llc->ack_must_be_send = 0;
- llc->ack_pf = 0;
+ llc_sk_stop_all_timers(sk, false);
return 0;
}
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 110e32bcb399..c0ac522b48a1 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -961,6 +961,26 @@ out:
return sk;
}
+void llc_sk_stop_all_timers(struct sock *sk, bool sync)
+{
+ struct llc_sock *llc = llc_sk(sk);
+
+ if (sync) {
+ del_timer_sync(&llc->pf_cycle_timer.timer);
+ del_timer_sync(&llc->ack_timer.timer);
+ del_timer_sync(&llc->rej_sent_timer.timer);
+ del_timer_sync(&llc->busy_state_timer.timer);
+ } else {
+ del_timer(&llc->pf_cycle_timer.timer);
+ del_timer(&llc->ack_timer.timer);
+ del_timer(&llc->rej_sent_timer.timer);
+ del_timer(&llc->busy_state_timer.timer);
+ }
+
+ llc->ack_must_be_send = 0;
+ llc->ack_pf = 0;
+}
+
/**
* llc_sk_free - Frees a LLC socket
* @sk - socket to free
@@ -973,7 +993,7 @@ void llc_sk_free(struct sock *sk)
llc->state = LLC_CONN_OUT_OF_SVC;
/* Stop all (possibly) running timers */
- llc_conn_ac_stop_all_timers(sk, NULL);
+ llc_sk_stop_all_timers(sk, true);
#ifdef DEBUG_LLC_CONN_ALLOC
printk(KERN_INFO "%s: unackq=%d, txq=%d\n", __func__,
skb_queue_len(&llc->pdu_unack_q),
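
The split matters because the two deletion primitives give different guarantees: del_timer() merely dequeues a pending timer and is safe in atomic context, while del_timer_sync() also waits for a handler running on another CPU, which is mandatory before the backing memory is freed but would deadlock if called from the timer callback itself. The free-path rule in miniature (struct my_obj is illustrative):

    #include <linux/timer.h>
    #include <linux/slab.h>

    struct my_obj {
            struct timer_list timer;
    };

    static void my_obj_free(struct my_obj *obj)
    {
            /* Wait for any in-flight handler before freeing; plain
             * del_timer() here would leave a window where the handler
             * dereferences freed memory -- the class of bug this
             * patch closes for llc_sk_free().
             */
            del_timer_sync(&obj->timer);
            kfree(obj);
    }
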
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 22dc1b9d6362..c070dfc0190a 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1472,6 +1472,16 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb,
iface = rcu_dereference(netlbl_unlhsh_def);
if (iface == NULL || !iface->valid)
goto unlabel_getattr_nolabel;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ /* When resolving a fallback label, check the sk_buff version as
+ * it is possible (e.g. SCTP) to have family = PF_INET6 while
+ * receiving ip_hdr(skb)->version = 4.
+ */
+ if (family == PF_INET6 && ip_hdr(skb)->version == 4)
+ family = PF_INET;
+#endif /* IPv6 */
+
switch (family) {
case PF_INET: {
struct iphdr *hdr4;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 616cb9c18f88..c31b0687396a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3008,6 +3008,7 @@ static int packet_release(struct socket *sock)
packet_flush_mclist(sk);
+ lock_sock(sk);
if (po->rx_ring.pg_vec) {
memset(&req_u, 0, sizeof(req_u));
packet_set_ring(sk, &req_u, 1, 0);
@@ -3017,6 +3018,7 @@ static int packet_release(struct socket *sock)
memset(&req_u, 0, sizeof(req_u));
packet_set_ring(sk, &req_u, 1, 1);
}
+ release_sock(sk);
f = fanout_release(sk);
@@ -3643,6 +3645,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
union tpacket_req_u req_u;
int len;
+ lock_sock(sk);
switch (po->tp_version) {
case TPACKET_V1:
case TPACKET_V2:
@@ -3653,12 +3656,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
len = sizeof(req_u.req3);
break;
}
- if (optlen < len)
- return -EINVAL;
- if (copy_from_user(&req_u.req, optval, len))
- return -EFAULT;
- return packet_set_ring(sk, &req_u, 0,
- optname == PACKET_TX_RING);
+ if (optlen < len) {
+ ret = -EINVAL;
+ } else {
+ if (copy_from_user(&req_u.req, optval, len))
+ ret = -EFAULT;
+ else
+ ret = packet_set_ring(sk, &req_u, 0,
+ optname == PACKET_TX_RING);
+ }
+ release_sock(sk);
+ return ret;
}
case PACKET_COPY_THRESH:
{
@@ -4208,8 +4216,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
/* Added to avoid minimal code churn */
struct tpacket_req *req = &req_u->req;
- lock_sock(sk);
-
rb = tx_ring ? &po->tx_ring : &po->rx_ring;
rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
@@ -4347,7 +4353,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
if (pg_vec)
free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
- release_sock(sk);
return err;
}
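
Hoisting lock_sock() from packet_set_ring() into packet_release() and packet_setsockopt() makes the size check, the copy from userspace, and the ring swap one critical section, so a concurrent close can no longer observe a half-installed ring. The refactor pattern in general form (a sketch; struct ring_req and apply_ring() are illustrative):

    #include <net/sock.h>
    #include <linux/uaccess.h>

    struct ring_req { unsigned int block_nr; };     /* hypothetical request */
    extern int apply_ring(struct sock *sk, const struct ring_req *req);

    static int set_ring_opt(struct sock *sk, const void __user *optval,
                            unsigned int optlen)
    {
            struct ring_req req;
            int ret;

            lock_sock(sk);
            if (optlen < sizeof(req))
                    ret = -EINVAL;
            else if (copy_from_user(&req, optval, sizeof(req)))
                    ret = -EFAULT;
            else
                    ret = apply_ring(sk, &req); /* helper now expects the lock held */
            release_sock(sk);
            return ret;
    }
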
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index b33e5aeb4c06..2aa07b547b16 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1135,3 +1135,4 @@ module_exit(qrtr_proto_fini);
MODULE_DESCRIPTION("Qualcomm IPC-router driver");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_NETPROTO(PF_QIPCRTR);
diff --git a/net/rds/send.c b/net/rds/send.c
index acad04243b41..94c7f74909be 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -1017,10 +1017,15 @@ static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn)
if (conn->c_npaths == 0 && hash != 0) {
rds_send_ping(conn, 0);
- if (conn->c_npaths == 0) {
- wait_event_interruptible(conn->c_hs_waitq,
- (conn->c_npaths != 0));
- }
+ /* The underlying connection is not up yet. Need to wait
+ * until it is up to be sure that the non-zero c_path can be
+ * used. But if we are interrupted, we have to use the zero
+ * c_path in case the connection ends up being non-MP capable.
+ */
+ if (conn->c_npaths == 0)
+ if (wait_event_interruptible(conn->c_hs_waitq,
+ conn->c_npaths != 0))
+ hash = 0;
if (conn->c_npaths == 1)
hash = 0;
}
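
The RDS fix turns on the return value of wait_event_interruptible(): it evaluates to 0 once the condition has been observed true, and to -ERESTARTSYS if a signal ended the sleep first, in which case the condition may still be false. The general contract, in miniature (wq and ready are illustrative):

    long err = wait_event_interruptible(wq, READ_ONCE(ready));
    if (err)                /* -ERESTARTSYS: interrupted by a signal */
            return err;     /* "ready" must not be assumed true here */
    /* err == 0: the condition held when we woke */
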
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index a5994cf0512b..8527cfdc446d 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -652,7 +652,7 @@ static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
}
}
- return 0;
+ return -ENOENT;
}
static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
@@ -682,7 +682,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
u16 mtype;
u16 dlen;
- curr_data = ife_tlv_meta_decode(tlv_data, &mtype, &dlen, NULL);
+ curr_data = ife_tlv_meta_decode(tlv_data, ifehdr_end, &mtype,
+ &dlen, NULL);
+ if (!curr_data) {
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
+ return TC_ACT_SHOT;
+ }
if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) {
/* abuse overlimits to count when we receive metadata
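
Passing ifehdr_end into ife_tlv_meta_decode() lets the decoder reject a TLV whose declared length runs past the received buffer, and the caller now drops such packets instead of reading past the end. The shape of a bounds-checked TLV walk, as a self-contained userspace sketch (byte-order handling and the real IFE header layout are elided):

    #include <stdint.h>
    #include <stddef.h>
    #include <string.h>

    struct tlv_hdr { uint16_t type; uint16_t len; };  /* len covers hdr + value */

    /* Return a pointer to the value, or NULL if the TLV overruns "end". */
    static const uint8_t *tlv_decode(const uint8_t *p, const uint8_t *end,
                                     uint16_t *type, uint16_t *dlen)
    {
            struct tlv_hdr hdr;

            if (end - p < (ptrdiff_t)sizeof(hdr))
                    return NULL;
            memcpy(&hdr, p, sizeof(hdr));   /* avoids unaligned loads */
            if (hdr.len < sizeof(hdr) || hdr.len > end - p)
                    return NULL;            /* declared length escapes buffer */
            *type = hdr.type;
            *dlen = hdr.len - sizeof(hdr);
            return p + sizeof(hdr);
    }
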
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index f889a84f264d..be296d633e95 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -172,6 +172,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
struct list_head *pos, *temp;
struct sctp_chunk *chunk;
struct sctp_datamsg *msg;
+ struct sctp_sock *sp;
+ struct sctp_af *af;
int err;
msg = sctp_datamsg_new(GFP_KERNEL);
@@ -190,9 +192,11 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
/* This is the biggest possible DATA chunk that can fit into
* the packet
*/
- max_data = asoc->pathmtu -
- sctp_sk(asoc->base.sk)->pf->af->net_header_len -
- sizeof(struct sctphdr) - sctp_datachk_len(&asoc->stream);
+ sp = sctp_sk(asoc->base.sk);
+ af = sp->pf->af;
+ max_data = asoc->pathmtu - af->net_header_len -
+ sizeof(struct sctphdr) - sctp_datachk_len(&asoc->stream) -
+ af->ip_options_len(asoc->base.sk);
max_data = SCTP_TRUNC4(max_data);
	/* If the peer requested that we authenticate DATA chunks
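
Worked example of the new sizing, assuming IPv4 with a 12-byte CIPSO option and the classic DATA chunk: max_data = 1500 (pathmtu) - 20 (IP header) - 12 (IP options) - 12 (SCTP common header) - 16 (DATA chunk header) = 1440 bytes, which SCTP_TRUNC4() leaves untouched since it is already a multiple of four. Before this patch the 12 option bytes were not subtracted, so every full-sized chunk overshot the MTU.
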
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index f1fc48e9689c..2e3f7b75a8ec 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -427,6 +427,41 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
rcu_read_unlock();
}
+/* Copy over any ip options */
+static void sctp_v6_copy_ip_options(struct sock *sk, struct sock *newsk)
+{
+ struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
+
+ newnp = inet6_sk(newsk);
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ if (!opt)
+ pr_err("%s: Failed to copy ip options\n", __func__);
+ }
+ RCU_INIT_POINTER(newnp->opt, opt);
+ rcu_read_unlock();
+}
+
+/* Account for the IP options */
+static int sctp_v6_ip_options_len(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
+ int len = 0;
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt)
+ len = opt->opt_flen + opt->opt_nflen;
+
+ rcu_read_unlock();
+ return len;
+}
+
/* Initialize a sockaddr_storage from an incoming skb. */
static void sctp_v6_from_skb(union sctp_addr *addr, struct sk_buff *skb,
int is_saddr)
@@ -521,46 +556,49 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr,
addr->v6.sin6_scope_id = 0;
}
-/* Compare addresses exactly.
- * v4-mapped-v6 is also in consideration.
- */
-static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
- const union sctp_addr *addr2)
+static int __sctp_v6_cmp_addr(const union sctp_addr *addr1,
+ const union sctp_addr *addr2)
{
if (addr1->sa.sa_family != addr2->sa.sa_family) {
if (addr1->sa.sa_family == AF_INET &&
addr2->sa.sa_family == AF_INET6 &&
- ipv6_addr_v4mapped(&addr2->v6.sin6_addr)) {
- if (addr2->v6.sin6_port == addr1->v4.sin_port &&
- addr2->v6.sin6_addr.s6_addr32[3] ==
- addr1->v4.sin_addr.s_addr)
- return 1;
- }
+ ipv6_addr_v4mapped(&addr2->v6.sin6_addr) &&
+ addr2->v6.sin6_addr.s6_addr32[3] ==
+ addr1->v4.sin_addr.s_addr)
+ return 1;
+
if (addr2->sa.sa_family == AF_INET &&
addr1->sa.sa_family == AF_INET6 &&
- ipv6_addr_v4mapped(&addr1->v6.sin6_addr)) {
- if (addr1->v6.sin6_port == addr2->v4.sin_port &&
- addr1->v6.sin6_addr.s6_addr32[3] ==
- addr2->v4.sin_addr.s_addr)
- return 1;
- }
+ ipv6_addr_v4mapped(&addr1->v6.sin6_addr) &&
+ addr1->v6.sin6_addr.s6_addr32[3] ==
+ addr2->v4.sin_addr.s_addr)
+ return 1;
+
return 0;
}
- if (addr1->v6.sin6_port != addr2->v6.sin6_port)
- return 0;
+
if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr))
return 0;
+
/* If this is a linklocal address, compare the scope_id. */
- if (ipv6_addr_type(&addr1->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) {
- if (addr1->v6.sin6_scope_id && addr2->v6.sin6_scope_id &&
- (addr1->v6.sin6_scope_id != addr2->v6.sin6_scope_id)) {
- return 0;
- }
- }
+ if ((ipv6_addr_type(&addr1->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
+ addr1->v6.sin6_scope_id && addr2->v6.sin6_scope_id &&
+ addr1->v6.sin6_scope_id != addr2->v6.sin6_scope_id)
+ return 0;
return 1;
}
+/* Compare addresses exactly.
+ * v4-mapped-v6 is also in consideration.
+ */
+static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
+ const union sctp_addr *addr2)
+{
+ return __sctp_v6_cmp_addr(addr1, addr2) &&
+ addr1->v6.sin6_port == addr2->v6.sin6_port;
+}
+
/* Initialize addr struct to INADDR_ANY. */
static void sctp_v6_inaddr_any(union sctp_addr *addr, __be16 port)
{
@@ -666,7 +704,6 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
struct sock *newsk;
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct sctp6_sock *newsctp6sk;
- struct ipv6_txoptions *opt;
newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern);
if (!newsk)
@@ -689,12 +726,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;
- rcu_read_lock();
- opt = rcu_dereference(np->opt);
- if (opt)
- opt = ipv6_dup_options(newsk, opt);
- RCU_INIT_POINTER(newnp->opt, opt);
- rcu_read_unlock();
+ sctp_v6_copy_ip_options(sk, newsk);
/* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname()
* and getpeername().
@@ -846,8 +878,8 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
const union sctp_addr *addr2,
struct sctp_sock *opt)
{
- struct sctp_af *af1, *af2;
struct sock *sk = sctp_opt2sk(opt);
+ struct sctp_af *af1, *af2;
af1 = sctp_get_af_specific(addr1->sa.sa_family);
af2 = sctp_get_af_specific(addr2->sa.sa_family);
@@ -863,10 +895,7 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2))
return 1;
- if (addr1->sa.sa_family != addr2->sa.sa_family)
- return 0;
-
- return af1->cmp_addr(addr1, addr2);
+ return __sctp_v6_cmp_addr(addr1, addr2);
}
/* Verify that the provided sockaddr looks bindable. Common verification,
@@ -1043,6 +1072,7 @@ static struct sctp_af sctp_af_inet6 = {
.ecn_capable = sctp_v6_ecn_capable,
.net_header_len = sizeof(struct ipv6hdr),
.sockaddr_len = sizeof(struct sockaddr_in6),
+ .ip_options_len = sctp_v6_ip_options_len,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
@@ -1061,6 +1091,7 @@ static struct sctp_pf sctp_pf_inet6 = {
.addr_to_user = sctp_v6_addr_to_user,
.to_sk_saddr = sctp_v6_to_sk_saddr,
.to_sk_daddr = sctp_v6_to_sk_daddr,
+ .copy_ip_options = sctp_v6_copy_ip_options,
.af = &sctp_af_inet6,
};
diff --git a/net/sctp/output.c b/net/sctp/output.c
index d6e1c90cc09a..690d8557bb7b 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -69,7 +69,11 @@ static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
static void sctp_packet_reset(struct sctp_packet *packet)
{
+ /* sctp_packet_transmit() relies on this to reset size to the
+ * current overhead after sending packets.
+ */
packet->size = packet->overhead;
+
packet->has_cookie_echo = 0;
packet->has_sack = 0;
packet->has_data = 0;
@@ -87,6 +91,7 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
struct sctp_transport *tp = packet->transport;
struct sctp_association *asoc = tp->asoc;
struct sock *sk;
+ size_t overhead = sizeof(struct ipv6hdr) + sizeof(struct sctphdr);
pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
packet->vtag = vtag;
@@ -95,10 +100,22 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
if (!sctp_packet_empty(packet))
return;
- /* set packet max_size with pathmtu */
+ /* set packet max_size with pathmtu, then calculate overhead */
packet->max_size = tp->pathmtu;
- if (!asoc)
+ if (asoc) {
+ struct sctp_sock *sp = sctp_sk(asoc->base.sk);
+ struct sctp_af *af = sp->pf->af;
+
+ overhead = af->net_header_len +
+ af->ip_options_len(asoc->base.sk);
+ overhead += sizeof(struct sctphdr);
+ packet->overhead = overhead;
+ packet->size = overhead;
+ } else {
+ packet->overhead = overhead;
+ packet->size = overhead;
return;
+ }
/* update dst or transport pathmtu if in need */
sk = asoc->base.sk;
@@ -140,23 +157,14 @@ void sctp_packet_init(struct sctp_packet *packet,
struct sctp_transport *transport,
__u16 sport, __u16 dport)
{
- struct sctp_association *asoc = transport->asoc;
- size_t overhead;
-
pr_debug("%s: packet:%p transport:%p\n", __func__, packet, transport);
packet->transport = transport;
packet->source_port = sport;
packet->destination_port = dport;
INIT_LIST_HEAD(&packet->chunk_list);
- if (asoc) {
- struct sctp_sock *sp = sctp_sk(asoc->base.sk);
- overhead = sp->pf->af->net_header_len;
- } else {
- overhead = sizeof(struct ipv6hdr);
- }
- overhead += sizeof(struct sctphdr);
- packet->overhead = overhead;
+ /* The overhead will be calculated by sctp_packet_config() */
+ packet->overhead = 0;
sctp_packet_reset(packet);
packet->vtag = 0;
}
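
Deferring the overhead calculation to sctp_packet_config() lets the figure track the socket's current state rather than whatever was known at init time. Without an association the conservative default still applies: overhead = sizeof(struct ipv6hdr) + sizeof(struct sctphdr) = 40 + 12 = 52 bytes; with one it becomes af->net_header_len + af->ip_options_len(sk) + sizeof(struct sctphdr), so v4, v6, and option-labeled traffic all size correctly from the same packet object.
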
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index a24cde236330..d685f8456762 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -187,6 +187,45 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
return error;
}
+/* Copy over any ip options */
+static void sctp_v4_copy_ip_options(struct sock *sk, struct sock *newsk)
+{
+ struct inet_sock *newinet, *inet = inet_sk(sk);
+ struct ip_options_rcu *inet_opt, *newopt = NULL;
+
+ newinet = inet_sk(newsk);
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt) {
+ newopt = sock_kmalloc(newsk, sizeof(*inet_opt) +
+ inet_opt->opt.optlen, GFP_ATOMIC);
+ if (newopt)
+ memcpy(newopt, inet_opt, sizeof(*inet_opt) +
+ inet_opt->opt.optlen);
+ else
+ pr_err("%s: Failed to copy ip options\n", __func__);
+ }
+ RCU_INIT_POINTER(newinet->inet_opt, newopt);
+ rcu_read_unlock();
+}
+
+/* Account for the IP options */
+static int sctp_v4_ip_options_len(struct sock *sk)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ip_options_rcu *inet_opt;
+ int len = 0;
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt)
+ len = inet_opt->opt.optlen;
+
+ rcu_read_unlock();
+ return len;
+}
+
/* Initialize a sctp_addr from an incoming skb. */
static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
int is_saddr)
@@ -538,6 +577,8 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
sctp_copy_sock(newsk, sk, asoc);
sock_reset_flag(newsk, SOCK_ZAPPED);
+ sctp_v4_copy_ip_options(sk, newsk);
+
newinet = inet_sk(newsk);
newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr;
@@ -956,6 +997,7 @@ static struct sctp_pf sctp_pf_inet = {
.addr_to_user = sctp_v4_addr_to_user,
.to_sk_saddr = sctp_v4_to_sk_saddr,
.to_sk_daddr = sctp_v4_to_sk_daddr,
+ .copy_ip_options = sctp_v4_copy_ip_options,
.af = &sctp_af_inet
};
@@ -1040,6 +1082,7 @@ static struct sctp_af sctp_af_inet = {
.ecn_capable = sctp_v4_ecn_capable,
.net_header_len = sizeof(struct iphdr),
.sockaddr_len = sizeof(struct sockaddr_in),
+ .ip_options_len = sctp_v4_ip_options_len,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index cc20bc39ee7c..5a4fb1dc8400 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3098,6 +3098,12 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
if (af->is_any(&addr))
memcpy(&addr, &asconf->source, sizeof(addr));
+ if (security_sctp_bind_connect(asoc->ep->base.sk,
+ SCTP_PARAM_ADD_IP,
+ (struct sockaddr *)&addr,
+ af->sockaddr_len))
+ return SCTP_ERROR_REQ_REFUSED;
+
/* ADDIP 4.3 D9) If an endpoint receives an ADD IP address
* request and does not have the local resources to add this
* new address to the association, it MUST return an Error
@@ -3164,6 +3170,12 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
if (af->is_any(&addr))
memcpy(&addr.v4, sctp_source(asconf), sizeof(addr));
+ if (security_sctp_bind_connect(asoc->ep->base.sk,
+ SCTP_PARAM_SET_PRIMARY,
+ (struct sockaddr *)&addr,
+ af->sockaddr_len))
+ return SCTP_ERROR_REQ_REFUSED;
+
peer = sctp_assoc_lookup_paddr(asoc, &addr);
if (!peer)
return SCTP_ERROR_DNS_FAILED;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index cc56a67dbb4d..dd0594a10961 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -321,6 +321,11 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
struct sctp_packet *packet;
int len;
+ /* Update socket peer label if first association. */
+ if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
+ chunk->skb))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* 6.10 Bundling
* An endpoint MUST NOT bundle INIT, INIT ACK or
* SHUTDOWN COMPLETE with any other chunks.
@@ -922,6 +927,9 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
*/
sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL());
+ /* Set peer label for connection. */
+ security_inet_conn_established(ep->base.sk, chunk->skb);
+
/* RFC 2960 5.1 Normal Establishment of an Association
*
* E) Upon reception of the COOKIE ACK, endpoint "A" will move
@@ -1459,6 +1467,11 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
struct sctp_packet *packet;
int len;
+ /* Update socket peer label if first association. */
+ if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
+ chunk->skb))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* 6.10 Bundling
* An endpoint MUST NOT bundle INIT, INIT ACK or
* SHUTDOWN COMPLETE with any other chunks.
@@ -2145,6 +2158,11 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
}
}
+ /* Update socket peer label if first association. */
+ if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
+ chunk->skb))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Set temp so that it won't be added into hashtable */
new_asoc->temp = 1;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index eb712df7156e..80835ac26d2c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1049,6 +1049,12 @@ static int sctp_setsockopt_bindx(struct sock *sk,
/* Do the work. */
switch (op) {
case SCTP_BINDX_ADD_ADDR:
+ /* Allow security module to validate bindx addresses. */
+ err = security_sctp_bind_connect(sk, SCTP_SOCKOPT_BINDX_ADD,
+ (struct sockaddr *)kaddrs,
+ addrs_size);
+ if (err)
+ goto out;
err = sctp_bindx_add(sk, kaddrs, addrcnt);
if (err)
goto out;
@@ -1258,6 +1264,7 @@ static int __sctp_connect(struct sock *sk,
if (assoc_id)
*assoc_id = asoc->assoc_id;
+
err = sctp_wait_for_connect(asoc, &timeo);
/* Note: the asoc may be freed after the return of
* sctp_wait_for_connect.
@@ -1353,7 +1360,16 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
if (unlikely(IS_ERR(kaddrs)))
return PTR_ERR(kaddrs);
+ /* Allow security module to validate connectx addresses. */
+ err = security_sctp_bind_connect(sk, SCTP_SOCKOPT_CONNECTX,
+ (struct sockaddr *)kaddrs,
+ addrs_size);
+ if (err)
+ goto out_free;
+
err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id);
+
+out_free:
kvfree(kaddrs);
return err;
@@ -1683,6 +1699,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
struct sctp_association *asoc;
enum sctp_scope scope;
struct cmsghdr *cmsg;
+ struct sctp_af *af;
int err;
*tp = NULL;
@@ -1708,6 +1725,21 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
scope = sctp_scope(daddr);
+ /* Label connection socket for first association 1-to-many
+ * style for client sequence socket()->sendmsg(). This
+ * needs to be done before sctp_assoc_add_peer() as that will
+ * set up the initial packet that needs to account for any
+ * security ip options (CIPSO/CALIPSO) added to the packet.
+ */
+ af = sctp_get_af_specific(daddr->sa.sa_family);
+ if (!af)
+ return -EINVAL;
+ err = security_sctp_bind_connect(sk, SCTP_SENDMSG_CONNECT,
+ (struct sockaddr *)daddr,
+ af->sockaddr_len);
+ if (err < 0)
+ return err;
+
asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
if (!asoc)
return -ENOMEM;
@@ -2935,6 +2967,8 @@ static int sctp_setsockopt_primary_addr(struct sock *sk, char __user *optval,
{
struct sctp_prim prim;
struct sctp_transport *trans;
+ struct sctp_af *af;
+ int err;
if (optlen != sizeof(struct sctp_prim))
return -EINVAL;
@@ -2942,6 +2976,17 @@ static int sctp_setsockopt_primary_addr(struct sock *sk, char __user *optval,
if (copy_from_user(&prim, optval, sizeof(struct sctp_prim)))
return -EFAULT;
+ /* Allow security module to validate address but need address len. */
+ af = sctp_get_af_specific(prim.ssp_addr.ss_family);
+ if (!af)
+ return -EINVAL;
+
+ err = security_sctp_bind_connect(sk, SCTP_PRIMARY_ADDR,
+ (struct sockaddr *)&prim.ssp_addr,
+ af->sockaddr_len);
+ if (err)
+ return err;
+
trans = sctp_addr_id2transport(sk, &prim.ssp_addr, prim.ssp_assoc_id);
if (!trans)
return -EINVAL;
@@ -3164,6 +3209,7 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign
static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen)
{
struct sctp_sock *sp = sctp_sk(sk);
+ struct sctp_af *af = sp->pf->af;
struct sctp_assoc_value params;
struct sctp_association *asoc;
int val;
@@ -3188,7 +3234,8 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
if (val) {
int min_len, max_len;
- min_len = SCTP_DEFAULT_MINSEGMENT - sp->pf->af->net_header_len;
+ min_len = SCTP_DEFAULT_MINSEGMENT - af->net_header_len;
+ min_len -= af->ip_options_len(sk);
min_len -= sizeof(struct sctphdr) +
sizeof(struct sctp_data_chunk);
@@ -3201,7 +3248,8 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
asoc = sctp_id2assoc(sk, params.assoc_id);
if (asoc) {
if (val == 0) {
- val = asoc->pathmtu - sp->pf->af->net_header_len;
+ val = asoc->pathmtu - af->net_header_len;
+ val -= af->ip_options_len(sk);
val -= sizeof(struct sctphdr) +
sctp_datachk_len(&asoc->stream);
}
@@ -3270,6 +3318,13 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
if (!sctp_assoc_lookup_laddr(asoc, (union sctp_addr *)&prim.sspp_addr))
return -EADDRNOTAVAIL;
+ /* Allow security module to validate address. */
+ err = security_sctp_bind_connect(sk, SCTP_SET_PEER_PRIMARY_ADDR,
+ (struct sockaddr *)&prim.sspp_addr,
+ af->sockaddr_len);
+ if (err)
+ return err;
+
/* Create an ASCONF chunk with SET_PRIMARY parameter */
chunk = sctp_make_asconf_set_prim(asoc,
(union sctp_addr *)&prim.sspp_addr);
@@ -5143,9 +5198,11 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
sctp_copy_sock(sock->sk, sk, asoc);
/* Make peeled-off sockets more like 1-1 accepted sockets.
- * Set the daddr and initialize id to something more random
+ * Set the daddr and initialize id to something more random and also
+ * copy over any ip options.
*/
sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk);
+ sp->pf->copy_ip_options(sk, sock->sk);
/* Populate the fields of the newsk from the oldsk and migrate the
* asoc to the newsk.
@@ -8468,6 +8525,8 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
{
struct inet_sock *inet = inet_sk(sk);
struct inet_sock *newinet;
+ struct sctp_sock *sp = sctp_sk(sk);
+ struct sctp_endpoint *ep = sp->ep;
newsk->sk_type = sk->sk_type;
newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
@@ -8510,7 +8569,10 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
net_enable_timestamp();
- security_sk_clone(sk, newsk);
+	/* Set newsk security attributes from original sk and connection
+ * security attribute from ep.
+ */
+ security_sctp_sk_clone(ep, sk, newsk);
}
static inline void sctp_copy_descendant(struct sock *sk_to,
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5f8046c62d90..f5d4b69dbabc 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1259,14 +1259,12 @@ static int smc_shutdown(struct socket *sock, int how)
rc = smc_close_shutdown_write(smc);
break;
case SHUT_RD:
- if (sk->sk_state == SMC_LISTEN)
- rc = smc_close_active(smc);
- else
- rc = 0;
- /* nothing more to do because peer is not involved */
+ rc = 0;
+ /* nothing more to do because peer is not involved */
break;
}
- rc1 = kernel_sock_shutdown(smc->clcsock, how);
+ if (smc->clcsock)
+ rc1 = kernel_sock_shutdown(smc->clcsock, how);
/* map sock_shutdown_cmd constants to sk_shutdown value range */
sk->sk_shutdown |= how + 1;
diff --git a/net/socket.c b/net/socket.c
index 54dcb43e25a1..f10f1d947c78 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1536,7 +1536,7 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
*
* 1003.1g adds the ability to recvmsg() to query connection pending
* status to recvmsg. We need to add that support in a way thats
- * clean when we restucture accept also.
+ * clean when we restructure accept also.
*/
int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index b9283ce5cd85..092bebc70048 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -67,7 +67,7 @@ static void strp_abort_strp(struct strparser *strp, int err)
static void strp_start_timer(struct strparser *strp, long timeo)
{
- if (timeo)
+ if (timeo && timeo != LONG_MAX)
mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo);
}
@@ -296,9 +296,9 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
strp_start_timer(strp, timeo);
}
+ stm->accum_len += cand_len;
strp->need_bytes = stm->strp.full_len -
stm->accum_len;
- stm->accum_len += cand_len;
stm->early_eaten = cand_len;
STRP_STATS_ADD(strp->stats.bytes, cand_len);
desc->count = 0; /* Stop reading socket */
@@ -321,6 +321,7 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
/* Hurray, we have a new message! */
cancel_delayed_work(&strp->msg_timer_work);
strp->skb_head = NULL;
+ strp->need_bytes = 0;
STRP_STATS_INCR(strp->stats.msgs);
/* Give skb to upper layer */
@@ -410,9 +411,7 @@ void strp_data_ready(struct strparser *strp)
return;
if (strp->need_bytes) {
- if (strp_peek_len(strp) >= strp->need_bytes)
- strp->need_bytes = 0;
- else
+ if (strp_peek_len(strp) < strp->need_bytes)
return;
}
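
The ordering fix matters because need_bytes is derived from accum_len; computing the shortfall before accumulating the new bytes overstated how much data was still missing. For context, a minimal ->parse_msg callback for 4-byte big-endian length-prefixed records might look like this (a sketch, assuming the strp_callbacks wiring is done elsewhere):

    #include <net/strparser.h>

    /* Return the full record length once known, 0 if more bytes are
     * needed, or a negative error to abort the stream.
     */
    static int demo_parse_msg(struct strparser *strp, struct sk_buff *skb)
    {
            struct strp_msg *msg = strp_msg(skb);
            __be32 hdr;

            if (skb_copy_bits(skb, msg->offset, &hdr, sizeof(hdr)))
                    return 0;       /* length prefix not fully arrived */
            return sizeof(hdr) + be32_to_cpu(hdr);
    }
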
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 12649c9fedab..8654494b4d0a 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -237,9 +237,6 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
- err = crypto_ahash_init(req);
- if (err)
- goto out;
err = crypto_ahash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
if (err)
goto out;
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 1d74d653e6c0..94a2b3f082a8 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -177,6 +177,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
u64 seq_send;
u8 *cksumkey;
unsigned int cksum_usage;
+ __be64 seq_send_be64;
dprintk("RPC: %s\n", __func__);
@@ -187,7 +188,9 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
spin_lock(&krb5_seq_lock);
seq_send = ctx->seq_send64++;
spin_unlock(&krb5_seq_lock);
- *((__be64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
+
+ seq_send_be64 = cpu_to_be64(seq_send);
+ memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
if (ctx->initiate) {
cksumkey = ctx->initiator_sign;
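
The rewrite avoids storing a 64-bit value through a cast pointer into a buffer that is only byte-aligned, which is undefined behaviour on strict-alignment architectures. The unaligned-access helpers express the same thing more directly, if one prefers them to memcpy() (an equivalent spelling, not what the patch uses):

    #include <asm/unaligned.h>

    /* Same effect as the memcpy of seq_send_be64 above. */
    put_unaligned_be64(seq_send, krb5_hdr + 8);
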
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index dcf9515d9aef..b601a73cc9db 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -155,10 +155,12 @@ gss_verify_mic_v2(struct krb5_ctx *ctx,
u8 flags;
int i;
unsigned int cksum_usage;
+ __be16 be16_ptr;
dprintk("RPC: %s\n", __func__);
- if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_MIC)
+ memcpy(&be16_ptr, (char *) ptr, 2);
+ if (be16_to_cpu(be16_ptr) != KG2_TOK_MIC)
return GSS_S_DEFECTIVE_TOKEN;
flags = ptr[2];
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index c536cc24b3d1..cdda4744c9b1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1450,8 +1450,8 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
struct cache_detail *cd)
{
char tbuf[20];
- char *bp, *ep;
- time_t then, now;
+ char *ep;
+ time_t now;
if (*ppos || count > sizeof(tbuf)-1)
return -EINVAL;
@@ -1461,24 +1461,24 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
simple_strtoul(tbuf, &ep, 0);
if (*ep && *ep != '\n')
return -EINVAL;
+ /* Note that while we check that 'buf' holds a valid number,
+ * we always ignore the value and just flush everything.
+ * Making use of the number leads to races.
+ */
- bp = tbuf;
- then = get_expiry(&bp);
now = seconds_since_boot();
- cd->nextcheck = now;
- /* Can only set flush_time to 1 second beyond "now", or
- * possibly 1 second beyond flushtime. This is because
- * flush_time never goes backwards so it mustn't get too far
- * ahead of time.
+ /* Always flush everything, so behave like cache_purge()
+ * Do this by advancing flush_time to the current time,
+ * or by one second if it has already reached the current time.
+ * Newly added cache entries will always have ->last_refresh greater
+	 * than ->flush_time, so they don't get flushed prematurely.
*/
- if (then >= now) {
- /* Want to flush everything, so behave like cache_purge() */
- if (cd->flush_time >= now)
- now = cd->flush_time + 1;
- then = now;
- }
- cd->flush_time = then;
+ if (cd->flush_time >= now)
+ now = cd->flush_time + 1;
+
+ cd->flush_time = now;
+ cd->nextcheck = now;
cache_flush();
*ppos += count;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 806395687bb6..c2266f387213 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1887,7 +1887,7 @@ call_connect_status(struct rpc_task *task)
dprint_status(task);
- trace_rpc_connect_status(task, status);
+ trace_rpc_connect_status(task);
task->tk_status = 0;
switch (status) {
case -ECONNREFUSED:
@@ -2014,6 +2014,9 @@ call_transmit_status(struct rpc_task *task)
case -EPERM:
if (RPC_IS_SOFTCONN(task)) {
xprt_end_transmit(task);
+ if (!task->tk_msg.rpc_proc->p_proc)
+ trace_xprt_ping(task->tk_xprt,
+ task->tk_status);
rpc_exit(task, task->tk_status);
break;
}
@@ -2112,6 +2115,9 @@ call_status(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
int status;
+ if (!task->tk_msg.rpc_proc->p_proc)
+ trace_xprt_ping(task->tk_xprt, task->tk_status);
+
if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent)
task->tk_status = req->rq_reply_bytes_recvd;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 0f08934b2cea..c81ef5e6c981 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1375,6 +1375,7 @@ rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry)
struct dentry *clnt_dir = pipe_dentry->d_parent;
struct dentry *gssd_dir = clnt_dir->d_parent;
+ dget(pipe_dentry);
__rpc_rmpipe(d_inode(clnt_dir), pipe_dentry);
__rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1);
__rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index d9db2eab3a8d..3fe5d60ab0e2 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -276,7 +276,7 @@ static void rpc_set_active(struct rpc_task *task)
{
rpc_task_set_debuginfo(task);
set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
- trace_rpc_task_begin(task->tk_client, task, NULL);
+ trace_rpc_task_begin(task, NULL);
}
/*
@@ -291,7 +291,7 @@ static int rpc_complete_task(struct rpc_task *task)
unsigned long flags;
int ret;
- trace_rpc_task_complete(task->tk_client, task, NULL);
+ trace_rpc_task_complete(task, NULL);
spin_lock_irqsave(&wq->lock, flags);
clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
@@ -358,7 +358,7 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
task->tk_pid, rpc_qname(q), jiffies);
- trace_rpc_task_sleep(task->tk_client, task, q);
+ trace_rpc_task_sleep(task, q);
__rpc_add_wait_queue(q, task, queue_priority);
@@ -428,7 +428,7 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq,
return;
}
- trace_rpc_task_wakeup(task->tk_client, task, queue);
+ trace_rpc_task_wakeup(task, queue);
__rpc_remove_wait_queue(queue, task);
@@ -780,7 +780,7 @@ static void __rpc_execute(struct rpc_task *task)
}
if (!do_action)
break;
- trace_rpc_task_run_action(task->tk_client, task, do_action);
+ trace_rpc_task_run_action(task, do_action);
do_action(task);
/*
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 1e671333c3d5..f68aa46c9dd7 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -24,6 +24,8 @@
#include <linux/sunrpc/metrics.h>
#include <linux/rcupdate.h>
+#include <trace/events/sunrpc.h>
+
#include "netns.h"
#define RPCDBG_FACILITY RPCDBG_MISC
@@ -148,7 +150,7 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
struct rpc_iostats *op_metrics)
{
struct rpc_rqst *req = task->tk_rqstp;
- ktime_t delta, now;
+ ktime_t backlog, execute, now;
if (!op_metrics || !req)
return;
@@ -164,16 +166,20 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
+ backlog = 0;
if (ktime_to_ns(req->rq_xtime)) {
- delta = ktime_sub(req->rq_xtime, task->tk_start);
- op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
+ backlog = ktime_sub(req->rq_xtime, task->tk_start);
+ op_metrics->om_queue = ktime_add(op_metrics->om_queue, backlog);
}
+
op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
- delta = ktime_sub(now, task->tk_start);
- op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
+ execute = ktime_sub(now, task->tk_start);
+ op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute);
spin_unlock(&op_metrics->om_lock);
+
+ trace_rpc_stats_latency(req->rq_task, backlog, req->rq_rtt, execute);
}
EXPORT_SYMBOL_GPL(rpc_count_iostats_metrics);
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index f2b7cb540e61..09a0315ea77b 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -37,12 +37,6 @@ struct rpc_buffer {
char data[];
};
-static inline int rpc_reply_expected(struct rpc_task *task)
-{
- return (task->tk_msg.rpc_proc != NULL) &&
- (task->tk_msg.rpc_proc->p_decode != NULL);
-}
-
static inline int sock_is_loopback(struct sock *sk)
{
struct dst_entry *dst;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 387cc4add6f6..30a4226baf03 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1255,6 +1255,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
/* Syntactic check complete */
serv->sv_stats->rpccnt++;
+ trace_svc_process(rqstp, progp->pg_name);
/* Build the reply header. */
statp = resv->iov_base +resv->iov_len;
@@ -1431,14 +1432,10 @@ svc_process(struct svc_rqst *rqstp)
}
/* Returns 1 for send, 0 for drop */
- if (likely(svc_process_common(rqstp, argv, resv))) {
- int ret = svc_send(rqstp);
+ if (likely(svc_process_common(rqstp, argv, resv)))
+ return svc_send(rqstp);
- trace_svc_process(rqstp, ret);
- return ret;
- }
out_drop:
- trace_svc_process(rqstp, 0);
svc_drop(rqstp);
return 0;
}
@@ -1536,3 +1533,112 @@ u32 svc_max_payload(const struct svc_rqst *rqstp)
return max;
}
EXPORT_SYMBOL_GPL(svc_max_payload);
+
+/**
+ * svc_fill_write_vector - Construct data argument for VFS write call
+ * @rqstp: svc_rqst to operate on
+ * @first: buffer containing first section of write payload
+ * @total: total number of bytes of write payload
+ *
+ * Returns the number of elements populated in the data argument array.
+ */
+unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first,
+ size_t total)
+{
+ struct kvec *vec = rqstp->rq_vec;
+ struct page **pages;
+ unsigned int i;
+
+ /* Some types of transport can present the write payload
+ * entirely in rq_arg.pages. In this case, @first is empty.
+ */
+ i = 0;
+ if (first->iov_len) {
+ vec[i].iov_base = first->iov_base;
+ vec[i].iov_len = min_t(size_t, total, first->iov_len);
+ total -= vec[i].iov_len;
+ ++i;
+ }
+
+ WARN_ON_ONCE(rqstp->rq_arg.page_base != 0);
+ pages = rqstp->rq_arg.pages;
+ while (total) {
+ vec[i].iov_base = page_address(*pages);
+ vec[i].iov_len = min_t(size_t, total, PAGE_SIZE);
+ total -= vec[i].iov_len;
+ ++i;
+
+ ++pages;
+ }
+
+ WARN_ON_ONCE(i > ARRAY_SIZE(rqstp->rq_vec));
+ return i;
+}
+EXPORT_SYMBOL_GPL(svc_fill_write_vector);
+
+/**
+ * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call
+ * @rqstp: svc_rqst to operate on
+ * @first: buffer containing first section of pathname
+ * @total: total length of the pathname argument
+ *
+ * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is
+ * released automatically when @rqstp is recycled.
+ */
+char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first,
+ size_t total)
+{
+ struct xdr_buf *arg = &rqstp->rq_arg;
+ struct page **pages;
+ char *result;
+
+ /* VFS API demands a NUL-terminated pathname. This function
+ * uses a page from @rqstp as the pathname buffer, to enable
+ * direct placement. Thus the total buffer size is PAGE_SIZE.
+ * Space in this buffer for NUL-termination requires that we
+ * cap the size of the returned symlink pathname just a
+ * little early.
+ */
+ if (total > PAGE_SIZE - 1)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ /* Some types of transport can present the pathname entirely
+ * in rq_arg.pages. If not, then copy the pathname into one
+ * page.
+ */
+ pages = arg->pages;
+ WARN_ON_ONCE(arg->page_base != 0);
+ if (first->iov_base == 0) {
+ result = page_address(*pages);
+ result[total] = '\0';
+ } else {
+ size_t len, remaining;
+ char *dst;
+
+ result = page_address(*(rqstp->rq_next_page++));
+ dst = result;
+ remaining = total;
+
+ len = min_t(size_t, total, first->iov_len);
+ memcpy(dst, first->iov_base, len);
+ dst += len;
+ remaining -= len;
+
+ /* No more than one page left */
+ if (remaining) {
+ len = min_t(size_t, remaining, PAGE_SIZE);
+ memcpy(dst, page_address(*pages), len);
+ dst += len;
+ }
+
+ *dst = '\0';
+ }
+
+ /* Sanity check: we don't allow the pathname argument to
+ * contain a NUL byte.
+ */
+ if (strlen(result) != total)
+ return ERR_PTR(-EINVAL);
+ return result;
+}
+EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);
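
Both helpers pull buffer assembly that each NFS version previously open-coded into common code. A hypothetical caller on the write path (demo_vfs_write() and the surrounding names are illustrative, not from the patch):

    extern int demo_vfs_write(struct kvec *vec, unsigned int nvecs, size_t len);

    static int demo_handle_write(struct svc_rqst *rqstp, struct kvec *first,
                                 size_t len)
    {
            unsigned int nvecs;

            nvecs = svc_fill_write_vector(rqstp, first, len);
            /* rqstp->rq_vec[0..nvecs-1] now covers exactly "len" bytes
             * of payload, ready for a vectored VFS write.
             */
            return demo_vfs_write(rqstp->rq_vec, nvecs, len);
    }
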
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index f9307bd6644b..5185efb9027b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -173,6 +173,7 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
set_bit(XPT_BUSY, &xprt->xpt_flags);
rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
xprt->xpt_net = get_net(net);
+ strcpy(xprt->xpt_remotebuf, "uninitialized");
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
@@ -382,25 +383,21 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
int cpu;
if (!svc_xprt_has_something_to_do(xprt))
- goto out;
+ return;
/* Mark transport as busy. It will remain in this state until
* the provider calls svc_xprt_received. We update XPT_BUSY
* atomically because it also guards against trying to enqueue
* the transport twice.
*/
- if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
- /* Don't enqueue transport while already enqueued */
- dprintk("svc: transport %p busy, not enqueued\n", xprt);
- goto out;
- }
+ if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
+ return;
cpu = get_cpu();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
atomic_long_inc(&pool->sp_stats.packets);
- dprintk("svc: transport %p put into queue\n", xprt);
spin_lock_bh(&pool->sp_lock);
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
pool->sp_stats.sockets_queued++;
@@ -412,6 +409,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
continue;
atomic_long_inc(&pool->sp_stats.threads_woken);
+ rqstp->rq_qtime = ktime_get();
wake_up_process(rqstp->rq_task);
goto out_unlock;
}
@@ -420,7 +418,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
out_unlock:
rcu_read_unlock();
put_cpu();
-out:
trace_svc_xprt_do_enqueue(xprt, rqstp);
}
EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
@@ -454,13 +451,9 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
struct svc_xprt, xpt_ready);
list_del_init(&xprt->xpt_ready);
svc_xprt_get(xprt);
-
- dprintk("svc: transport %p dequeued, inuse=%d\n",
- xprt, kref_read(&xprt->xpt_ref));
}
spin_unlock_bh(&pool->sp_lock);
out:
- trace_svc_xprt_dequeue(xprt);
return xprt;
}
@@ -492,7 +485,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
- rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
+ xprt->xpt_ops->xpo_release_rqst(rqstp);
kfree(rqstp->rq_deferred);
rqstp->rq_deferred = NULL;
@@ -538,7 +531,6 @@ void svc_wake_up(struct svc_serv *serv)
if (test_bit(RQ_BUSY, &rqstp->rq_flags))
continue;
rcu_read_unlock();
- dprintk("svc: daemon %p woken up.\n", rqstp);
wake_up_process(rqstp->rq_task);
trace_svc_wake_up(rqstp->rq_task->pid);
return;
@@ -734,6 +726,7 @@ out_found:
rqstp->rq_chandle.thread_wait = 5*HZ;
else
rqstp->rq_chandle.thread_wait = 1*HZ;
+ trace_svc_xprt_dequeue(rqstp);
return rqstp->rq_xprt;
}
@@ -789,7 +782,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
len = svc_deferred_recv(rqstp);
else
len = xprt->xpt_ops->xpo_recvfrom(rqstp);
- dprintk("svc: got len=%d\n", len);
+ rqstp->rq_stime = ktime_get();
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
}
@@ -844,10 +837,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
clear_bit(XPT_OLD, &xprt->xpt_flags);
- if (xprt->xpt_ops->xpo_secure_port(rqstp))
- set_bit(RQ_SECURE, &rqstp->rq_flags);
- else
- clear_bit(RQ_SECURE, &rqstp->rq_flags);
+ xprt->xpt_ops->xpo_secure_port(rqstp);
rqstp->rq_chandle.defer = svc_defer;
rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]);
@@ -859,7 +849,6 @@ out_release:
rqstp->rq_res.len = 0;
svc_xprt_release(rqstp);
out:
- trace_svc_recv(rqstp, err);
return err;
}
EXPORT_SYMBOL_GPL(svc_recv);
@@ -889,7 +878,7 @@ int svc_send(struct svc_rqst *rqstp)
goto out;
/* release the receive skb before sending the reply */
- rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
+ xprt->xpt_ops->xpo_release_rqst(rqstp);
/* calculate over-all length */
xb = &rqstp->rq_res;
@@ -899,6 +888,7 @@ int svc_send(struct svc_rqst *rqstp)
/* Grab mutex to serialize outgoing data. */
mutex_lock(&xprt->xpt_mutex);
+ trace_svc_stats_latency(rqstp);
if (test_bit(XPT_DEAD, &xprt->xpt_flags)
|| test_bit(XPT_CLOSE, &xprt->xpt_flags))
len = -ENOTCONN;
@@ -906,12 +896,12 @@ int svc_send(struct svc_rqst *rqstp)
len = xprt->xpt_ops->xpo_sendto(rqstp);
mutex_unlock(&xprt->xpt_mutex);
rpc_wake_up(&xprt->xpt_bc_pending);
+ trace_svc_send(rqstp, len);
svc_xprt_release(rqstp);
if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
len = 0;
out:
- trace_svc_send(rqstp, len);
return len;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 08cd951aaeea..5445145e639c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -391,9 +391,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
release_sock(sock->sk);
}
-static int svc_sock_secure_port(struct svc_rqst *rqstp)
+static void svc_sock_secure_port(struct svc_rqst *rqstp)
{
- return svc_port_is_privileged(svc_addr(rqstp));
+ if (svc_port_is_privileged(svc_addr(rqstp)))
+ set_bit(RQ_SECURE, &rqstp->rq_flags);
+ else
+ clear_bit(RQ_SECURE, &rqstp->rq_flags);
}
/*
@@ -1309,6 +1312,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state == TCP_LISTEN) {
dprintk("setting up TCP socket for listening\n");
+ strcpy(svsk->sk_xprt.xpt_remotebuf, "listener");
set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
sk->sk_data_ready = svc_tcp_listen_data_ready;
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index e34f4ee7f2b6..30afbd236656 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1519,6 +1519,88 @@ out:
EXPORT_SYMBOL_GPL(xdr_process_buf);
/**
+ * xdr_stream_decode_opaque - Decode variable length opaque
+ * @xdr: pointer to xdr_stream
+ * @ptr: location to store opaque data
+ * @size: size of storage buffer @ptr
+ *
+ * Return values:
+ * On success, returns size of object stored in *@ptr
+ * %-EBADMSG on XDR buffer overflow
+ * %-EMSGSIZE on overflow of storage buffer @ptr
+ */
+ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, size_t size)
+{
+ ssize_t ret;
+ void *p;
+
+ ret = xdr_stream_decode_opaque_inline(xdr, &p, size);
+ if (ret <= 0)
+ return ret;
+ memcpy(ptr, p, ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque);
+
+/**
+ * xdr_stream_decode_opaque_dup - Decode and duplicate variable length opaque
+ * @xdr: pointer to xdr_stream
+ * @ptr: location to store pointer to opaque data
+ * @maxlen: maximum acceptable object size
+ * @gfp_flags: GFP mask to use
+ *
+ * Return values:
+ * On success, returns size of object stored in *@ptr
+ * %-EBADMSG on XDR buffer overflow
+ * %-EMSGSIZE if the size of the object would exceed @maxlen
+ * %-ENOMEM on memory allocation failure
+ */
+ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr,
+ size_t maxlen, gfp_t gfp_flags)
+{
+ ssize_t ret;
+ void *p;
+
+ ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen);
+ if (ret > 0) {
+ *ptr = kmemdup(p, ret, gfp_flags);
+ if (*ptr != NULL)
+ return ret;
+ ret = -ENOMEM;
+ }
+ *ptr = NULL;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque_dup);
+
+/**
+ * xdr_stream_decode_string - Decode variable length string
+ * @xdr: pointer to xdr_stream
+ * @str: location to store string
+ * @size: size of storage buffer @str
+ *
+ * Return values:
+ * On success, returns length of NUL-terminated string stored in *@str
+ * %-EBADMSG on XDR buffer overflow
+ * %-EMSGSIZE on overflow of storage buffer @str
+ */
+ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, size_t size)
+{
+ ssize_t ret;
+ void *p;
+
+ ret = xdr_stream_decode_opaque_inline(xdr, &p, size);
+ if (ret > 0) {
+ memcpy(str, p, ret);
+ str[ret] = '\0';
+ return strlen(str);
+ }
+ *str = '\0';
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_string);
+
+/**
* xdr_stream_decode_string_dup - Decode and duplicate variable length string
* @xdr: pointer to xdr_stream
* @str: location to store pointer to string
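
The three helpers share one shape: decode the opaque inline, then copy, duplicate, or NUL-terminate. Typical use when parsing a bounded string field, given an initialized struct xdr_stream xdr (a sketch; the buffer size is illustrative, and one byte is reserved for the terminator):

    char name[64];
    ssize_t len;

    len = xdr_stream_decode_string(&xdr, name, sizeof(name) - 1);
    if (len < 0)
            return len;     /* -EBADMSG or -EMSGSIZE, per the kerneldoc */
    /* "name" is NUL-terminated and len == strlen(name) */
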
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 8f0ad4f268da..70f005044f06 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -826,6 +826,7 @@ static void xprt_connect_status(struct rpc_task *task)
* @xprt: transport on which the original request was transmitted
* @xid: RPC XID of incoming reply
*
+ * Caller holds xprt->recv_lock.
*/
struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
{
@@ -834,6 +835,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
list_for_each_entry(entry, &xprt->recv, rq_list)
if (entry->rq_xid == xid) {
trace_xprt_lookup_rqst(xprt, xid, 0);
+ entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
return entry;
}
@@ -889,7 +891,13 @@ __must_hold(&req->rq_xprt->recv_lock)
}
}
-static void xprt_update_rtt(struct rpc_task *task)
+/**
+ * xprt_update_rtt - Update RPC RTT statistics
+ * @task: RPC request that recently completed
+ *
+ * Caller holds xprt->recv_lock.
+ */
+void xprt_update_rtt(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_rtt *rtt = task->tk_client->cl_rtt;
@@ -902,13 +910,14 @@ static void xprt_update_rtt(struct rpc_task *task)
rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
}
}
+EXPORT_SYMBOL_GPL(xprt_update_rtt);
/**
* xprt_complete_rqst - called when reply processing is complete
* @task: RPC request that recently completed
* @copied: actual number of bytes received from the transport
*
- * Caller holds transport lock.
+ * Caller holds xprt->recv_lock.
*/
void xprt_complete_rqst(struct rpc_task *task, int copied)
{
@@ -920,9 +929,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
trace_xprt_complete_rqst(xprt, req->rq_xid, copied);
xprt->stat.recvs++;
- req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime);
- if (xprt->ops->timer != NULL)
- xprt_update_rtt(task);
list_del_init(&req->rq_list);
req->rq_private_buf.len = copied;
@@ -1003,7 +1009,7 @@ void xprt_transmit(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
unsigned int connect_cookie;
- int status, numreqs;
+ int status;
dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
@@ -1027,7 +1033,6 @@ void xprt_transmit(struct rpc_task *task)
return;
connect_cookie = xprt->connect_cookie;
- req->rq_xtime = ktime_get();
status = xprt->ops->send_request(task);
trace_xprt_transmit(xprt, req->rq_xid, status);
if (status != 0) {
@@ -1042,9 +1047,6 @@ void xprt_transmit(struct rpc_task *task)
xprt->ops->set_retrans_timeout(task);
- numreqs = atomic_read(&xprt->num_reqs);
- if (numreqs > xprt->stat.max_slots)
- xprt->stat.max_slots = numreqs;
xprt->stat.sends++;
xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
xprt->stat.bklog_u += xprt->backlog.qlen;
@@ -1106,14 +1108,15 @@ static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
{
struct rpc_rqst *req = ERR_PTR(-EAGAIN);
- if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
+ if (xprt->num_reqs >= xprt->max_reqs)
goto out;
+ ++xprt->num_reqs;
spin_unlock(&xprt->reserve_lock);
req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
spin_lock(&xprt->reserve_lock);
if (req != NULL)
goto out;
- atomic_dec(&xprt->num_reqs);
+ --xprt->num_reqs;
req = ERR_PTR(-ENOMEM);
out:
return req;
@@ -1121,7 +1124,8 @@ out:
static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
- if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) {
+ if (xprt->num_reqs > xprt->min_reqs) {
+ --xprt->num_reqs;
kfree(req);
return true;
}
@@ -1157,6 +1161,8 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
spin_unlock(&xprt->reserve_lock);
return;
out_init_req:
+ xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots,
+ xprt->num_reqs);
task->tk_status = 0;
task->tk_rqstp = req;
xprt_request_init(task, xprt);
@@ -1224,7 +1230,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
else
xprt->max_reqs = num_prealloc;
xprt->min_reqs = num_prealloc;
- atomic_set(&xprt->num_reqs, num_prealloc);
+ xprt->num_reqs = num_prealloc;
return xprt;
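
The xprt.c hunks above rely on every reader and writer of xprt->num_reqs already holding xprt->reserve_lock, so the atomic_t can be demoted to a plain integer, and the max_slots statistic can be sampled at the same point under the same lock. A minimal userspace sketch of that pattern, using pthreads (the struct and function names are illustrative, not kernel symbols):

#include <pthread.h>
#include <stdbool.h>

struct slot_table {
	pthread_mutex_t reserve_lock;
	unsigned int num_reqs;   /* all accesses under reserve_lock */
	unsigned int max_reqs;   /* upper bound on table growth */
	unsigned int max_slots;  /* statistic, also under reserve_lock */
};

/* Try to grow the table by one slot. Caller holds reserve_lock, so a
 * plain compare-and-increment is race-free; no atomics needed. */
static bool slot_table_grow(struct slot_table *st)
{
	if (st->num_reqs >= st->max_reqs)
		return false;
	if (++st->num_reqs > st->max_slots)
		st->max_slots = st->num_reqs;
	return true;
}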
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index ed1a4a3065ee..47ebac949769 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -44,13 +44,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
if (IS_ERR(req))
return PTR_ERR(req);
- rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
- DMA_TO_DEVICE, GFP_KERNEL);
- if (IS_ERR(rb))
- goto out_fail;
- req->rl_rdmabuf = rb;
- xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
-
size = r_xprt->rx_data.inline_rsize;
rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
if (IS_ERR(rb))
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index d5f95bb39300..5cc68a824f45 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -191,7 +191,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mr = rpcrdma_mr_get(r_xprt);
if (!mr)
- return ERR_PTR(-ENOBUFS);
+ return ERR_PTR(-EAGAIN);
pageoff = offset_in_page(seg1->mr_offset);
seg1->mr_offset -= pageoff; /* start of page */
@@ -251,6 +251,16 @@ out_maperr:
return ERR_PTR(-EIO);
}
+/* Post Send WR containing the RPC Call message.
+ */
+static int
+fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+{
+ struct ib_send_wr *bad_wr;
+
+ return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, &bad_wr);
+}
+
/* Invalidate all memory regions that were registered for "req".
*
* Sleeps until it is safe for the host CPU to access the
@@ -305,6 +315,7 @@ out_reset:
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_map = fmr_op_map,
+ .ro_send = fmr_op_send,
.ro_unmap_sync = fmr_op_unmap_sync,
.ro_recover_mr = fmr_op_recover_mr,
.ro_open = fmr_op_open,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 90f688f19783..c5743a0960be 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -357,8 +357,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
struct rpcrdma_mr *mr;
struct ib_mr *ibmr;
struct ib_reg_wr *reg_wr;
- struct ib_send_wr *bad_wr;
- int rc, i, n;
+ int i, n;
u8 key;
mr = NULL;
@@ -367,7 +366,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
rpcrdma_mr_defer_recovery(mr);
mr = rpcrdma_mr_get(r_xprt);
if (!mr)
- return ERR_PTR(-ENOBUFS);
+ return ERR_PTR(-EAGAIN);
} while (mr->frwr.fr_state != FRWR_IS_INVALID);
frwr = &mr->frwr;
frwr->fr_state = FRWR_IS_VALID;
@@ -407,22 +406,12 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
ib_update_fast_reg_key(ibmr, ++key);
reg_wr = &frwr->fr_regwr;
- reg_wr->wr.next = NULL;
- reg_wr->wr.opcode = IB_WR_REG_MR;
- frwr->fr_cqe.done = frwr_wc_fastreg;
- reg_wr->wr.wr_cqe = &frwr->fr_cqe;
- reg_wr->wr.num_sge = 0;
- reg_wr->wr.send_flags = 0;
reg_wr->mr = ibmr;
reg_wr->key = ibmr->rkey;
reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
- rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
- if (rc)
- goto out_senderr;
-
mr->mr_handle = ibmr->rkey;
mr->mr_length = ibmr->length;
mr->mr_offset = ibmr->iova;
@@ -442,11 +431,40 @@ out_mapmr_err:
frwr->fr_mr, n, mr->mr_nents);
rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-EIO);
+}
-out_senderr:
- pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
- rpcrdma_mr_defer_recovery(mr);
- return ERR_PTR(-ENOTCONN);
+/* Post Send WR containing the RPC Call message.
+ *
+ * For FRMR, chain any FastReg WRs to the Send WR. Only a
+ * single ib_post_send call is needed to register memory
+ * and then post the Send WR.
+ */
+static int
+frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+{
+ struct ib_send_wr *post_wr, *bad_wr;
+ struct rpcrdma_mr *mr;
+
+ post_wr = &req->rl_sendctx->sc_wr;
+ list_for_each_entry(mr, &req->rl_registered, mr_list) {
+ struct rpcrdma_frwr *frwr;
+
+ frwr = &mr->frwr;
+
+ frwr->fr_cqe.done = frwr_wc_fastreg;
+ frwr->fr_regwr.wr.next = post_wr;
+ frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
+ frwr->fr_regwr.wr.num_sge = 0;
+ frwr->fr_regwr.wr.opcode = IB_WR_REG_MR;
+ frwr->fr_regwr.wr.send_flags = 0;
+
+ post_wr = &frwr->fr_regwr.wr;
+ }
+
+ /* If ib_post_send fails, the next ->send_request for
+ * @req will queue these MRs for recovery.
+ */
+ return ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
}
/* Handle a remotely invalidated mr on the @mrs list
@@ -561,6 +579,7 @@ reset_mrs:
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
+ .ro_send = frwr_op_send,
.ro_reminv = frwr_op_reminv,
.ro_unmap_sync = frwr_op_unmap_sync,
.ro_recover_mr = frwr_op_recover_mr,
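
frwr_op_send() above replaces a separate ib_post_send() per FastReg WR with a single post: each registration WR is pushed onto the front of a chain whose tail is the Send WR, so the HCA executes the registrations before the Send. A hedged userspace sketch of the same chaining idea using libibverbs (the kernel uses the in-kernel ib_* API; the WRs here are assumed to be fully initialized by the caller):

#include <stddef.h>
#include <infiniband/verbs.h>

/* Link each registration WR ahead of the Send WR and post the whole
 * chain with one verb call. */
static int post_chained_send(struct ibv_qp *qp,
			     struct ibv_send_wr *reg_wrs, /* may be NULL */
			     struct ibv_send_wr *send_wr)
{
	struct ibv_send_wr *head = send_wr, *bad_wr;

	while (reg_wrs) {
		struct ibv_send_wr *wr = reg_wrs;

		reg_wrs = wr->next;
		wr->next = head;  /* registration runs before the Send */
		head = wr;
	}
	return ibv_post_send(qp, head, &bad_wr);
}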
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index f0855a959a27..e8adad33d0bb 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -365,7 +365,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
false, &mr);
if (IS_ERR(seg))
- return PTR_ERR(seg);
+ goto out_maperr;
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_read_segment(xdr, mr, pos) < 0)
@@ -377,6 +377,11 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
} while (nsegs);
return 0;
+
+out_maperr:
+ if (PTR_ERR(seg) == -EAGAIN)
+ xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+ return PTR_ERR(seg);
}
/* Register and XDR encode the Write list. Supports encoding a list
@@ -423,7 +428,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mr);
if (IS_ERR(seg))
- return PTR_ERR(seg);
+ goto out_maperr;
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mr) < 0)
@@ -440,6 +445,11 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
*segcount = cpu_to_be32(nchunks);
return 0;
+
+out_maperr:
+ if (PTR_ERR(seg) == -EAGAIN)
+ xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+ return PTR_ERR(seg);
}
/* Register and XDR encode the Reply chunk. Supports encoding an array
@@ -481,7 +491,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mr);
if (IS_ERR(seg))
- return PTR_ERR(seg);
+ goto out_maperr;
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mr) < 0)
@@ -498,6 +508,11 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
*segcount = cpu_to_be32(nchunks);
return 0;
+
+out_maperr:
+ if (PTR_ERR(seg) == -EAGAIN)
+ xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+ return PTR_ERR(seg);
}
/**
@@ -724,8 +739,8 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
* Returns:
* %0 if the RPC was sent successfully,
* %-ENOTCONN if the connection was lost,
- * %-EAGAIN if not enough pages are available for on-demand reply buffer,
- * %-ENOBUFS if no MRs are available to register chunks,
+ * %-EAGAIN if the caller should call again with the same arguments,
+ * %-ENOBUFS if the caller should call again after a delay,
* %-EMSGSIZE if the transport header is too small,
* %-EIO if a permanent problem occurred while marshaling.
*/
@@ -868,10 +883,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
return 0;
out_err:
- if (ret != -ENOBUFS) {
- pr_err("rpcrdma: header marshaling failed (%d)\n", ret);
- r_xprt->rx_stats.failed_marshal_count++;
- }
+ r_xprt->rx_stats.failed_marshal_count++;
return ret;
}
@@ -1366,7 +1378,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
- queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
+ queue_work(rpcrdma_receive_wq, &rep->rr_work);
return;
out_badstatus:
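
The rpc_rdma.c changes above establish a new return-code contract for marshaling: %-EAGAIN means "no MRs are free; sleep via xprt_wait_for_buffer_space() until rpcrdma_mrs_create() calls xprt_write_space(), then retransmit with the same arguments", while %-ENOBUFS means "retry after a delay". A self-contained toy sketch of the caller-side contract (marshal_and_send() is a stand-in, not a kernel function):

#include <errno.h>
#include <stdio.h>

static int attempts;

/* Stand-in for rpcrdma_marshal_req(): fail once with -EAGAIN (no MRs
 * free yet), then succeed. */
static int marshal_and_send(void)
{
	return attempts++ < 1 ? -EAGAIN : 0;
}

int main(void)
{
	for (;;) {
		int rc = marshal_and_send();

		if (rc == 0) {
			puts("sent");
			return 0;
		}
		if (rc == -EAGAIN) {
			/* wait until more MRs are allocated, then
			 * retransmit with the same arguments */
			puts("waiting for buffer space");
			continue;
		}
		if (rc == -ENOBUFS) {
			puts("retrying after a delay");
			continue;
		}
		return 1;  /* -ENOTCONN, -EMSGSIZE, -EIO: hard failure */
	}
}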
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index a4a8f6989ee7..dd8a431dc2ae 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -51,9 +51,9 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/* RPC/RDMA parameters */
-unsigned int svcrdma_ord = RPCRDMA_ORD;
+unsigned int svcrdma_ord = 16; /* historical default */
static unsigned int min_ord = 1;
-static unsigned int max_ord = 4096;
+static unsigned int max_ord = 255;
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
static unsigned int min_max_requests = 4;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 19e9c6b33042..3d45015dca97 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -110,15 +110,16 @@
* the RDMA_RECV completion. The SGL should contain full pages up until the
* last one.
*/
-static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *ctxt,
- u32 byte_count)
+static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *ctxt)
{
struct page *page;
- u32 bc;
int sge_no;
+ u32 len;
- /* Swap the page in the SGE with the page in argpages */
+ /* The reply path assumes the Call's transport header resides
+ * in rqstp->rq_pages[0].
+ */
page = ctxt->pages[0];
put_page(rqstp->rq_pages[0]);
rqstp->rq_pages[0] = page;
@@ -126,35 +127,35 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
/* Set up the XDR head */
rqstp->rq_arg.head[0].iov_base = page_address(page);
rqstp->rq_arg.head[0].iov_len =
- min_t(size_t, byte_count, ctxt->sge[0].length);
- rqstp->rq_arg.len = byte_count;
- rqstp->rq_arg.buflen = byte_count;
+ min_t(size_t, ctxt->byte_len, ctxt->sge[0].length);
+ rqstp->rq_arg.len = ctxt->byte_len;
+ rqstp->rq_arg.buflen = ctxt->byte_len;
/* Compute bytes past head in the SGL */
- bc = byte_count - rqstp->rq_arg.head[0].iov_len;
+ len = ctxt->byte_len - rqstp->rq_arg.head[0].iov_len;
/* If data remains, store it in the pagelist */
- rqstp->rq_arg.page_len = bc;
+ rqstp->rq_arg.page_len = len;
rqstp->rq_arg.page_base = 0;
sge_no = 1;
- while (bc && sge_no < ctxt->count) {
+ while (len && sge_no < ctxt->count) {
page = ctxt->pages[sge_no];
put_page(rqstp->rq_pages[sge_no]);
rqstp->rq_pages[sge_no] = page;
- bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
+ len -= min_t(u32, len, ctxt->sge[sge_no].length);
sge_no++;
}
rqstp->rq_respages = &rqstp->rq_pages[sge_no];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* If not all pages were used from the SGL, free the remaining ones */
- bc = sge_no;
+ len = sge_no;
while (sge_no < ctxt->count) {
page = ctxt->pages[sge_no++];
put_page(page);
}
- ctxt->count = bc;
+ ctxt->count = len;
/* Set up tail */
rqstp->rq_arg.tail[0].iov_base = NULL;
@@ -534,10 +535,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
ctxt, rdma_xprt, rqstp);
atomic_inc(&rdma_stat_recv);
- /* Build up the XDR from the receive buffers. */
- rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
+ svc_rdma_build_arg_xdr(rqstp, ctxt);
- /* Decode the RDMA header. */
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
if (ret < 0)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9ad12a215b51..96cc8f6597d3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -69,7 +69,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
-static int svc_rdma_secure_port(struct svc_rqst *);
+static void svc_rdma_secure_port(struct svc_rqst *);
static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
static const struct svc_xprt_ops svc_rdma_ops = {
@@ -330,9 +330,9 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
flushed:
if (wc->status != IB_WC_WR_FLUSH_ERR)
- pr_warn("svcrdma: receive: %s (%u/0x%x)\n",
- ib_wc_status_msg(wc->status),
- wc->status, wc->vendor_err);
+ pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
+ ib_wc_status_msg(wc->status),
+ wc->status, wc->vendor_err);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
svc_rdma_put_context(ctxt, 1);
@@ -401,8 +401,10 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
*/
set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);
- if (listener)
+ if (listener) {
+ strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
+ }
return cma_xprt;
}
@@ -762,13 +764,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (!svc_rdma_prealloc_ctxts(newxprt))
goto errout;
- /*
- * Limit ORD based on client limit, local device limit, and
- * configured svcrdma limit.
- */
- newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
- newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
-
newxprt->sc_pd = ib_alloc_pd(dev, 0);
if (IS_ERR(newxprt->sc_pd)) {
dprintk("svcrdma: error creating PD for connect request\n");
@@ -843,15 +838,18 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
memset(&conn_param, 0, sizeof conn_param);
conn_param.responder_resources = 0;
- conn_param.initiator_depth = newxprt->sc_ord;
+ conn_param.initiator_depth = min_t(int, newxprt->sc_ord,
+ dev->attrs.max_qp_init_rd_atom);
+ if (!conn_param.initiator_depth) {
+ dprintk("svcrdma: invalid ORD setting\n");
+ ret = -EINVAL;
+ goto errout;
+ }
conn_param.private_data = &pmsg;
conn_param.private_data_len = sizeof(pmsg);
ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
- if (ret) {
- dprintk("svcrdma: failed to accept new connection, ret=%d\n",
- ret);
+ if (ret)
goto errout;
- }
dprintk("svcrdma: new connection %p accepted:\n", newxprt);
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
@@ -862,7 +860,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
dprintk(" rdma_rw_ctxs : %d\n", ctxts);
dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
- dprintk(" ord : %d\n", newxprt->sc_ord);
+ dprintk(" ord : %d\n", conn_param.initiator_depth);
return &newxprt->sc_xprt;
@@ -992,9 +990,9 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
return 1;
}
-static int svc_rdma_secure_port(struct svc_rqst *rqstp)
+static void svc_rdma_secure_port(struct svc_rqst *rqstp)
{
- return 1;
+ set_bit(RQ_SECURE, &rqstp->rq_flags);
}
static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 4b1ecfe979cf..cc1aad325496 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -52,7 +52,6 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/addr.h>
-#include <linux/smp.h>
#include "xprt_rdma.h"
@@ -237,8 +236,6 @@ rpcrdma_connect_worker(struct work_struct *work)
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
spin_lock_bh(&xprt->transport_lock);
- if (++xprt->connect_cookie == 0) /* maintain a reserved value */
- ++xprt->connect_cookie;
if (ep->rep_connected > 0) {
if (!xprt_test_and_set_connected(xprt))
xprt_wake_pending_tasks(xprt, 0);
@@ -540,29 +537,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
}
}
-/* Allocate a fixed-size buffer in which to construct and send the
- * RPC-over-RDMA header for this request.
- */
-static bool
-rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
- gfp_t flags)
-{
- size_t size = RPCRDMA_HDRBUF_SIZE;
- struct rpcrdma_regbuf *rb;
-
- if (req->rl_rdmabuf)
- return true;
-
- rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
- if (IS_ERR(rb))
- return false;
-
- r_xprt->rx_stats.hardway_register_count += size;
- req->rl_rdmabuf = rb;
- xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
- return true;
-}
-
static bool
rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
size_t size, gfp_t flags)
@@ -644,15 +618,11 @@ xprt_rdma_allocate(struct rpc_task *task)
if (RPC_IS_SWAPPER(task))
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
- if (!rpcrdma_get_rdmabuf(r_xprt, req, flags))
- goto out_fail;
if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags))
goto out_fail;
if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
goto out_fail;
- req->rl_cpu = smp_processor_id();
- req->rl_connect_cookie = 0; /* our reserved value */
rpcrdma_set_xprtdata(rqst, req);
rqst->rq_buffer = req->rl_sendbuf->rg_base;
rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
@@ -694,7 +664,8 @@ xprt_rdma_free(struct rpc_task *task)
* Returns:
* %0 if the RPC message has been sent
* %-ENOTCONN if the caller should reconnect and call again
- * %-ENOBUFS if the caller should call again later
+ * %-EAGAIN if the caller should call again
+ * %-ENOBUFS if the caller should call again after a delay
* %-EIO if a permanent error occurred and the request was not
* sent. Do not try to send this message again.
*/
@@ -723,9 +694,9 @@ xprt_rdma_send_request(struct rpc_task *task)
rpcrdma_recv_buffer_get(req);
/* Must suppress retransmit to maintain credits */
- if (req->rl_connect_cookie == xprt->connect_cookie)
+ if (rqst->rq_connect_cookie == xprt->connect_cookie)
goto drop_connection;
- req->rl_connect_cookie = xprt->connect_cookie;
+ rqst->rq_xtime = ktime_get();
__set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
@@ -733,6 +704,12 @@ xprt_rdma_send_request(struct rpc_task *task)
rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
rqst->rq_bytes_sent = 0;
+
+ /* An RPC with no reply will throw off credit accounting,
+ * so drop the connection to reset the credit grant.
+ */
+ if (!rpc_reply_expected(task))
+ goto drop_connection;
return 0;
failed_marshal:
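
The send path above switches from the private req->rl_connect_cookie to the generic rqst->rq_connect_cookie, and the verbs.c hunks below bump xprt->connect_cookie on both ESTABLISHED and DISCONNECTED events. The result is a per-connection generation counter: if a retransmit would go out on the same connection instance that carried the original send, the transport drops the connection instead, resetting the server's credit grant. A small sketch of the invariant (the names are illustrative):

#include <stdbool.h>

struct xprt_state {
	unsigned int connect_cookie;  /* bumped on connect/disconnect */
};

struct request {
	unsigned int connect_cookie;  /* value recorded at first send */
};

/* Retransmitting on the same connection instance would double-spend
 * RDMA credits, so force a reconnect instead. */
static bool must_drop_connection(const struct xprt_state *x,
				 const struct request *req)
{
	return req->connect_cookie == x->connect_cookie;
}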
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index e6f84a6434a0..fe5eaca2d197 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -250,11 +250,11 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
wait_for_completion(&ia->ri_remove_done);
ia->ri_id = NULL;
- ia->ri_pd = NULL;
ia->ri_device = NULL;
/* Return 1 to ensure the core destroys the id. */
return 1;
case RDMA_CM_EVENT_ESTABLISHED:
+ ++xprt->rx_xprt.connect_cookie;
connstate = 1;
rpcrdma_update_connect_private(xprt, &event->param.conn);
goto connected;
@@ -273,6 +273,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
connstate = -EAGAIN;
goto connected;
case RDMA_CM_EVENT_DISCONNECTED:
+ ++xprt->rx_xprt.connect_cookie;
connstate = -ECONNABORTED;
connected:
xprt->rx_buf.rb_credits = 1;
@@ -445,7 +446,9 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
ia->ri_id->qp = NULL;
}
ib_free_cq(ep->rep_attr.recv_cq);
+ ep->rep_attr.recv_cq = NULL;
ib_free_cq(ep->rep_attr.send_cq);
+ ep->rep_attr.send_cq = NULL;
/* The ULP is responsible for ensuring all DMA
* mappings and MRs are gone.
@@ -458,6 +461,8 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
}
rpcrdma_mrs_destroy(buf);
+ ib_dealloc_pd(ia->ri_pd);
+ ia->ri_pd = NULL;
/* Allow waiters to continue */
complete(&ia->ri_remove_done);
@@ -589,11 +594,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
- if (ia->ri_device->attrs.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
- ep->rep_remote_cma.responder_resources = 32;
- else
- ep->rep_remote_cma.responder_resources =
- ia->ri_device->attrs.max_qp_rd_atom;
+ ep->rep_remote_cma.responder_resources =
+ min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom);
/* Limit transport retries so client can detect server
* GID changes quickly. RPC layer handles re-establishing
@@ -628,14 +630,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
cancel_delayed_work_sync(&ep->rep_connect_worker);
- if (ia->ri_id->qp) {
+ if (ia->ri_id && ia->ri_id->qp) {
rpcrdma_ep_disconnect(ep, ia);
rdma_destroy_qp(ia->ri_id);
ia->ri_id->qp = NULL;
}
- ib_free_cq(ep->rep_attr.recv_cq);
- ib_free_cq(ep->rep_attr.send_cq);
+ if (ep->rep_attr.recv_cq)
+ ib_free_cq(ep->rep_attr.recv_cq);
+ if (ep->rep_attr.send_cq)
+ ib_free_cq(ep->rep_attr.send_cq);
}
/* Re-establish a connection after a device removal event.
@@ -1024,7 +1028,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
LIST_HEAD(free);
LIST_HEAD(all);
- for (count = 0; count < 32; count++) {
+ for (count = 0; count < 3; count++) {
struct rpcrdma_mr *mr;
int rc;
@@ -1049,8 +1053,9 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
list_splice(&all, &buf->rb_all);
r_xprt->rx_stats.mrs_allocated += count;
spin_unlock(&buf->rb_mrlock);
-
trace_xprtrdma_createmrs(r_xprt, count);
+
+ xprt_write_space(&r_xprt->rx_xprt);
}
static void
@@ -1068,17 +1073,27 @@ struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
+ struct rpcrdma_regbuf *rb;
struct rpcrdma_req *req;
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (req == NULL)
return ERR_PTR(-ENOMEM);
+ rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
+ DMA_TO_DEVICE, GFP_KERNEL);
+ if (IS_ERR(rb)) {
+ kfree(req);
+ return ERR_PTR(-ENOMEM);
+ }
+ req->rl_rdmabuf = rb;
+ xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
+ req->rl_buffer = buffer;
+ INIT_LIST_HEAD(&req->rl_registered);
+
spin_lock(&buffer->rb_reqslock);
list_add(&req->rl_all, &buffer->rb_allreqs);
spin_unlock(&buffer->rb_reqslock);
- req->rl_buffer = &r_xprt->rx_buf;
- INIT_LIST_HEAD(&req->rl_registered);
return req;
}
@@ -1535,7 +1550,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
struct rpcrdma_req *req)
{
struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
- struct ib_send_wr *send_wr_fail;
int rc;
if (req->rl_reply) {
@@ -1554,7 +1568,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
--ep->rep_send_count;
}
- rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
+ rc = ia->ri_ops->ro_send(ia, req);
trace_xprtrdma_post_send(req, rc);
if (rc)
return -ENOTCONN;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 69883a960a3f..3d3b423fa9c1 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -334,8 +334,6 @@ enum {
struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_list;
- int rl_cpu;
- unsigned int rl_connect_cookie;
struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply;
struct xdr_stream rl_stream;
@@ -474,6 +472,8 @@ struct rpcrdma_memreg_ops {
(*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool,
struct rpcrdma_mr **);
+ int (*ro_send)(struct rpcrdma_ia *ia,
+ struct rpcrdma_req *req);
void (*ro_reminv)(struct rpcrdma_rep *rep,
struct list_head *mrs);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 956e29c1438d..c8902f11efdd 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -527,6 +527,7 @@ static int xs_local_send_request(struct rpc_task *task)
xs_pktdump("packet data:",
req->rq_svec->iov_base, req->rq_svec->iov_len);
+ req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent,
true, &sent);
dprintk("RPC: %s(%u) = %d\n",
@@ -589,6 +590,7 @@ static int xs_udp_send_request(struct rpc_task *task)
if (!xprt_bound(xprt))
return -ENOTCONN;
+ req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
xdr, req->rq_bytes_sent, true, &sent);
@@ -678,6 +680,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
/* Continue transmitting the packet/record. We must be careful
* to cope with writespace callbacks arriving _after_ we have
* called sendmsg(). */
+ req->rq_xtime = ktime_get();
while (1) {
sent = 0;
status = xs_sendpages(transport->sock, NULL, 0, xdr,
@@ -1060,6 +1063,7 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
if (!rovr)
goto out_unlock;
xprt_pin_rqst(rovr);
+ xprt_update_rtt(rovr->rq_task);
spin_unlock(&xprt->recv_lock);
task = rovr->rq_task;
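
With xprt_update_rtt() exported and called from xs_udp_data_read_skb(), each socket send path now stamps req->rq_xtime immediately before the data hits the wire, and the RTT is computed when the matching reply is looked up under recv_lock. A userspace sketch of the same sampling scheme using a monotonic clock:

#include <time.h>

static struct timespec sent_at;

/* Stamp the request at the moment it is handed to the transport. */
static void on_send(void)
{
	clock_gettime(CLOCK_MONOTONIC, &sent_at);
}

/* Compute the RTT (in seconds) when the reply is matched. */
static double on_reply(void)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (double)(now.tv_sec - sent_at.tv_sec) +
	       (double)(now.tv_nsec - sent_at.tv_nsec) / 1e9;
}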
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 32dc33a94bc7..5453e564da82 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -777,7 +777,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg,
ret = tipc_bearer_get_name(net, bearer_name, bearer_id);
if (ret || !mon)
- return -EINVAL;
+ return 0;
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
NLM_F_MULTI, TIPC_NL_MON_GET);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index b1fe20972aa9..dd1c4fa2eb78 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -241,7 +241,8 @@ err:
static struct publication *tipc_service_remove_publ(struct net *net,
struct tipc_service *sc,
u32 lower, u32 upper,
- u32 node, u32 key)
+ u32 node, u32 key,
+ struct service_range **rng)
{
struct tipc_subscription *sub, *tmp;
struct service_range *sr;
@@ -275,19 +276,15 @@ static struct publication *tipc_service_remove_publ(struct net *net,
list_del(&p->all_publ);
list_del(&p->local_publ);
-
- /* Remove service range item if this was its last publication */
- if (list_empty(&sr->all_publ)) {
+ if (list_empty(&sr->all_publ))
last = true;
- rb_erase(&sr->tree_node, &sc->ranges);
- kfree(sr);
- }
/* Notify any waiting subscriptions */
list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
tipc_sub_report_overlap(sub, p->lower, p->upper, TIPC_WITHDRAWN,
p->port, p->node, p->scope, last);
}
+ *rng = sr;
return p;
}
@@ -379,13 +376,20 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
u32 node, u32 key)
{
struct tipc_service *sc = tipc_service_find(net, type);
+ struct service_range *sr = NULL;
struct publication *p = NULL;
if (!sc)
return NULL;
spin_lock_bh(&sc->lock);
- p = tipc_service_remove_publ(net, sc, lower, upper, node, key);
+ p = tipc_service_remove_publ(net, sc, lower, upper, node, key, &sr);
+
+ /* Remove service range item if this was its last publication */
+ if (sr && list_empty(&sr->all_publ)) {
+ rb_erase(&sr->tree_node, &sc->ranges);
+ kfree(sr);
+ }
	/* Delete service item if there are no more publications or subscriptions */
if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) {
@@ -665,13 +669,14 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
/**
* tipc_nametbl_subscribe - add a subscription object to the name table
*/
-void tipc_nametbl_subscribe(struct tipc_subscription *sub)
+bool tipc_nametbl_subscribe(struct tipc_subscription *sub)
{
struct name_table *nt = tipc_name_table(sub->net);
struct tipc_net *tn = tipc_net(sub->net);
struct tipc_subscr *s = &sub->evt.s;
u32 type = tipc_sub_read(s, seq.type);
struct tipc_service *sc;
+ bool res = true;
spin_lock_bh(&tn->nametbl_lock);
sc = tipc_service_find(sub->net, type);
@@ -685,8 +690,10 @@ void tipc_nametbl_subscribe(struct tipc_subscription *sub)
pr_warn("Failed to subscribe for {%u,%u,%u}\n", type,
tipc_sub_read(s, seq.lower),
tipc_sub_read(s, seq.upper));
+ res = false;
}
spin_unlock_bh(&tn->nametbl_lock);
+ return res;
}
/**
@@ -744,16 +751,17 @@ int tipc_nametbl_init(struct net *net)
static void tipc_service_delete(struct net *net, struct tipc_service *sc)
{
struct service_range *sr, *tmpr;
- struct publication *p, *tmpb;
+ struct publication *p, *tmp;
spin_lock_bh(&sc->lock);
rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) {
- list_for_each_entry_safe(p, tmpb,
- &sr->all_publ, all_publ) {
+ list_for_each_entry_safe(p, tmp, &sr->all_publ, all_publ) {
tipc_service_remove_publ(net, sc, p->lower, p->upper,
- p->node, p->key);
+ p->node, p->key, &sr);
kfree_rcu(p, rcu);
}
+ rb_erase(&sr->tree_node, &sc->ranges);
+ kfree(sr);
}
hlist_del_init_rcu(&sc->service_list);
spin_unlock_bh(&sc->lock);
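
tipc_service_remove_publ() above stops erasing the service_range itself and instead reports it through the new **rng out-parameter; tipc_service_delete() then drains all publications of a range and erases the range only after the inner loop completes, which keeps rbtree_postorder_for_each_entry_safe() from walking freed nodes. A toy sketch of the ownership handoff (bucket and remove_item are illustrative names):

#include <stdlib.h>
#include <stdbool.h>

struct bucket { int nitems; };

/* Remove one item, but let the caller decide when it is safe to free
 * the (possibly now empty) container. */
static bool remove_item(struct bucket *b, struct bucket **affected)
{
	if (b->nitems == 0)
		return false;
	b->nitems--;
	*affected = b;
	return true;
}

int main(void)
{
	struct bucket *b = calloc(1, sizeof(*b)), *affected = NULL;

	if (!b)
		return 1;
	b->nitems = 1;
	if (remove_item(b, &affected) && affected->nitems == 0)
		free(affected);  /* safe: no traversal is in flight here */
	return 0;
}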
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 4b14fc28d9e2..0febba41da86 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -126,7 +126,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
u32 lower, u32 upper,
u32 node, u32 key);
-void tipc_nametbl_subscribe(struct tipc_subscription *s);
+bool tipc_nametbl_subscribe(struct tipc_subscription *s);
void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
int tipc_nametbl_init(struct net *net);
void tipc_nametbl_stop(struct net *net);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 856f9e97ea29..4fbaa0464405 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -252,6 +252,8 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
u64 *w0 = (u64 *)&node_id[0];
u64 *w1 = (u64 *)&node_id[8];
+ if (!attrs[TIPC_NLA_NET_NODEID_W1])
+ return -EINVAL;
*w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]);
*w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]);
tipc_net_init(net, node_id, 0);
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index b76f13f6fea1..6ff2254088f6 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -79,7 +79,10 @@ const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = {
const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
[TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC },
- [TIPC_NLA_NET_ID] = { .type = NLA_U32 }
+ [TIPC_NLA_NET_ID] = { .type = NLA_U32 },
+ [TIPC_NLA_NET_ADDR] = { .type = NLA_U32 },
+ [TIPC_NLA_NET_NODEID] = { .type = NLA_U64 },
+ [TIPC_NLA_NET_NODEID_W1] = { .type = NLA_U64 },
};
const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
diff --git a/net/tipc/node.c b/net/tipc/node.c
index c77dd2f3c589..6f98b56dd48e 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2232,8 +2232,8 @@ int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb)
struct net *net = sock_net(skb->sk);
u32 prev_bearer = cb->args[0];
struct tipc_nl_msg msg;
+ int bearer_id;
int err;
- int i;
if (prev_bearer == MAX_BEARERS)
return 0;
@@ -2243,16 +2243,13 @@ int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb)
msg.seq = cb->nlh->nlmsg_seq;
rtnl_lock();
- for (i = prev_bearer; i < MAX_BEARERS; i++) {
- prev_bearer = i;
+ for (bearer_id = prev_bearer; bearer_id < MAX_BEARERS; bearer_id++) {
-		err = __tipc_nl_add_monitor(net, &msg, prev_bearer);
+		err = __tipc_nl_add_monitor(net, &msg, bearer_id);
if (err)
- goto out;
+ break;
}
-
-out:
rtnl_unlock();
- cb->args[0] = prev_bearer;
+ cb->args[0] = bearer_id;
return skb->len;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 1fd1c8b5ce03..252a52ae0893 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1278,7 +1278,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
struct tipc_msg *hdr = &tsk->phdr;
struct tipc_name_seq *seq;
struct sk_buff_head pkts;
- u32 dnode, dport;
+ u32 dport, dnode = 0;
u32 type, inst;
int mtu, rc;
@@ -1348,6 +1348,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
msg_set_destnode(hdr, dnode);
msg_set_destport(hdr, dest->addr.id.ref);
msg_set_hdr_sz(hdr, BASIC_H_SIZE);
+ } else {
+ return -EINVAL;
}
/* Block or return if destination link is congested */
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index b7d80bc5f4ab..f340e53da625 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -153,7 +153,10 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net,
memcpy(&sub->evt.s, s, sizeof(*s));
spin_lock_init(&sub->lock);
kref_init(&sub->kref);
- tipc_nametbl_subscribe(sub);
+ if (!tipc_nametbl_subscribe(sub)) {
+ kfree(sub);
+ return NULL;
+ }
timer_setup(&sub->timer, tipc_sub_timeout, 0);
timeout = tipc_sub_read(&sub->evt.s, timeout);
if (timeout != TIPC_WAIT_FOREVER)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 4dc766b03f00..71e79597f940 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -41,6 +41,8 @@
#include <net/strparser.h>
#include <net/tls.h>
+#define MAX_IV_SIZE TLS_CIPHER_AES_GCM_128_IV_SIZE
+
static int tls_do_decryption(struct sock *sk,
struct scatterlist *sgin,
struct scatterlist *sgout,
@@ -673,7 +675,7 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
- char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + tls_ctx->rx.iv_size];
+ char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + MAX_IV_SIZE];
struct scatterlist sgin_arr[MAX_SKB_FRAGS + 2];
struct scatterlist *sgin = &sgin_arr[0];
struct strp_msg *rxm = strp_msg(skb);
@@ -1094,6 +1096,12 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
goto free_priv;
}
+ /* Sanity-check the IV size for stack allocations. */
+ if (iv_size > MAX_IV_SIZE) {
+ rc = -EINVAL;
+ goto free_priv;
+ }
+
cctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
cctx->tag_size = tag_size;
cctx->overhead_size = cctx->prepend_size + cctx->tag_size;
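
The tls_sw.c change replaces a variable-length array sized by tls_ctx->rx.iv_size with a buffer sized for the largest supported cipher IV, and rejects oversized IVs once at setup time. The general pattern, as a self-contained sketch (the sizes here are made up for illustration):

#include <errno.h>
#include <string.h>

#define SALT_SIZE   4
#define MAX_IV_SIZE 8	/* worst case across supported ciphers */

static int build_iv(const char *salt, const char *iv, size_t iv_size)
{
	char buf[SALT_SIZE + MAX_IV_SIZE];	/* fixed size, no VLA */

	if (iv_size > MAX_IV_SIZE)	/* the kernel checks this once,
					 * when the cipher is configured */
		return -EINVAL;
	memcpy(buf, salt, SALT_SIZE);
	memcpy(buf + SALT_SIZE, iv, iv_size);
	return 0;
}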
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index aac9b8f6552e..c1076c19b858 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -2018,7 +2018,13 @@ const struct vsock_transport *vsock_core_get_transport(void)
}
EXPORT_SYMBOL_GPL(vsock_core_get_transport);
+static void __exit vsock_exit(void)
+{
+ /* Do nothing. This function makes this module removable. */
+}
+
module_init(vsock_init_tables);
+module_exit(vsock_exit);
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Virtual Socket Family");
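
The af_vsock.c hunk works because the module loader treats a module without a module_exit() handler as permanent; registering even an empty exit function makes rmmod possible. The idiom as a minimal out-of-tree module sketch:

#include <linux/module.h>
#include <linux/init.h>

static int __init demo_init(void)
{
	return 0;
}

static void __exit demo_exit(void)
{
	/* nothing to undo; existence alone permits unloading */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");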