diff options
Diffstat (limited to 'include')
29 files changed, 279 insertions, 57 deletions
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 65a38c4a02a1..39e6f4c57580 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -211,6 +211,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_MON_STATEFUL_SUB | \ CEPH_FEATURE_CRUSH_TUNABLES5 | \ CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \ + CEPH_FEATURE_MSG_ADDR2 | \ CEPH_FEATURE_CEPHX_V2) #define CEPH_FEATURES_REQUIRED_DEFAULT 0 diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 3ac0feaf2b5e..cb21c5cf12c3 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -682,7 +682,7 @@ extern const char *ceph_cap_op_name(int op); /* flags field in client cap messages (version >= 10) */ #define CEPH_CLIENT_CAPS_SYNC (1<<0) #define CEPH_CLIENT_CAPS_NO_CAPSNAP (1<<1) -#define CEPH_CLIENT_CAPS_PENDING_CAPSNAP (1<<2); +#define CEPH_CLIENT_CAPS_PENDING_CAPSNAP (1<<2) /* * caps message, used for capability callbacks, acks, requests, etc. diff --git a/include/linux/ceph/cls_lock_client.h b/include/linux/ceph/cls_lock_client.h index bea6c77d2093..17bc7584d1fe 100644 --- a/include/linux/ceph/cls_lock_client.h +++ b/include/linux/ceph/cls_lock_client.h @@ -52,4 +52,7 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc, char *lock_name, u8 *type, char **tag, struct ceph_locker **lockers, u32 *num_lockers); +int ceph_cls_assert_locked(struct ceph_osd_request *req, int which, + char *lock_name, u8 type, char *cookie, char *tag); + #endif diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index a6c2a48d42e0..450384fe487c 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -218,18 +218,27 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv, /* * sockaddr_storage <-> ceph_sockaddr */ -static inline void ceph_encode_addr(struct ceph_entity_addr *a) +#define CEPH_ENTITY_ADDR_TYPE_NONE 0 +#define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1) + +static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a) { __be16 ss_family = htons(a->in_addr.ss_family); a->in_addr.ss_family = *(__u16 *)&ss_family; + + /* Banner addresses require TYPE_NONE */ + a->type = CEPH_ENTITY_ADDR_TYPE_NONE; } -static inline void ceph_decode_addr(struct ceph_entity_addr *a) +static inline void ceph_decode_banner_addr(struct ceph_entity_addr *a) { __be16 ss_family = *(__be16 *)&a->in_addr.ss_family; a->in_addr.ss_family = ntohs(ss_family); WARN_ON(a->in_addr.ss_family == 512); + a->type = CEPH_ENTITY_ADDR_TYPE_LEGACY; } +extern int ceph_decode_entity_addr(void **p, void *end, + struct ceph_entity_addr *addr); /* * encoders */ diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 337d5049ff93..82156da3c650 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -84,11 +84,13 @@ struct ceph_options { #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) /* - * Handle the largest possible rbd object in one message. + * The largest possible rbd data object is 32M. + * The largest possible rbd object map object is 64M. + * * There is no limit on the size of cephfs objects, but it has to obey * rsize and wsize mount options anyway. */ -#define CEPH_MSG_MAX_DATA_LEN (32*1024*1024) +#define CEPH_MSG_MAX_DATA_LEN (64*1024*1024) #define CEPH_AUTH_NAME_DEFAULT "guest" @@ -299,10 +301,6 @@ int ceph_wait_for_latest_osdmap(struct ceph_client *client, /* pagevec.c */ extern void ceph_release_page_vector(struct page **pages, int num_pages); - -extern struct page **ceph_get_direct_page_vector(const void __user *data, - int num_pages, - bool write_page); extern void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index 3a4688af7455..b4d134d3312a 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -104,7 +104,6 @@ struct ceph_mon_client { #endif }; -extern struct ceph_monmap *ceph_monmap_decode(void *p, void *end); extern int ceph_monmap_contains(struct ceph_monmap *m, struct ceph_entity_addr *addr); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 2294f963dab7..ad7fe5d10dcd 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -198,9 +198,9 @@ struct ceph_osd_request { bool r_mempool; struct completion r_completion; /* private to osd_client.c */ ceph_osdc_callback_t r_callback; - struct list_head r_unsafe_item; struct inode *r_inode; /* for use by callbacks */ + struct list_head r_private_item; /* ditto */ void *r_priv; /* ditto */ /* set by submitter */ @@ -389,6 +389,14 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err); +#define osd_req_op_data(oreq, whch, typ, fld) \ +({ \ + struct ceph_osd_request *__oreq = (oreq); \ + unsigned int __whch = (whch); \ + BUG_ON(__whch >= __oreq->r_num_ops); \ + &__oreq->r_ops[__whch].typ.fld; \ +}) + extern void osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u32 flags); @@ -497,7 +505,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc, const char *class, const char *method, unsigned int flags, struct page *req_page, size_t req_len, - struct page *resp_page, size_t *resp_len); + struct page **resp_pages, size_t *resp_len); extern int ceph_osdc_readpages(struct ceph_osd_client *osdc, struct ceph_vino vino, diff --git a/include/linux/ceph/striper.h b/include/linux/ceph/striper.h index cbd0d24b7148..3486636c0e6e 100644 --- a/include/linux/ceph/striper.h +++ b/include/linux/ceph/striper.h @@ -66,4 +66,6 @@ int ceph_extent_to_file(struct ceph_file_layout *l, struct ceph_file_extent **file_extents, u32 *num_file_extents); +u64 ceph_get_num_objects(struct ceph_file_layout *l, u64 size); + #endif diff --git a/include/linux/dax.h b/include/linux/dax.h index becaea5f4488..9bd8528bd305 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -7,6 +7,9 @@ #include <linux/radix-tree.h> #include <asm/pgtable.h> +/* Flag for synchronous flush */ +#define DAXDEV_F_SYNC (1UL << 0) + typedef unsigned long dax_entry_t; struct iomap_ops; @@ -38,18 +41,40 @@ extern struct attribute_group dax_attribute_group; #if IS_ENABLED(CONFIG_DAX) struct dax_device *dax_get_by_host(const char *host); struct dax_device *alloc_dax(void *private, const char *host, - const struct dax_operations *ops); + const struct dax_operations *ops, unsigned long flags); void put_dax(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev); void dax_write_cache(struct dax_device *dax_dev, bool wc); bool dax_write_cache_enabled(struct dax_device *dax_dev); +bool __dax_synchronous(struct dax_device *dax_dev); +static inline bool dax_synchronous(struct dax_device *dax_dev) +{ + return __dax_synchronous(dax_dev); +} +void __set_dax_synchronous(struct dax_device *dax_dev); +static inline void set_dax_synchronous(struct dax_device *dax_dev) +{ + __set_dax_synchronous(dax_dev); +} +/* + * Check if given mapping is supported by the file / underlying device. + */ +static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, + struct dax_device *dax_dev) +{ + if (!(vma->vm_flags & VM_SYNC)) + return true; + if (!IS_DAX(file_inode(vma->vm_file))) + return false; + return dax_synchronous(dax_dev); +} #else static inline struct dax_device *dax_get_by_host(const char *host) { return NULL; } static inline struct dax_device *alloc_dax(void *private, const char *host, - const struct dax_operations *ops) + const struct dax_operations *ops, unsigned long flags) { /* * Callers should check IS_ENABLED(CONFIG_DAX) to know if this @@ -70,6 +95,18 @@ static inline bool dax_write_cache_enabled(struct dax_device *dax_dev) { return false; } +static inline bool dax_synchronous(struct dax_device *dax_dev) +{ + return true; +} +static inline void set_dax_synchronous(struct dax_device *dax_dev) +{ +} +static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, + struct dax_device *dax_dev) +{ + return !(vma->vm_flags & VM_SYNC); +} #endif struct writeback_control; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 3b470cb03b66..399ad8632356 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -529,29 +529,20 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); *---------------------------------------------------------------*/ #define DM_NAME "device-mapper" -#define DM_RATELIMIT(pr_func, fmt, ...) \ -do { \ - static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, \ - DEFAULT_RATELIMIT_BURST); \ - \ - if (__ratelimit(&rs)) \ - pr_func(DM_FMT(fmt), ##__VA_ARGS__); \ -} while (0) - #define DM_FMT(fmt) DM_NAME ": " DM_MSG_PREFIX ": " fmt "\n" #define DMCRIT(fmt, ...) pr_crit(DM_FMT(fmt), ##__VA_ARGS__) #define DMERR(fmt, ...) pr_err(DM_FMT(fmt), ##__VA_ARGS__) -#define DMERR_LIMIT(fmt, ...) DM_RATELIMIT(pr_err, fmt, ##__VA_ARGS__) +#define DMERR_LIMIT(fmt, ...) pr_err_ratelimited(DM_FMT(fmt), ##__VA_ARGS__) #define DMWARN(fmt, ...) pr_warn(DM_FMT(fmt), ##__VA_ARGS__) -#define DMWARN_LIMIT(fmt, ...) DM_RATELIMIT(pr_warn, fmt, ##__VA_ARGS__) +#define DMWARN_LIMIT(fmt, ...) pr_warn_ratelimited(DM_FMT(fmt), ##__VA_ARGS__) #define DMINFO(fmt, ...) pr_info(DM_FMT(fmt), ##__VA_ARGS__) -#define DMINFO_LIMIT(fmt, ...) DM_RATELIMIT(pr_info, fmt, ##__VA_ARGS__) +#define DMINFO_LIMIT(fmt, ...) pr_info_ratelimited(DM_FMT(fmt), ##__VA_ARGS__) #ifdef CONFIG_DM_DEBUG #define DMDEBUG(fmt, ...) printk(KERN_DEBUG DM_FMT(fmt), ##__VA_ARGS__) -#define DMDEBUG_LIMIT(fmt, ...) DM_RATELIMIT(pr_debug, fmt, ##__VA_ARGS__) +#define DMDEBUG_LIMIT(fmt, ...) pr_debug_ratelimited(DM_FMT(fmt), ##__VA_ARGS__) #else #define DMDEBUG(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #define DMDEBUG_LIMIT(fmt, ...) no_printk(fmt, ##__VA_ARGS__) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 25e2995d4a4c..8a8cb3c401b2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -427,8 +427,8 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter); iter = ftrace_rec_iter_next(iter)) -int ftrace_update_record(struct dyn_ftrace *rec, int enable); -int ftrace_test_record(struct dyn_ftrace *rec, int enable); +int ftrace_update_record(struct dyn_ftrace *rec, bool enable); +int ftrace_test_record(struct dyn_ftrace *rec, bool enable); void ftrace_run_stop_machine(int command); unsigned long ftrace_location(unsigned long ip); unsigned long ftrace_location_range(unsigned long start, unsigned long end); diff --git a/include/linux/iversion.h b/include/linux/iversion.h index be50ef7cedab..2917ef990d43 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -113,6 +113,30 @@ inode_peek_iversion_raw(const struct inode *inode) } /** + * inode_set_max_iversion_raw - update i_version new value is larger + * @inode: inode to set + * @val: new i_version to set + * + * Some self-managed filesystems (e.g Ceph) will only update the i_version + * value if the new value is larger than the one we already have. + */ +static inline void +inode_set_max_iversion_raw(struct inode *inode, u64 val) +{ + u64 cur, old; + + cur = inode_peek_iversion_raw(inode); + for (;;) { + if (cur > val) + break; + old = atomic64_cmpxchg(&inode->i_version, cur, val); + if (likely(old == cur)) + break; + cur = old; + } +} + +/** * inode_set_iversion - set i_version to a particular value * @inode: inode to set * @val: new i_version value to set diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 03d5c3aece9d..7a64b3ddb408 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -11,6 +11,7 @@ #include <linux/types.h> #include <linux/uuid.h> #include <linux/spinlock.h> +#include <linux/bio.h> struct badrange_entry { u64 start; @@ -57,6 +58,9 @@ enum { */ ND_REGION_PERSIST_MEMCTRL = 2, + /* Platform provides asynchronous flush mechanism */ + ND_REGION_ASYNC = 3, + /* mark newly adjusted resources as requiring a label update */ DPA_RESOURCE_ADJUSTED = 1 << 0, }; @@ -113,6 +117,7 @@ struct nd_mapping_desc { int position; }; +struct nd_region; struct nd_region_desc { struct resource *res; struct nd_mapping_desc *mapping; @@ -125,6 +130,7 @@ struct nd_region_desc { int target_node; unsigned long flags; struct device_node *of_node; + int (*flush)(struct nd_region *nd_region, struct bio *bio); }; struct device; @@ -252,10 +258,12 @@ unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr); unsigned int nd_region_acquire_lane(struct nd_region *nd_region); void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); -void nvdimm_flush(struct nd_region *nd_region); +int nvdimm_flush(struct nd_region *nd_region, struct bio *bio); +int generic_nvdimm_flush(struct nd_region *nd_region); int nvdimm_has_flush(struct nd_region *nd_region); int nvdimm_has_cache(struct nd_region *nd_region); int nvdimm_in_overwrite(struct nvdimm *nvdimm); +bool is_nvdimm_sync(struct nd_region *nd_region); static inline int nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 31013c2effd3..5229c18025e9 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -29,6 +29,11 @@ void *module_alloc(unsigned long size); /* Free memory returned from module_alloc. */ void module_memfree(void *module_region); +/* Determines if the section name is an exit section (that is only used during + * module unloading) + */ +bool module_exit_section(const char *name); + /* * Apply the given relocation to the (simplified) ELF. Return -error * or 0. diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 22494d170619..fd59904a282c 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -660,6 +660,7 @@ enum pnfs_update_layout_reason { PNFS_UPDATE_LAYOUT_BLOCKED, PNFS_UPDATE_LAYOUT_INVALID_OPEN, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, + PNFS_UPDATE_LAYOUT_EXIT, }; #define NFS4_OP_MAP_NUM_LONGS \ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d363d5765cdf..0a11712a80e3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -223,6 +223,8 @@ struct nfs4_copy_state { #define NFS_INO_INVALID_MTIME BIT(10) /* cached mtime is invalid */ #define NFS_INO_INVALID_SIZE BIT(11) /* cached size is invalid */ #define NFS_INO_INVALID_OTHER BIT(12) /* other attrs are invalid */ +#define NFS_INO_DATA_INVAL_DEFER \ + BIT(13) /* Deferred cache invalidation */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 1e78032a174b..a87fe854f008 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -58,6 +58,7 @@ struct nfs_client { struct nfs_subversion * cl_nfs_mod; /* pointer to nfs version module */ u32 cl_minorversion;/* NFSv4 minorversion */ + unsigned int cl_nconnect; /* Number of connections */ const char * cl_principal; /* used for machine cred */ #if IS_ENABLED(CONFIG_NFS_V4) diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index d4229a78524a..87d27e13d885 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -43,6 +43,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs); void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs); void xprt_free_bc_rqst(struct rpc_rqst *req); +unsigned int xprt_bc_max_slots(struct rpc_xprt *xprt); /* * Determine if a shared backchannel is in use diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 6e8073140a5d..abc63bd1be2b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -124,6 +124,7 @@ struct rpc_create_args { u32 prognumber; /* overrides program->number */ u32 version; rpc_authflavor_t authflavor; + u32 nconnect; unsigned long flags; char *client_name; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ @@ -163,6 +164,8 @@ void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); void rpc_task_release_transport(struct rpc_task *); void rpc_task_release_client(struct rpc_task *); +struct rpc_xprt *rpc_task_get_xprt(struct rpc_clnt *clnt, + struct rpc_xprt *xprt); int rpcb_create_local(struct net *); void rpcb_put_local(struct net *); @@ -191,6 +194,7 @@ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); struct net * rpc_net_ns(struct rpc_clnt *); size_t rpc_max_payload(struct rpc_clnt *); size_t rpc_max_bc_payload(struct rpc_clnt *); +unsigned int rpc_num_bc_slots(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h index 1b3751327575..0ee3f7052846 100644 --- a/include/linux/sunrpc/metrics.h +++ b/include/linux/sunrpc/metrics.h @@ -30,7 +30,7 @@ #include <linux/ktime.h> #include <linux/spinlock.h> -#define RPC_IOSTATS_VERS "1.0" +#define RPC_IOSTATS_VERS "1.1" struct rpc_iostats { spinlock_t om_lock; @@ -66,6 +66,11 @@ struct rpc_iostats { ktime_t om_queue, /* queued for xmit */ om_rtt, /* RPC RTT */ om_execute; /* RPC execution */ + /* + * The count of operations that complete with tk_status < 0. + * These statuses usually indicate error conditions. + */ + unsigned long om_error_status; } ____cacheline_aligned; struct rpc_task; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d0e451868f02..baa3ecdb882f 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -126,6 +126,7 @@ struct rpc_task_setup { #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ +#define RPC_TASK_NO_ROUND_ROBIN 0x0100 /* send requests on "main" xprt */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_TASK_SENT 0x0800 /* message was sent */ @@ -183,8 +184,9 @@ struct rpc_task_setup { #define RPC_NR_PRIORITY (1 + RPC_PRIORITY_PRIVILEGED - RPC_PRIORITY_LOW) struct rpc_timer { - struct timer_list timer; struct list_head list; + unsigned long expires; + struct delayed_work dwork; }; /* diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a6d9fce7f20e..13e108bcc9eb 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -158,6 +158,7 @@ struct rpc_xprt_ops { int (*bc_setup)(struct rpc_xprt *xprt, unsigned int min_reqs); size_t (*bc_maxpayload)(struct rpc_xprt *xprt); + unsigned int (*bc_num_slots)(struct rpc_xprt *xprt); void (*bc_free_rqst)(struct rpc_rqst *rqst); void (*bc_destroy)(struct rpc_xprt *xprt, unsigned int max_reqs); @@ -238,6 +239,7 @@ struct rpc_xprt { /* * Send stuff */ + atomic_long_t queuelen; spinlock_t transport_lock; /* lock transport info */ spinlock_t reserve_lock; /* lock slot table */ spinlock_t queue_lock; /* send/receive queue lock */ @@ -250,8 +252,9 @@ struct rpc_xprt { #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ /* process the callback */ - int bc_alloc_count; /* Total number of preallocs */ - atomic_t bc_free_slots; + unsigned int bc_alloc_max; + unsigned int bc_alloc_count; /* Total number of preallocs */ + atomic_t bc_slot_count; /* Number of allocated slots */ spinlock_t bc_pa_lock; /* Protects the preallocated * items */ struct list_head bc_pa_list; /* List of preallocated @@ -334,6 +337,9 @@ struct xprt_class { */ struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); +unsigned long xprt_reconnect_delay(const struct rpc_xprt *xprt); +void xprt_reconnect_backoff(struct rpc_xprt *xprt, + unsigned long init_to); void xprt_reserve(struct rpc_task *task); void xprt_retry_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h index af1257c030d2..c6cce3fbf29d 100644 --- a/include/linux/sunrpc/xprtmultipath.h +++ b/include/linux/sunrpc/xprtmultipath.h @@ -15,6 +15,8 @@ struct rpc_xprt_switch { struct kref xps_kref; unsigned int xps_nxprts; + unsigned int xps_nactive; + atomic_long_t xps_queuelen; struct list_head xps_xprt_list; struct net * xps_net; diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index b81d0b3e0799..7638dbe7bc50 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -56,6 +56,7 @@ struct sock_xprt { */ unsigned long sock_state; struct delayed_work connect_worker; + struct work_struct error_worker; struct work_struct recv_worker; struct mutex recv_mutex; struct sockaddr_storage srcaddr; @@ -84,6 +85,10 @@ struct sock_xprt { #define XPRT_SOCK_CONNECTING 1U #define XPRT_SOCK_DATA_READY (2) #define XPRT_SOCK_UPD_TIMEOUT (3) +#define XPRT_SOCK_WAKE_ERROR (4) +#define XPRT_SOCK_WAKE_WRITE (5) +#define XPRT_SOCK_WAKE_PENDING (6) +#define XPRT_SOCK_WAKE_DISCONNECT (7) #endif /* __KERNEL__ */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 8a62731673f7..5150436783e8 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -142,6 +142,7 @@ enum print_line_t { enum print_line_t trace_handle_return(struct trace_seq *s); void tracing_generic_entry_update(struct trace_entry *entry, + unsigned short type, unsigned long flags, int pc); struct trace_event_file; @@ -317,6 +318,14 @@ trace_event_name(struct trace_event_call *call) return call->name; } +static inline struct list_head * +trace_get_fields(struct trace_event_call *event_call) +{ + if (!event_call->class->get_fields) + return &event_call->class->fields; + return event_call->class->get_fields(event_call); +} + struct trace_array; struct trace_subsystem_dir; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 2b70130af585..34a038563d97 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -203,7 +203,10 @@ static inline void pagefault_enable(void) /* * Is the pagefault handler disabled? If so, user access methods will not sleep. */ -#define pagefault_disabled() (current->pagefault_disabled != 0) +static inline bool pagefault_disabled(void) +{ + return current->pagefault_disabled != 0; +} /* * The pagefault handler is in general disabled by pagefault_disable() or @@ -240,6 +243,18 @@ extern long probe_kernel_read(void *dst, const void *src, size_t size); extern long __probe_kernel_read(void *dst, const void *src, size_t size); /* + * probe_user_read(): safely attempt to read from a location in user space + * @dst: pointer to the buffer that shall take the data + * @src: address to read from + * @size: size of the data chunk + * + * Safely read from address @src to the buffer at @dst. If a kernel fault + * happens, handle that and return -EFAULT. + */ +extern long probe_user_read(void *dst, const void __user *src, size_t size); +extern long __probe_user_read(void *dst, const void __user *src, size_t size); + +/* * probe_kernel_write(): safely attempt to write to a location * @dst: address to write to * @src: pointer to the data that shall be written @@ -252,6 +267,9 @@ extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); +extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, + long count); +extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count); /** * probe_kernel_address(): safely attempt to read from a location diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index df9851cb82b2..f6a4eaa85a3e 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -181,18 +181,6 @@ DECLARE_EVENT_CLASS(xprtrdma_wrch_event, ), \ TP_ARGS(task, mr, nsegs)) -TRACE_DEFINE_ENUM(FRWR_IS_INVALID); -TRACE_DEFINE_ENUM(FRWR_IS_VALID); -TRACE_DEFINE_ENUM(FRWR_FLUSHED_FR); -TRACE_DEFINE_ENUM(FRWR_FLUSHED_LI); - -#define xprtrdma_show_frwr_state(x) \ - __print_symbolic(x, \ - { FRWR_IS_INVALID, "INVALID" }, \ - { FRWR_IS_VALID, "VALID" }, \ - { FRWR_FLUSHED_FR, "FLUSHED_FR" }, \ - { FRWR_FLUSHED_LI, "FLUSHED_LI" }) - DECLARE_EVENT_CLASS(xprtrdma_frwr_done, TP_PROTO( const struct ib_wc *wc, @@ -203,22 +191,19 @@ DECLARE_EVENT_CLASS(xprtrdma_frwr_done, TP_STRUCT__entry( __field(const void *, mr) - __field(unsigned int, state) __field(unsigned int, status) __field(unsigned int, vendor_err) ), TP_fast_assign( __entry->mr = container_of(frwr, struct rpcrdma_mr, frwr); - __entry->state = frwr->fr_state; __entry->status = wc->status; __entry->vendor_err = __entry->status ? wc->vendor_err : 0; ), TP_printk( - "mr=%p state=%s: %s (%u/0x%x)", - __entry->mr, xprtrdma_show_frwr_state(__entry->state), - rdma_show_wc_status(__entry->status), + "mr=%p: %s (%u/0x%x)", + __entry->mr, rdma_show_wc_status(__entry->status), __entry->status, __entry->vendor_err ) ); @@ -390,6 +375,37 @@ DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); DEFINE_RXPRT_EVENT(xprtrdma_op_close); DEFINE_RXPRT_EVENT(xprtrdma_op_connect); +TRACE_EVENT(xprtrdma_op_set_cto, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt, + unsigned long connect, + unsigned long reconnect + ), + + TP_ARGS(r_xprt, connect, reconnect), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(unsigned long, connect) + __field(unsigned long, reconnect) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + __entry->r_xprt = r_xprt; + __entry->connect = connect; + __entry->reconnect = reconnect; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p: connect=%lu reconnect=%lu", + __get_str(addr), __get_str(port), __entry->r_xprt, + __entry->connect / HZ, __entry->reconnect / HZ + ) +); + TRACE_EVENT(xprtrdma_qp_event, TP_PROTO( const struct rpcrdma_xprt *r_xprt, @@ -470,13 +486,12 @@ TRACE_DEFINE_ENUM(rpcrdma_replych); TRACE_EVENT(xprtrdma_marshal, TP_PROTO( - const struct rpc_rqst *rqst, - unsigned int hdrlen, + const struct rpcrdma_req *req, unsigned int rtype, unsigned int wtype ), - TP_ARGS(rqst, hdrlen, rtype, wtype), + TP_ARGS(req, rtype, wtype), TP_STRUCT__entry( __field(unsigned int, task_id) @@ -491,10 +506,12 @@ TRACE_EVENT(xprtrdma_marshal, ), TP_fast_assign( + const struct rpc_rqst *rqst = &req->rl_slot; + __entry->task_id = rqst->rq_task->tk_pid; __entry->client_id = rqst->rq_task->tk_client->cl_clid; __entry->xid = be32_to_cpu(rqst->rq_xid); - __entry->hdrlen = hdrlen; + __entry->hdrlen = req->rl_hdrbuf.len; __entry->headlen = rqst->rq_snd_buf.head[0].iov_len; __entry->pagelen = rqst->rq_snd_buf.page_len; __entry->taillen = rqst->rq_snd_buf.tail[0].iov_len; @@ -538,6 +555,33 @@ TRACE_EVENT(xprtrdma_marshal_failed, ) ); +TRACE_EVENT(xprtrdma_prepsend_failed, + TP_PROTO(const struct rpc_rqst *rqst, + int ret + ), + + TP_ARGS(rqst, ret), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(int, ret) + ), + + TP_fast_assign( + __entry->task_id = rqst->rq_task->tk_pid; + __entry->client_id = rqst->rq_task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->ret = ret; + ), + + TP_printk("task:%u@%u xid=0x%08x: ret=%d", + __entry->task_id, __entry->client_id, __entry->xid, + __entry->ret + ) +); + TRACE_EVENT(xprtrdma_post_send, TP_PROTO( const struct rpcrdma_req *req, @@ -559,7 +603,8 @@ TRACE_EVENT(xprtrdma_post_send, const struct rpc_rqst *rqst = &req->rl_slot; __entry->task_id = rqst->rq_task->tk_pid; - __entry->client_id = rqst->rq_task->tk_client->cl_clid; + __entry->client_id = rqst->rq_task->tk_client ? + rqst->rq_task->tk_client->cl_clid : -1; __entry->req = req; __entry->num_sge = req->rl_sendctx->sc_wr.num_sge; __entry->signaled = req->rl_sendctx->sc_wr.send_flags & @@ -698,6 +743,7 @@ TRACE_EVENT(xprtrdma_wc_receive, DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_fastreg); DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li); DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake); +DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_done); TRACE_EVENT(xprtrdma_frwr_alloc, TP_PROTO( diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index cfe47c5d9a56..348fd0176f75 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -44,5 +44,6 @@ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ +#define VIRTIO_ID_PMEM 27 /* virtio pmem */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/uapi/linux/virtio_pmem.h b/include/uapi/linux/virtio_pmem.h new file mode 100644 index 000000000000..9a63ed6d062f --- /dev/null +++ b/include/uapi/linux/virtio_pmem.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* + * Definitions for virtio-pmem devices. + * + * Copyright (C) 2019 Red Hat, Inc. + * + * Author(s): Pankaj Gupta <pagupta@redhat.com> + */ + +#ifndef _UAPI_LINUX_VIRTIO_PMEM_H +#define _UAPI_LINUX_VIRTIO_PMEM_H + +#include <linux/types.h> +#include <linux/virtio_ids.h> +#include <linux/virtio_config.h> + +struct virtio_pmem_config { + __u64 start; + __u64 size; +}; + +#define VIRTIO_PMEM_REQ_TYPE_FLUSH 0 + +struct virtio_pmem_resp { + /* Host return status corresponding to flush request */ + __le32 ret; +}; + +struct virtio_pmem_req { + /* command type */ + __le32 type; +}; + +#endif |