From e1603b6effe177210701d3d7132d1b68e7bd2c93 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 9 Feb 2018 18:04:54 +0300 Subject: inotify: Extend ioctl to allow to request id of new watch descriptor Watch descriptor is id of the watch created by inotify_add_watch(). It is allocated in inotify_add_to_idr(), and takes the numbers starting from 1. Every new inotify watch obtains next available number (usually, old + 1), as served by idr_alloc_cyclic(). CRIU (Checkpoint/Restore In Userspace) project supports inotify files, and restores watched descriptors with the same numbers, they had before dump. Since there was no kernel support, we had to use cycle to add a watch with specific descriptor id: while (1) { int wd; wd = inotify_add_watch(inotify_fd, path, mask); if (wd < 0) { break; } else if (wd == desired_wd_id) { ret = 0; break; } inotify_rm_watch(inotify_fd, wd); } (You may find the actual code at the below link: https://github.com/checkpoint-restore/criu/blob/v3.7/criu/fsnotify.c#L577) The cycle is suboptiomal and very expensive, but since there is no better kernel support, it was the only way to restore that. Happily, we had met mostly descriptors with small id, and this approach had worked somehow. But recent time containers with inotify with big watch descriptors begun to come, and this way stopped to work at all. When descriptor id is something about 0x34d71d6, the restoring process spins in busy loop for a long time, and the restore hungs and delay of migration from node to node could easily be watched. This patch aims to solve this problem. It introduces new ioctl INOTIFY_IOC_SETNEXTWD, which allows to request the number of next created watch descriptor from userspace. It simply calls idr_set_cursor() primitive to populate idr::idr_next, so that next idr_alloc_cyclic() allocation will return this id, if it is not occupied. This is the way which is used to restore some other resources from userspace. For example, /proc/sys/kernel/ns_last_pid works the same for task pids. The new code is under CONFIG_CHECKPOINT_RESTORE #define, so small system may exclude it. v2: Use INT_MAX instead of custom definition of max id, as IDR subsystem guarantees id is between 0 and INT_MAX. CC: Jan Kara CC: Matthew Wilcox CC: Andrew Morton CC: Amir Goldstein Signed-off-by: Kirill Tkhai Reviewed-by: Cyrill Gorcunov Reviewed-by: Matthew Wilcox Reviewed-by: Andrew Morton Signed-off-by: Jan Kara --- fs/notify/inotify/inotify_user.c | 14 ++++++++++++++ include/uapi/linux/inotify.h | 8 ++++++++ 2 files changed, 22 insertions(+) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 2c908b31d6c9..8f17719842ec 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -307,6 +307,20 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, spin_unlock(&group->notification_lock); ret = put_user(send_len, (int __user *) p); break; +#ifdef CONFIG_CHECKPOINT_RESTORE + case INOTIFY_IOC_SETNEXTWD: + ret = -EINVAL; + if (arg >= 1 && arg <= INT_MAX) { + struct inotify_group_private_data *data; + + data = &group->inotify_data; + spin_lock(&data->idr_lock); + idr_set_cursor(&data->idr, (unsigned int)arg); + spin_unlock(&data->idr_lock); + ret = 0; + } + break; +#endif /* CONFIG_CHECKPOINT_RESTORE */ } return ret; diff --git a/include/uapi/linux/inotify.h b/include/uapi/linux/inotify.h index 5474461683db..4800bf2a531d 100644 --- a/include/uapi/linux/inotify.h +++ b/include/uapi/linux/inotify.h @@ -71,5 +71,13 @@ struct inotify_event { #define IN_CLOEXEC O_CLOEXEC #define IN_NONBLOCK O_NONBLOCK +/* + * ioctl numbers: inotify uses 'I' prefix for all ioctls, + * except historical FIONREAD, which is based on 'T'. + * + * INOTIFY_IOC_SETNEXTWD: set desired number of next created + * watch descriptor. + */ +#define INOTIFY_IOC_SETNEXTWD _IOW('I', 0, __s32) #endif /* _UAPI_LINUX_INOTIFY_H */ -- cgit v1.2.1 From 91c9c9ec54c1e88d9ef59a7b12cf31d6ef5d1e58 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 8 Feb 2018 17:39:01 +0100 Subject: udf: Fix off-by-one in volume descriptor sequence length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We pass one block beyond end of volume descriptor sequence into process_sequence() as 'lastblock' instead of the last block of the sequence. When the sequence is not terminated with TD descriptor, this could lead to false errors due to invalid blocks in volume descriptor sequence and thus unmountable volumes. Acked-by: Pali Rohár Signed-off-by: Jan Kara --- fs/udf/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index f73239a9a97d..5c5d5fd513cc 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1658,7 +1658,7 @@ static noinline int udf_process_sequence( next_e = le32_to_cpu( vdp->nextVolDescSeqExt.extLength); next_e = next_e >> sb->s_blocksize_bits; - next_e += next_s; + next_e += next_s - 1; } break; case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */ @@ -1760,13 +1760,13 @@ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh, main_s = le32_to_cpu(anchor->mainVolDescSeqExt.extLocation); main_e = le32_to_cpu(anchor->mainVolDescSeqExt.extLength); main_e = main_e >> sb->s_blocksize_bits; - main_e += main_s; + main_e += main_s - 1; /* Locate the reserve sequence */ reserve_s = le32_to_cpu(anchor->reserveVolDescSeqExt.extLocation); reserve_e = le32_to_cpu(anchor->reserveVolDescSeqExt.extLength); reserve_e = reserve_e >> sb->s_blocksize_bits; - reserve_e += reserve_s; + reserve_e += reserve_s - 1; /* Process the main & reserve sequences */ /* responsible for finding the PartitionDesc(s) */ -- cgit v1.2.1 From 7b568cba4f0af32522f84c1378e71a47a9c61bb0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 9 Feb 2018 13:28:41 +0100 Subject: udf: Simplify handling of Volume Descriptor Pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to ECMA-167 3/8.4.2 Volume Descriptor Pointer is terminating current extent of Volume Descriptor Sequence. Also according to ECMA-167 3/8.4.3 Volume Descriptor Sequence Number is not significant for Volume Descriptor Pointers. Simplify the handling of Volume Descriptor Pointers to take this into account. Acked-by: Pali Rohár Signed-off-by: Jan Kara --- fs/udf/super.c | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 5c5d5fd513cc..f80b97173acd 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1615,7 +1615,6 @@ static noinline int udf_process_sequence( bool done = false; uint32_t vdsn; uint16_t ident; - long next_s = 0, next_e = 0; int ret; unsigned int indirections = 0; @@ -1647,19 +1646,22 @@ static noinline int udf_process_sequence( } break; case TAG_IDENT_VDP: /* ISO 13346 3/10.3 */ - curr = &vds[VDS_POS_VOL_DESC_PTR]; - if (vdsn >= curr->volDescSeqNum) { - curr->volDescSeqNum = vdsn; - curr->block = block; - - vdp = (struct volDescPtr *)bh->b_data; - next_s = le32_to_cpu( - vdp->nextVolDescSeqExt.extLocation); - next_e = le32_to_cpu( - vdp->nextVolDescSeqExt.extLength); - next_e = next_e >> sb->s_blocksize_bits; - next_e += next_s - 1; + if (++indirections > UDF_MAX_TD_NESTING) { + udf_err(sb, "too many Volume Descriptor " + "Pointers (max %u supported)\n", + UDF_MAX_TD_NESTING); + brelse(bh); + return -EIO; } + + vdp = (struct volDescPtr *)bh->b_data; + block = le32_to_cpu(vdp->nextVolDescSeqExt.extLocation); + lastblock = le32_to_cpu( + vdp->nextVolDescSeqExt.extLength) >> + sb->s_blocksize_bits; + lastblock += block - 1; + /* For loop is going to increment 'block' again */ + block--; break; case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */ curr = &vds[VDS_POS_IMP_USE_VOL_DESC]; @@ -1688,19 +1690,8 @@ static noinline int udf_process_sequence( } break; case TAG_IDENT_TD: /* ISO 13346 3/10.9 */ - if (++indirections > UDF_MAX_TD_NESTING) { - udf_err(sb, "too many TDs (max %u supported)\n", UDF_MAX_TD_NESTING); - brelse(bh); - return -EIO; - } - vds[VDS_POS_TERMINATING_DESC].block = block; - if (next_e) { - block = next_s; - lastblock = next_e; - next_s = next_e = 0; - } else - done = true; + done = true; break; } brelse(bh); -- cgit v1.2.1 From 67621675e9636ed272a4f0235215accdfbc99768 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 8 Feb 2018 18:10:54 +0100 Subject: udf: Allow volume descriptor sequence to be terminated by unrecorded block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to ECMA-167 3/8.4.2 a volume descriptor sequence can be terminated also by an unrecorded block within the extent of volume descriptor sequence. Currently we errored out in such case making such volumes unmountable. Handle that case by treating any invalid block as a block terminating the sequence. Reported-by: Pali Rohár Acked-by: Pali Rohár Signed-off-by: Jan Kara --- fs/udf/super.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index f80b97173acd..456d737fc7ca 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1627,12 +1627,8 @@ static noinline int udf_process_sequence( for (; (!done && block <= lastblock); block++) { bh = udf_read_tagged(sb, block, block, &ident); - if (!bh) { - udf_err(sb, - "Block %llu of volume descriptor sequence is corrupted or we could not read it\n", - (unsigned long long)block); - return -EAGAIN; - } + if (!bh) + break; /* Process each descriptor (ISO 13346 3/8.3-8.4) */ gd = (struct generic_desc *)bh->b_data; -- cgit v1.2.1 From 4b8d425215f9a5f04d6ca51769a1d26cbc084aec Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 9 Feb 2018 12:52:28 +0100 Subject: udf: Convert descriptor index definitions to enum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert index definitions from defines to enum. It is a shorter description and easier to modify. Also remove VDS_POS_VOL_DESC_PTR since it is unused. Acked-by: Pali Rohár Signed-off-by: Jan Kara --- fs/udf/super.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 456d737fc7ca..7712fa4b5a3d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -64,14 +64,15 @@ #include #include -#define VDS_POS_PRIMARY_VOL_DESC 0 -#define VDS_POS_UNALLOC_SPACE_DESC 1 -#define VDS_POS_LOGICAL_VOL_DESC 2 -#define VDS_POS_PARTITION_DESC 3 -#define VDS_POS_IMP_USE_VOL_DESC 4 -#define VDS_POS_VOL_DESC_PTR 5 -#define VDS_POS_TERMINATING_DESC 6 -#define VDS_POS_LENGTH 7 +enum { + VDS_POS_PRIMARY_VOL_DESC, + VDS_POS_UNALLOC_SPACE_DESC, + VDS_POS_LOGICAL_VOL_DESC, + VDS_POS_PARTITION_DESC, + VDS_POS_IMP_USE_VOL_DESC, + VDS_POS_TERMINATING_DESC, + VDS_POS_LENGTH +}; #define VSD_FIRST_SECTOR_OFFSET 32768 #define VSD_MAX_SECTOR_OFFSET 0x800000 -- cgit v1.2.1 From 18cf4781c9e5dc1067dd50a8f8f321c70142461f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 9 Feb 2018 15:15:39 +0100 Subject: udf: Unify common handling of descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When scanning Volume Descriptor Sequence, several descriptors have exactly the same handling. Unify it. Acked-by: Pali Rohár Signed-off-by: Jan Kara --- fs/udf/super.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 7712fa4b5a3d..3e9925acd235 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1593,6 +1593,21 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ sbi->s_lvid_bh = NULL; } +static struct udf_vds_record *get_volume_descriptor_record( + struct udf_vds_record *vds, uint16_t ident) +{ + switch (ident) { + case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */ + return &vds[VDS_POS_PRIMARY_VOL_DESC]; + case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */ + return &vds[VDS_POS_IMP_USE_VOL_DESC]; + case TAG_IDENT_LVD: /* ISO 13346 3/10.6 */ + return &vds[VDS_POS_LOGICAL_VOL_DESC]; + case TAG_IDENT_USD: /* ISO 13346 3/10.8 */ + return &vds[VDS_POS_UNALLOC_SPACE_DESC]; + } + return NULL; +} /* * Process a main/reserve volume descriptor sequence. @@ -1635,13 +1650,6 @@ static noinline int udf_process_sequence( gd = (struct generic_desc *)bh->b_data; vdsn = le32_to_cpu(gd->volDescSeqNum); switch (ident) { - case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */ - curr = &vds[VDS_POS_PRIMARY_VOL_DESC]; - if (vdsn >= curr->volDescSeqNum) { - curr->volDescSeqNum = vdsn; - curr->block = block; - } - break; case TAG_IDENT_VDP: /* ISO 13346 3/10.3 */ if (++indirections > UDF_MAX_TD_NESTING) { udf_err(sb, "too many Volume Descriptor " @@ -1660,8 +1668,11 @@ static noinline int udf_process_sequence( /* For loop is going to increment 'block' again */ block--; break; + case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */ case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */ - curr = &vds[VDS_POS_IMP_USE_VOL_DESC]; + case TAG_IDENT_LVD: /* ISO 13346 3/10.6 */ + case TAG_IDENT_USD: /* ISO 13346 3/10.8 */ + curr = get_volume_descriptor_record(vds, ident); if (vdsn >= curr->volDescSeqNum) { curr->volDescSeqNum = vdsn; curr->block = block; @@ -1672,20 +1683,6 @@ static noinline int udf_process_sequence( if (!curr->block) curr->block = block; break; - case TAG_IDENT_LVD: /* ISO 13346 3/10.6 */ - curr = &vds[VDS_POS_LOGICAL_VOL_DESC]; - if (vdsn >= curr->volDescSeqNum) { - curr->volDescSeqNum = vdsn; - curr->block = block; - } - break; - case TAG_IDENT_USD: /* ISO 13346 3/10.8 */ - curr = &vds[VDS_POS_UNALLOC_SPACE_DESC]; - if (vdsn >= curr->volDescSeqNum) { - curr->volDescSeqNum = vdsn; - curr->block = block; - } - break; case TAG_IDENT_TD: /* ISO 13346 3/10.9 */ vds[VDS_POS_TERMINATING_DESC].block = block; done = true; -- cgit v1.2.1 From 7b78fd02fb19530fd101ae137a1f46aa466d9bb6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 9 Feb 2018 15:56:34 +0100 Subject: udf: Fix handling of Partition Descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current handling of Partition Descriptors in Volume Descriptor Sequence is buggy in several ways. Firstly, it does not take descriptor sequence numbers into account at all, thus any volume making serious use of them would be unmountable. Secondly, it does not handle Volume Descriptor Pointers or Volume Descriptor Sequence without Terminating Descriptor. Fix these problems by properly remembering all Partition Descriptors in the Volume Descriptor Sequence and their sequence numbers. This is made more complicated by the fact that we don't know number of partitions in advance and sequence numbers have to be tracked on per-partition basis. Reported-by: Pali Rohár Acked-by: Pali Rohár Signed-off-by: Jan Kara --- fs/udf/super.c | 105 +++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 32 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 3e9925acd235..28ee2ac2503d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -68,9 +68,7 @@ enum { VDS_POS_PRIMARY_VOL_DESC, VDS_POS_UNALLOC_SPACE_DESC, VDS_POS_LOGICAL_VOL_DESC, - VDS_POS_PARTITION_DESC, VDS_POS_IMP_USE_VOL_DESC, - VDS_POS_TERMINATING_DESC, VDS_POS_LENGTH }; @@ -1593,18 +1591,57 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ sbi->s_lvid_bh = NULL; } -static struct udf_vds_record *get_volume_descriptor_record( - struct udf_vds_record *vds, uint16_t ident) +/* + * Step for reallocation of table of partition descriptor sequence numbers. + * Must be power of 2. + */ +#define PART_DESC_ALLOC_STEP 32 + +struct desc_seq_scan_data { + struct udf_vds_record vds[VDS_POS_LENGTH]; + unsigned int size_part_descs; + struct udf_vds_record *part_descs_loc; +}; + +static struct udf_vds_record *handle_partition_descriptor( + struct buffer_head *bh, + struct desc_seq_scan_data *data) +{ + struct partitionDesc *desc = (struct partitionDesc *)bh->b_data; + int partnum; + + partnum = le16_to_cpu(desc->partitionNumber); + if (partnum >= data->size_part_descs) { + struct udf_vds_record *new_loc; + unsigned int new_size = ALIGN(partnum, PART_DESC_ALLOC_STEP); + + new_loc = kzalloc(sizeof(*new_loc) * new_size, GFP_KERNEL); + if (!new_loc) + return ERR_PTR(-ENOMEM); + memcpy(new_loc, data->part_descs_loc, + data->size_part_descs * sizeof(*new_loc)); + kfree(data->part_descs_loc); + data->part_descs_loc = new_loc; + data->size_part_descs = new_size; + } + return &(data->part_descs_loc[partnum]); +} + + +static struct udf_vds_record *get_volume_descriptor_record(uint16_t ident, + struct buffer_head *bh, struct desc_seq_scan_data *data) { switch (ident) { case TAG_IDENT_PVD: /* ISO 13346 3/10.1 */ - return &vds[VDS_POS_PRIMARY_VOL_DESC]; + return &(data->vds[VDS_POS_PRIMARY_VOL_DESC]); case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */ - return &vds[VDS_POS_IMP_USE_VOL_DESC]; + return &(data->vds[VDS_POS_IMP_USE_VOL_DESC]); case TAG_IDENT_LVD: /* ISO 13346 3/10.6 */ - return &vds[VDS_POS_LOGICAL_VOL_DESC]; + return &(data->vds[VDS_POS_LOGICAL_VOL_DESC]); case TAG_IDENT_USD: /* ISO 13346 3/10.8 */ - return &vds[VDS_POS_UNALLOC_SPACE_DESC]; + return &(data->vds[VDS_POS_UNALLOC_SPACE_DESC]); + case TAG_IDENT_PD: /* ISO 13346 3/10.5 */ + return handle_partition_descriptor(bh, data); } return NULL; } @@ -1624,7 +1661,6 @@ static noinline int udf_process_sequence( struct kernel_lb_addr *fileset) { struct buffer_head *bh = NULL; - struct udf_vds_record vds[VDS_POS_LENGTH]; struct udf_vds_record *curr; struct generic_desc *gd; struct volDescPtr *vdp; @@ -1633,8 +1669,15 @@ static noinline int udf_process_sequence( uint16_t ident; int ret; unsigned int indirections = 0; - - memset(vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH); + struct desc_seq_scan_data data; + unsigned int i; + + memset(data.vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH); + data.size_part_descs = PART_DESC_ALLOC_STEP; + data.part_descs_loc = kzalloc(sizeof(*data.part_descs_loc) * + data.size_part_descs, GFP_KERNEL); + if (!data.part_descs_loc) + return -ENOMEM; /* * Read the main descriptor sequence and find which descriptors @@ -1672,19 +1715,21 @@ static noinline int udf_process_sequence( case TAG_IDENT_IUVD: /* ISO 13346 3/10.4 */ case TAG_IDENT_LVD: /* ISO 13346 3/10.6 */ case TAG_IDENT_USD: /* ISO 13346 3/10.8 */ - curr = get_volume_descriptor_record(vds, ident); + case TAG_IDENT_PD: /* ISO 13346 3/10.5 */ + curr = get_volume_descriptor_record(ident, bh, &data); + if (IS_ERR(curr)) { + brelse(bh); + return PTR_ERR(curr); + } + /* Descriptor we don't care about? */ + if (!curr) + break; if (vdsn >= curr->volDescSeqNum) { curr->volDescSeqNum = vdsn; curr->block = block; } break; - case TAG_IDENT_PD: /* ISO 13346 3/10.5 */ - curr = &vds[VDS_POS_PARTITION_DESC]; - if (!curr->block) - curr->block = block; - break; case TAG_IDENT_TD: /* ISO 13346 3/10.9 */ - vds[VDS_POS_TERMINATING_DESC].block = block; done = true; break; } @@ -1694,31 +1739,27 @@ static noinline int udf_process_sequence( * Now read interesting descriptors again and process them * in a suitable order */ - if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) { + if (!data.vds[VDS_POS_PRIMARY_VOL_DESC].block) { udf_err(sb, "Primary Volume Descriptor not found!\n"); return -EAGAIN; } - ret = udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block); + ret = udf_load_pvoldesc(sb, data.vds[VDS_POS_PRIMARY_VOL_DESC].block); if (ret < 0) return ret; - if (vds[VDS_POS_LOGICAL_VOL_DESC].block) { + if (data.vds[VDS_POS_LOGICAL_VOL_DESC].block) { ret = udf_load_logicalvol(sb, - vds[VDS_POS_LOGICAL_VOL_DESC].block, - fileset); + data.vds[VDS_POS_LOGICAL_VOL_DESC].block, + fileset); if (ret < 0) return ret; } - if (vds[VDS_POS_PARTITION_DESC].block) { - /* - * We rescan the whole descriptor sequence to find - * partition descriptor blocks and process them. - */ - for (block = vds[VDS_POS_PARTITION_DESC].block; - block < vds[VDS_POS_TERMINATING_DESC].block; - block++) { - ret = udf_load_partdesc(sb, block); + /* Now handle prevailing Partition Descriptors */ + for (i = 0; i < data.size_part_descs; i++) { + if (data.part_descs_loc[i].block) { + ret = udf_load_partdesc(sb, + data.part_descs_loc[i].block); if (ret < 0) return ret; } -- cgit v1.2.1 From 70260e44750356fecb40ff5fcb0f91bcc911ab5f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Feb 2018 17:27:44 +0100 Subject: udf: Ignore [ug]id=ignore mount options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently uid=ignore and gid=ignore make no sense without uid= and gid= respectively as they result in all files having invalid uid / gid which then doesn't allow even root to modify files and thus causes confusion. And since commit ca76d2d8031f "UDF: fix UID and GID mount option ignorance" (from over 10 years ago) uid= overrides all uids on disk as uid=ignore does. So just silently ignore uid=ignore mount option. Reviewed-by: Pali Rohár Signed-off-by: Jan Kara --- fs/udf/inode.c | 2 -- fs/udf/super.c | 10 ++-------- fs/udf/udf_sb.h | 14 ++++++-------- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index c23744d5ae5c..9021c15cec17 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1402,13 +1402,11 @@ reread: read_lock(&sbi->s_cred_lock); i_uid_write(inode, le32_to_cpu(fe->uid)); if (!uid_valid(inode->i_uid) || - UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) || UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_SET)) inode->i_uid = UDF_SB(inode->i_sb)->s_uid; i_gid_write(inode, le32_to_cpu(fe->gid)); if (!gid_valid(inode->i_gid) || - UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_IGNORE) || UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET)) inode->i_gid = UDF_SB(inode->i_sb)->s_gid; diff --git a/fs/udf/super.c b/fs/udf/super.c index 28ee2ac2503d..4d671be50059 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -348,12 +348,8 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",shortad"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_FORGET)) seq_puts(seq, ",uid=forget"); - if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_IGNORE)) - seq_puts(seq, ",uid=ignore"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_FORGET)) seq_puts(seq, ",gid=forget"); - if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_IGNORE)) - seq_puts(seq, ",gid=ignore"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET)) seq_printf(seq, ",uid=%u", from_kuid(&init_user_ns, sbi->s_uid)); if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET)) @@ -609,14 +605,12 @@ static int udf_parse_options(char *options, struct udf_options *uopt, uopt->flags |= (1 << UDF_FLAG_NLS_MAP); break; #endif - case Opt_uignore: - uopt->flags |= (1 << UDF_FLAG_UID_IGNORE); - break; case Opt_uforget: uopt->flags |= (1 << UDF_FLAG_UID_FORGET); break; + case Opt_uignore: case Opt_gignore: - uopt->flags |= (1 << UDF_FLAG_GID_IGNORE); + /* These options are superseeded by uid= */ break; case Opt_gforget: uopt->flags |= (1 << UDF_FLAG_GID_FORGET); diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 68c9f1d618f5..9dcb475fc74e 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -23,14 +23,12 @@ #define UDF_FLAG_NLS_MAP 9 #define UDF_FLAG_UTF8 10 #define UDF_FLAG_UID_FORGET 11 /* save -1 for uid to disk */ -#define UDF_FLAG_UID_IGNORE 12 /* use sb uid instead of on disk uid */ -#define UDF_FLAG_GID_FORGET 13 -#define UDF_FLAG_GID_IGNORE 14 -#define UDF_FLAG_UID_SET 15 -#define UDF_FLAG_GID_SET 16 -#define UDF_FLAG_SESSION_SET 17 -#define UDF_FLAG_LASTBLOCK_SET 18 -#define UDF_FLAG_BLOCKSIZE_SET 19 +#define UDF_FLAG_GID_FORGET 12 +#define UDF_FLAG_UID_SET 13 +#define UDF_FLAG_GID_SET 14 +#define UDF_FLAG_SESSION_SET 15 +#define UDF_FLAG_LASTBLOCK_SET 16 +#define UDF_FLAG_BLOCKSIZE_SET 17 #define UDF_PART_FLAG_UNALLOC_BITMAP 0x0001 #define UDF_PART_FLAG_UNALLOC_TABLE 0x0002 -- cgit v1.2.1 From ecd10aa42819cd5dcf639d25575e95a5bda8d08a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Feb 2018 17:59:31 +0100 Subject: udf: Apply uid/gid mount options also to new inodes & chown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently newly created files belong to current user despite uid= / gid= mount options. This is confusing to users (as owner of the file will change after remount / eviction from cache) and also inconsistent with e.g. FAT with the same mount option. So apply uid= and gid= also to newly created inodes and similarly as FAT disallow to change owner of the file in this case. Reported-by: Steve Kenton Reviewed-by: Pali Rohár Signed-off-by: Jan Kara --- fs/udf/file.c | 10 ++++++++++ fs/udf/ialloc.c | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/fs/udf/file.c b/fs/udf/file.c index 356c2bf148a5..cd31e4f6d6da 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -257,12 +257,22 @@ const struct file_operations udf_file_operations = { static int udf_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); + struct super_block *sb = inode->i_sb; int error; error = setattr_prepare(dentry, attr); if (error) return error; + if ((attr->ia_valid & ATTR_UID) && + UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET) && + !uid_eq(attr->ia_uid, UDF_SB(sb)->s_uid)) + return -EPERM; + if ((attr->ia_valid & ATTR_GID) && + UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET) && + !gid_eq(attr->ia_gid, UDF_SB(sb)->s_gid)) + return -EPERM; + if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { error = udf_setsize(inode, attr->ia_size); diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index b6e420c1bfeb..b7a0d4b4bda1 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -104,6 +104,10 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode) } inode_init_owner(inode, dir, mode); + if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET)) + inode->i_uid = sbi->s_uid; + if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET)) + inode->i_gid = sbi->s_gid; iinfo->i_location.logicalBlockNum = block; iinfo->i_location.partitionReferenceNum = -- cgit v1.2.1 From 0c9850f4d4c5d645125869fe0fa206fb662bd98b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 22 Feb 2018 10:28:52 +0100 Subject: udf: Clean up handling of invalid uid/gid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current code relies on the fact that invalid uid/gid as defined by UDF 2.60 3.3.3.1 and 3.3.3.2 coincides with invalid uid/gid as used by the user namespaces implementation. Since this is only lucky coincidence, clean this up to avoid future surprises in case user namespaces implementation changes. Also this is more robust in presence of valid (from UDF point of view) uids / gids which do not map into current user namespace. Reviewed-by: Pali Rohár Signed-off-by: Jan Kara --- fs/udf/inode.c | 21 +++++++++++++-------- fs/udf/udfdecl.h | 2 ++ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 9021c15cec17..c80765d62f7e 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1275,6 +1275,7 @@ static int udf_read_inode(struct inode *inode, bool hidden_inode) unsigned int indirections = 0; int bs = inode->i_sb->s_blocksize; int ret = -EIO; + uint32_t uid, gid; reread: if (iloc->partitionReferenceNum >= sbi->s_partitions) { @@ -1400,15 +1401,19 @@ reread: ret = -EIO; read_lock(&sbi->s_cred_lock); - i_uid_write(inode, le32_to_cpu(fe->uid)); - if (!uid_valid(inode->i_uid) || + uid = le32_to_cpu(fe->uid); + if (uid == UDF_INVALID_ID || UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_SET)) - inode->i_uid = UDF_SB(inode->i_sb)->s_uid; + inode->i_uid = sbi->s_uid; + else + i_uid_write(inode, uid); - i_gid_write(inode, le32_to_cpu(fe->gid)); - if (!gid_valid(inode->i_gid) || + gid = le32_to_cpu(fe->gid); + if (gid == UDF_INVALID_ID || UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET)) - inode->i_gid = UDF_SB(inode->i_sb)->s_gid; + inode->i_gid = sbi->s_gid; + else + i_gid_write(inode, gid); if (fe->icbTag.fileType != ICBTAG_FILE_TYPE_DIRECTORY && sbi->s_fmode != UDF_INVALID_MODE) @@ -1653,12 +1658,12 @@ static int udf_update_inode(struct inode *inode, int do_sync) } if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_FORGET)) - fe->uid = cpu_to_le32(-1); + fe->uid = cpu_to_le32(UDF_INVALID_ID); else fe->uid = cpu_to_le32(i_uid_read(inode)); if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_FORGET)) - fe->gid = cpu_to_le32(-1); + fe->gid = cpu_to_le32(UDF_INVALID_ID); else fe->gid = cpu_to_le32(i_gid_read(inode)); diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index f5e0fe78979e..68e8a64d22e0 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -48,6 +48,8 @@ extern __printf(3, 4) void _udf_warn(struct super_block *sb, #define UDF_EXTENT_LENGTH_MASK 0x3FFFFFFF #define UDF_EXTENT_FLAG_MASK 0xC0000000 +#define UDF_INVALID_ID ((uint32_t)-1) + #define UDF_NAME_PAD 4 #define UDF_NAME_LEN 254 #define UDF_NAME_LEN_CS0 255 -- cgit v1.2.1 From 116e5258e4115aca0c64ac0bf40ded3b353ed626 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 22 Feb 2018 10:39:52 +0100 Subject: udf: Provide saner default for invalid uid / gid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently when UDF filesystem is recorded without uid / gid (ids are set to -1), we will assign INVALID_[UG]ID to vfs inode unless user uses uid= and gid= mount options. In such case filesystem could not be modified in any way as VFS refuses to modify files with invalid ids (even by root). This is confusing to users and not very useful default since such media mode is generally used for removable media. Use overflow[ug]id instead so that at least root can modify the filesystem. Reported-by: Steve Kenton Reviewed-by: Pali Rohár Signed-off-by: Jan Kara --- fs/udf/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 4d671be50059..31d8ce13fa18 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -2111,8 +2111,9 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) bool lvid_open = false; uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); - uopt.uid = INVALID_UID; - uopt.gid = INVALID_GID; + /* By default we'll use overflow[ug]id when UDF inode [ug]id == -1 */ + uopt.uid = make_kuid(current_user_ns(), overflowuid); + uopt.gid = make_kgid(current_user_ns(), overflowgid); uopt.umask = 0; uopt.fmode = UDF_INVALID_MODE; uopt.dmode = UDF_INVALID_MODE; -- cgit v1.2.1 From 6ccd5194a0f42b0f01a3f9d0c6de536940d04bad Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 22 Feb 2018 10:52:34 +0100 Subject: udf: Update mount option documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update documentation of uid and gid mount options. Reviewed-by: Pali Rohár Signed-off-by: Jan Kara --- Documentation/filesystems/udf.txt | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/Documentation/filesystems/udf.txt b/Documentation/filesystems/udf.txt index d3d0e3218f86..cf30bdcdcc4f 100644 --- a/Documentation/filesystems/udf.txt +++ b/Documentation/filesystems/udf.txt @@ -36,18 +36,14 @@ The following mount options are supported: iocharset= Set the NLS character set The uid= and gid= options need a bit more explaining. They will accept a -decimal numeric value which will be used as the default ID for that mount. -They will also accept the string "ignore" and "forget". For files on the disk -that are owned by nobody ( -1 ), they will instead look as if they are owned -by the default ID. The ignore option causes the default ID to override all -IDs on the disk, not just -1. The forget option causes all IDs to be written -to disk as -1, so when the media is later remounted, they will appear to be -owned by whatever default ID it is mounted with at that time. +decimal numeric value and all inodes on that mount will then appear as +belonging to that uid and gid. Mount options also accept the string "forget". +The forget option causes all IDs to be written to disk as -1 which is a way +of UDF standard to indicate that IDs are not supported for these files . -For typical desktop use of removable media, you should set the ID to that -of the interactively logged on user, and also specify both the forget and -ignore options. This way the interactive user will always see the files -on the disk as belonging to him. +For typical desktop use of removable media, you should set the ID to that of +the interactively logged on user, and also specify the forget option. This way +the interactive user will always see the files on the disk as belonging to him. The remaining are for debugging and disaster recovery: -- cgit v1.2.1 From f0c4a81711cb773b60cb8f90173e6491e67bf74f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 22 Feb 2018 14:33:04 +0100 Subject: udf: Remove never implemented mount options Signed-off-by: Jan Kara --- Documentation/filesystems/udf.txt | 8 -------- fs/udf/super.c | 28 +--------------------------- 2 files changed, 1 insertion(+), 35 deletions(-) diff --git a/Documentation/filesystems/udf.txt b/Documentation/filesystems/udf.txt index cf30bdcdcc4f..e2f2faf32f18 100644 --- a/Documentation/filesystems/udf.txt +++ b/Documentation/filesystems/udf.txt @@ -53,16 +53,8 @@ The following expect a offset from 0. session= Set the CDROM session (default= last session) anchor= Override standard anchor location. (default= 256) - volume= Override the VolumeDesc location. (unused) - partition= Override the PartitionDesc location. (unused) lastblock= Set the last block of the filesystem/ -The following expect a offset from the partition root. - - fileset= Override the fileset block location. (unused) - rootdir= Override the root directory location. (unused) - WARNING: overriding the rootdir to a non-directory may - yield highly unpredictable results. ------------------------------------------------------------------------------- diff --git a/fs/udf/super.c b/fs/udf/super.c index 31d8ce13fa18..2d4929fa884d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -222,10 +222,6 @@ struct udf_options { unsigned int session; unsigned int lastblock; unsigned int anchor; - unsigned int volume; - unsigned short partition; - unsigned int fileset; - unsigned int rootdir; unsigned int flags; umode_t umask; kgid_t gid; @@ -366,10 +362,6 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",lastblock=%u", sbi->s_last_block); if (sbi->s_anchor != 0) seq_printf(seq, ",anchor=%u", sbi->s_anchor); - /* - * volume, partition, fileset and rootdir seem to be ignored - * currently - */ if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) seq_puts(seq, ",utf8"); if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP) && sbi->s_nls_map) @@ -482,13 +474,9 @@ static int udf_parse_options(char *options, struct udf_options *uopt, int option; uopt->novrs = 0; - uopt->partition = 0xFFFF; uopt->session = 0xFFFFFFFF; uopt->lastblock = 0; uopt->anchor = 0; - uopt->volume = 0xFFFFFFFF; - uopt->rootdir = 0xFFFFFFFF; - uopt->fileset = 0xFFFFFFFF; uopt->nls_map = NULL; if (!options) @@ -577,24 +565,10 @@ static int udf_parse_options(char *options, struct udf_options *uopt, uopt->anchor = option; break; case Opt_volume: - if (match_int(args, &option)) - return 0; - uopt->volume = option; - break; case Opt_partition: - if (match_int(args, &option)) - return 0; - uopt->partition = option; - break; case Opt_fileset: - if (match_int(args, &option)) - return 0; - uopt->fileset = option; - break; case Opt_rootdir: - if (match_int(args, &option)) - return 0; - uopt->rootdir = option; + /* Ignored (never implemented properly) */ break; case Opt_utf8: uopt->flags |= (1 << UDF_FLAG_UTF8); -- cgit v1.2.1 From 1f5eaa90010ed7cf0ae90a526c48657d02c6086f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Feb 2018 14:10:59 +0100 Subject: fanotify: Avoid lost events due to ENOMEM for unlimited queues Fanotify queues of unlimited length do not expect events can be lost. Since these queues are used for system auditing and other security related tasks, loosing events can even have security implications. Currently, since the allocation is small (32-bytes), it cannot fail however when we start accounting events in memcgs, allocation can start failing. So avoid loosing events due to failure to allocate memory by making event allocation use __GFP_NOFAIL. Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 19 ++++++++++++++----- fs/notify/fanotify/fanotify.h | 3 ++- fs/notify/fanotify/fanotify_user.c | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 6702a6a0bbb5..928f2a5eedb7 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -139,23 +139,32 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, return false; } -struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask, +struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, + struct inode *inode, u32 mask, const struct path *path) { struct fanotify_event_info *event; + gfp_t gfp = GFP_KERNEL; + + /* + * For queues with unlimited length lost events are not expected and + * can possibly have security implications. Avoid losing events when + * memory is short. + */ + if (group->max_events == UINT_MAX) + gfp |= __GFP_NOFAIL; if (fanotify_is_perm_event(mask)) { struct fanotify_perm_event_info *pevent; - pevent = kmem_cache_alloc(fanotify_perm_event_cachep, - GFP_KERNEL); + pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp); if (!pevent) return NULL; event = &pevent->fae; pevent->response = 0; goto init; } - event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); + event = kmem_cache_alloc(fanotify_event_cachep, gfp); if (!event) return NULL; init: __maybe_unused @@ -210,7 +219,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, return 0; } - event = fanotify_alloc_event(inode, mask, data); + event = fanotify_alloc_event(group, inode, mask, data); ret = -ENOMEM; if (unlikely(!event)) goto finish; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 256d9d1ddea9..8609ba06f474 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -52,5 +52,6 @@ static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) return container_of(fse, struct fanotify_event_info, fse); } -struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask, +struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, + struct inode *inode, u32 mask, const struct path *path); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index c07eb3d655ea..72e367822efb 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -757,7 +757,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.user = user; atomic_inc(&user->fanotify_listeners); - oevent = fanotify_alloc_event(NULL, FS_Q_OVERFLOW, NULL); + oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL); if (unlikely(!oevent)) { fd = -ENOMEM; goto out_destroy_group; -- cgit v1.2.1 From 7b1f641776e0c8b824fb10135168e4b683a9e2ba Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Feb 2018 15:07:52 +0100 Subject: fsnotify: Let userspace know about lost events due to ENOMEM Currently if notification event is lost due to event allocation failing we ENOMEM, we just silently continue (except for fanotify permission events where we deny the access). This is undesirable as userspace has no way of knowing whether the notifications it got are complete or not. Treat lost events due to ENOMEM the same way as lost events due to queue overflow so that userspace knows something bad happened and it likely needs to rescan the filesystem. Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 9 ++++++++- fs/notify/inotify/inotify_fsnotify.c | 8 +++++++- fs/notify/notification.c | 3 ++- include/linux/fsnotify_backend.h | 6 ++++++ 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 928f2a5eedb7..d51e1bb781cf 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -221,8 +221,15 @@ static int fanotify_handle_event(struct fsnotify_group *group, event = fanotify_alloc_event(group, inode, mask, data); ret = -ENOMEM; - if (unlikely(!event)) + if (unlikely(!event)) { + /* + * We don't queue overflow events for permission events as + * there the access is denied and so no event is in fact lost. + */ + if (!fanotify_is_perm_event(mask)) + fsnotify_queue_overflow(group); goto finish; + } fsn_event = &event->fse; ret = fsnotify_add_event(group, fsn_event, fanotify_merge); diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 8b73332735ba..40dedb37a1f3 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -99,8 +99,14 @@ int inotify_handle_event(struct fsnotify_group *group, fsn_mark); event = kmalloc(alloc_len, GFP_KERNEL); - if (unlikely(!event)) + if (unlikely(!event)) { + /* + * Treat lost event due to ENOMEM the same way as queue + * overflow to let userspace know event was lost. + */ + fsnotify_queue_overflow(group); return -ENOMEM; + } fsn_event = &event->fse; fsnotify_init_event(fsn_event, inode, mask); diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 66f85c651c52..3c3e36745f59 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -111,7 +111,8 @@ int fsnotify_add_event(struct fsnotify_group *group, return 2; } - if (group->q_len >= group->max_events) { + if (event == group->overflow_event || + group->q_len >= group->max_events) { ret = 2; /* Queue overflow event only if it isn't already queued */ if (!list_empty(&group->overflow_event->list)) { diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 067d52e95f02..9f1edb92c97e 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -331,6 +331,12 @@ extern int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, int (*merge)(struct list_head *, struct fsnotify_event *)); +/* Queue overflow event to a notification group */ +static inline void fsnotify_queue_overflow(struct fsnotify_group *group) +{ + fsnotify_add_event(group, group->overflow_event, NULL); +} + /* true if the group notification queue is empty */ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ -- cgit v1.2.1 From b72e632c6c7e29343651b0e26539b2e01444dfe6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 27 Feb 2018 18:55:31 +0100 Subject: udf: Do not mark possibly inconsistent filesystems as closed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If logical volume integrity descriptor contains non-closed integrity type when mounting the volume, there are high chances that the volume is not consistent (device was detached before the filesystem was unmounted). Don't touch integrity type of such volume so that fsck can recognize it and check such filesystem. Reported-by: Pali Rohár Reviewed-by: Pali Rohár Signed-off-by: Jan Kara --- fs/udf/super.c | 8 ++++++-- fs/udf/udf_sb.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 2d4929fa884d..bf5f6084dcb2 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1988,7 +1988,10 @@ static void udf_open_lvid(struct super_block *sb) lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; ktime_get_real_ts(&ts); udf_time_to_disk_stamp(&lvid->recordingDateAndTime, ts); - lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN); + if (le32_to_cpu(lvid->integrityType) == LVID_INTEGRITY_TYPE_CLOSE) + lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN); + else + UDF_SET_FLAG(sb, UDF_FLAG_INCONSISTENT); lvid->descTag.descCRC = cpu_to_le16( crc_itu_t(0, (char *)lvid + sizeof(struct tag), @@ -2028,7 +2031,8 @@ static void udf_close_lvid(struct super_block *sb) lvidiu->minUDFReadRev = cpu_to_le16(sbi->s_udfrev); if (sbi->s_udfrev > le16_to_cpu(lvidiu->minUDFWriteRev)) lvidiu->minUDFWriteRev = cpu_to_le16(sbi->s_udfrev); - lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); + if (!UDF_QUERY_FLAG(sb, UDF_FLAG_INCONSISTENT)) + lvid->integrityType = cpu_to_le32(LVID_INTEGRITY_TYPE_CLOSE); lvid->descTag.descCRC = cpu_to_le16( crc_itu_t(0, (char *)lvid + sizeof(struct tag), diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 9dcb475fc74e..9dd3e1b9619e 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -29,6 +29,7 @@ #define UDF_FLAG_SESSION_SET 15 #define UDF_FLAG_LASTBLOCK_SET 16 #define UDF_FLAG_BLOCKSIZE_SET 17 +#define UDF_FLAG_INCONSISTENT 18 #define UDF_PART_FLAG_UNALLOC_BITMAP 0x0001 #define UDF_PART_FLAG_UNALLOC_TABLE 0x0002 -- cgit v1.2.1 From d09099051057bbae5e1a887167ad6189ac8bfca4 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Mon, 26 Feb 2018 17:45:12 +0800 Subject: ext2: change return code to -ENOMEM when failing memory allocation Change return code to -ENOMEM from -EINVAL when failing memory allocation in fill_super(), meanwhile delete redundant initial assignment of variable err. Signed-off-by: Chengguang Xu Signed-off-by: Jan Kara --- fs/ext2/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 7666c065b96f..de1694512f1f 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -827,7 +827,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) unsigned long logic_sb_block; unsigned long offset = 0; unsigned long def_mount_opts; - long ret = -EINVAL; + long ret = -ENOMEM; int blocksize = BLOCK_SIZE; int db_count; int i, j; @@ -835,7 +835,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) int err; struct ext2_mount_options opts; - err = -ENOMEM; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) goto failed; @@ -851,6 +850,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_daxdev = dax_dev; spin_lock_init(&sbi->s_lock); + ret = -EINVAL; /* * See what the current blocksize for the device is, and -- cgit v1.2.1 From 785dffe1daf95c176504dea3069c9df11af7ff15 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sun, 25 Feb 2018 21:25:07 +0800 Subject: udf: fix potential refcnt problem of nls module When specifiying iocharset multiple times in a mount or once/multiple in a remount, current option parsing may cause inaccurate refcount of nls module. Also, in the failure cleanup of option parsing, the condition of calling unload_nls is not sufficient. Signed-off-by: Chengguang Xu Signed-off-by: Jan Kara --- fs/udf/super.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index bf5f6084dcb2..7949c338efa5 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -477,7 +477,6 @@ static int udf_parse_options(char *options, struct udf_options *uopt, uopt->session = 0xFFFFFFFF; uopt->lastblock = 0; uopt->anchor = 0; - uopt->nls_map = NULL; if (!options) return 1; @@ -575,8 +574,12 @@ static int udf_parse_options(char *options, struct udf_options *uopt, break; #ifdef CONFIG_UDF_NLS case Opt_iocharset: - uopt->nls_map = load_nls(args[0].from); - uopt->flags |= (1 << UDF_FLAG_NLS_MAP); + if (!remount) { + if (uopt->nls_map) + unload_nls(uopt->nls_map); + uopt->nls_map = load_nls(args[0].from); + uopt->flags |= (1 << UDF_FLAG_NLS_MAP); + } break; #endif case Opt_uforget: @@ -627,6 +630,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) uopt.umask = sbi->s_umask; uopt.fmode = sbi->s_fmode; uopt.dmode = sbi->s_dmode; + uopt.nls_map = NULL; if (!udf_parse_options(options, &uopt, true)) return -EINVAL; @@ -2095,6 +2099,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) uopt.umask = 0; uopt.fmode = UDF_INVALID_MODE; uopt.dmode = UDF_INVALID_MODE; + uopt.nls_map = NULL; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) @@ -2275,8 +2280,8 @@ error_out: iput(sbi->s_vat_inode); parse_options_failure: #ifdef CONFIG_UDF_NLS - if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) - unload_nls(sbi->s_nls_map); + if (uopt.nls_map) + unload_nls(uopt.nls_map); #endif if (lvid_open) udf_close_lvid(sb); -- cgit v1.2.1 From a9cee1764b6fa97f2b4cd0210b1a125fef2e7169 Mon Sep 17 00:00:00 2001 From: Kyle Spiers Date: Thu, 8 Mar 2018 17:28:43 -0800 Subject: reiserfs: Remove VLA from fs/reiserfs/reiserfs.h Remove Variable Length Array from fs/reiserfs/reiserfs.h. EMPTY_DIR_SIZE is used as an array size and as it is using strlen() it need not be evaluated at compile time. Change it's definition to use sizeof() to force evaluation of array length at compile time. Signed-off-by: Kyle Spiers Signed-off-by: Jan Kara --- fs/reiserfs/reiserfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 48835a659948..ae4811fecc1f 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -1916,7 +1916,7 @@ struct reiserfs_de_head { /* empty directory contains two entries "." and ".." and their headers */ #define EMPTY_DIR_SIZE \ -(DEH_SIZE * 2 + ROUND_UP (strlen (".")) + ROUND_UP (strlen (".."))) +(DEH_SIZE * 2 + ROUND_UP (sizeof(".") - 1) + ROUND_UP (sizeof("..") - 1)) /* old format directories have this size when empty */ #define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3) -- cgit v1.2.1 From b91ed9d8082c394dda63f94f935219cd0a565938 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Fri, 16 Mar 2018 19:13:02 +0530 Subject: quota: Kill an unused extern entry form quota.h Kill an unused extern entry from quota.h which is leftover of below patch. [f32764bd2: quota: Convert quota statistics to generic percpu_counter] Signed-off-by: Ritesh Harjani Signed-off-by: Jan Kara --- include/linux/quota.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/quota.h b/include/linux/quota.h index 5ac9de4fcd6f..ca9772c8e48b 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -267,7 +267,6 @@ struct dqstats { struct percpu_counter counter[_DQST_DQSTAT_LAST]; }; -extern struct dqstats *dqstats_pcpu; extern struct dqstats dqstats; static inline void dqstats_inc(unsigned int type) -- cgit v1.2.1