From cbd7584e6ead1b79fb0b81573f158b57fa1f0b49 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Nov 2014 11:41:49 -0500 Subject: ext4: fix block reservation for bigalloc filesystems For bigalloc filesystems we have to check whether newly requested inode block isn't already part of a cluster for which we already have delayed allocation reservation. This check happens in ext4_ext_map_blocks() and that function sets EXT4_MAP_FROM_CLUSTER if that's the case. However if ext4_da_map_blocks() finds in extent cache information about the block, we don't call into ext4_ext_map_blocks() and thus we always end up getting new reservation even if the space for cluster is already reserved. This results in overreservation and premature ENOSPC reports. Fix the problem by checking for existing cluster reservation already in ext4_da_map_blocks(). That simplifies the logic and actually allows us to get rid of the EXT4_MAP_FROM_CLUSTER flag completely. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index ff4bd1b35246..bb7dcbe99652 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -50,8 +50,7 @@ struct extent_status; { EXT4_MAP_NEW, "N" }, \ { EXT4_MAP_MAPPED, "M" }, \ { EXT4_MAP_UNWRITTEN, "U" }, \ - { EXT4_MAP_BOUNDARY, "B" }, \ - { EXT4_MAP_FROM_CLUSTER, "C" }) + { EXT4_MAP_BOUNDARY, "B" }) #define show_free_flags(flags) __print_flags(flags, "|", \ { EXT4_FREE_BLOCKS_METADATA, "METADATA" }, \ -- cgit v1.2.1 From 2f8e0a7c6c89f850ebd5d6c0b9a08317030d1b89 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Tue, 25 Nov 2014 11:44:37 -0500 Subject: ext4: cache extent hole in extent status tree for ext4_da_map_blocks() Currently extent status tree doesn't cache extent hole when a write looks up in extent tree to make sure whether a block has been allocated or not. In this case, we don't put extent hole in extent cache because later this extent might be removed and a new delayed extent might be added back. But it will cause a defect when we do a lot of writes. If we don't put extent hole in extent cache, the following writes also need to access extent tree to look at whether or not a block has been allocated. It brings a cache miss. This commit fixes this defect. Also if the inode doesn't have any extent, this extent hole will be cached as well. Cc: Andreas Dilger Signed-off-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index bb7dcbe99652..cd37a584ee88 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -43,8 +43,7 @@ struct extent_status; { EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \ { EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \ { EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \ - { EXT4_GET_BLOCKS_NO_LOCK, "NO_LOCK" }, \ - { EXT4_GET_BLOCKS_NO_PUT_HOLE, "NO_PUT_HOLE" }) + { EXT4_GET_BLOCKS_NO_LOCK, "NO_LOCK" }) #define show_mflags(flags) __print_flags(flags, "", \ { EXT4_MAP_NEW, "N" }, \ -- cgit v1.2.1 From edaa53cac8fd4b96ed4b8f96c4933158ff2dd337 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Tue, 25 Nov 2014 11:45:37 -0500 Subject: ext4: change LRU to round-robin in extent status tree shrinker In this commit we discard the lru algorithm for inodes with extent status tree because it takes significant effort to maintain a lru list in extent status tree shrinker and the shrinker can take a long time to scan this lru list in order to reclaim some objects. We replace the lru ordering with a simple round-robin. After that we never need to keep a lru list. That means that the list needn't be sorted if the shrinker can not reclaim any objects in the first round. Cc: Andreas Dilger Signed-off-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index cd37a584ee88..6cfb841fea7c 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2450,15 +2450,14 @@ TRACE_EVENT(ext4_collapse_range, TRACE_EVENT(ext4_es_shrink, TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time, - int skip_precached, int nr_skipped, int retried), + int nr_skipped, int retried), - TP_ARGS(sb, nr_shrunk, scan_time, skip_precached, nr_skipped, retried), + TP_ARGS(sb, nr_shrunk, scan_time, nr_skipped, retried), TP_STRUCT__entry( __field( dev_t, dev ) __field( int, nr_shrunk ) __field( unsigned long long, scan_time ) - __field( int, skip_precached ) __field( int, nr_skipped ) __field( int, retried ) ), @@ -2467,16 +2466,14 @@ TRACE_EVENT(ext4_es_shrink, __entry->dev = sb->s_dev; __entry->nr_shrunk = nr_shrunk; __entry->scan_time = div_u64(scan_time, 1000); - __entry->skip_precached = skip_precached; __entry->nr_skipped = nr_skipped; __entry->retried = retried; ), - TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu skip_precached %d " + TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu " "nr_skipped %d retried %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, - __entry->scan_time, __entry->skip_precached, - __entry->nr_skipped, __entry->retried) + __entry->scan_time, __entry->nr_skipped, __entry->retried) ); #endif /* _TRACE_EXT4_H */ -- cgit v1.2.1