diff options
| author | Alexandre Oliva <lxoliva@fsfla.org> | 2012-10-31 20:19:16 +0000 |
|---|---|---|
| committer | Alexandre Oliva <lxoliva@fsfla.org> | 2012-10-31 20:19:16 +0000 |
| commit | 77080bd489cee5dd25de4acffc6f3d126d4d4179 (patch) | |
| tree | 6a4842c9b9c63d40a18d1eceed15722e326703fd /freed-ora/current/f17 | |
| parent | cbe22bc327f03b0a0a710b457462a376ce365ead (diff) | |
| download | linux-libre-raptor-77080bd489cee5dd25de4acffc6f3d126d4d4179.tar.gz linux-libre-raptor-77080bd489cee5dd25de4acffc6f3d126d4d4179.zip | |
3.6.4-1.fc17.gnu
Diffstat (limited to 'freed-ora/current/f17')
21 files changed, 1659 insertions, 190 deletions
diff --git a/freed-ora/current/f17/0001-ext4-ext4_inode_info-diet.patch b/freed-ora/current/f17/0001-ext4-ext4_inode_info-diet.patch new file mode 100644 index 000000000..c7858ecc6 --- /dev/null +++ b/freed-ora/current/f17/0001-ext4-ext4_inode_info-diet.patch @@ -0,0 +1,121 @@ +From 50b61634cf8d09f9ef334919b859735d381cbe39 Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov <dmonakhov@openvz.org> +Date: Fri, 28 Sep 2012 23:21:09 -0400 +Subject: [PATCH 01/13] ext4: ext4_inode_info diet + +Generic inode has unused i_private pointer which may be used as cur_aio_dio +storage. + +TODO: If cur_aio_dio will be passed as an argument to get_block_t this allow + to have concurent AIO_DIO requests. + +Reviewed-by: Zheng Liu <wenqing.lz@taobao.com> +Reviewed-by: Jan Kara <jack@suse.cz> +Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> +Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> +(cherry picked from commit f45ee3a1ea438af96e4fd2c0b16d195e67ef235f) +--- + fs/ext4/ext4.h | 12 ++++++++++-- + fs/ext4/extents.c | 4 ++-- + fs/ext4/inode.c | 6 +++--- + fs/ext4/super.c | 1 - + 4 files changed, 15 insertions(+), 8 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index c3411d4..80afc8f 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -912,8 +912,6 @@ struct ext4_inode_info { + struct list_head i_completed_io_list; + spinlock_t i_completed_io_lock; + atomic_t i_ioend_count; /* Number of outstanding io_end structs */ +- /* current io_end structure for async DIO write*/ +- ext4_io_end_t *cur_aio_dio; + atomic_t i_aiodio_unwritten; /* Nr. 
of inflight conversions pending */ + + spinlock_t i_block_reservation_lock; +@@ -1332,6 +1330,16 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode, + } + } + ++static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode) ++{ ++ return inode->i_private; ++} ++ ++static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io) ++{ ++ inode->i_private = io; ++} ++ + /* + * Inode dynamic state flags + */ +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index aabbb3f..51fbef1 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -3600,7 +3600,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, + { + int ret = 0; + int err = 0; +- ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; ++ ext4_io_end_t *io = ext4_inode_aio(inode); + + ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " + "block %llu, max_blocks %u, flags %x, allocated %u\n", +@@ -3858,7 +3858,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, + unsigned int allocated = 0, offset = 0; + unsigned int allocated_clusters = 0; + struct ext4_allocation_request ar; +- ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; ++ ext4_io_end_t *io = ext4_inode_aio(inode); + ext4_lblk_t cluster_offset; + + ext_debug("blocks %u/%u requested for inode %lu\n", +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index dff171c..acadd2b 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -3054,7 +3054,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, + * hook to the iocb. + */ + iocb->private = NULL; +- EXT4_I(inode)->cur_aio_dio = NULL; ++ ext4_inode_aio_set(inode, NULL); + if (!is_sync_kiocb(iocb)) { + ext4_io_end_t *io_end = + ext4_init_io_end(inode, GFP_NOFS); +@@ -3071,7 +3071,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, + * is a unwritten extents needs to be converted + * when IO is completed. 
+ */ +- EXT4_I(inode)->cur_aio_dio = iocb->private; ++ ext4_inode_aio_set(inode, io_end); + } + + if (overwrite) +@@ -3091,7 +3091,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, + NULL, + DIO_LOCKING); + if (iocb->private) +- EXT4_I(inode)->cur_aio_dio = NULL; ++ ext4_inode_aio_set(inode, NULL); + /* + * The io_end structure takes a reference to the inode, + * that structure needs to be destroyed and the +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index c6e0cb3..270e58f 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -956,7 +956,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) + ei->jinode = NULL; + INIT_LIST_HEAD(&ei->i_completed_io_list); + spin_lock_init(&ei->i_completed_io_lock); +- ei->cur_aio_dio = NULL; + ei->i_sync_tid = 0; + ei->i_datasync_tid = 0; + atomic_set(&ei->i_ioend_count, 0); +-- +1.7.12.rc0.22.gcdd159b + diff --git a/freed-ora/current/f17/0002-ext4-give-i_aiodio_unwritten-a-more-appropriate-name.patch b/freed-ora/current/f17/0002-ext4-give-i_aiodio_unwritten-a-more-appropriate-name.patch new file mode 100644 index 000000000..cfd13f386 --- /dev/null +++ b/freed-ora/current/f17/0002-ext4-give-i_aiodio_unwritten-a-more-appropriate-name.patch @@ -0,0 +1,97 @@ +From 027d1aa67e32c2c80851105c6d962f3db46eb476 Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov <dmonakhov@openvz.org> +Date: Fri, 28 Sep 2012 23:24:52 -0400 +Subject: [PATCH 02/13] ext4: give i_aiodio_unwritten a more appropriate name + +AIO/DIO prefix is wrong because it account unwritten extents which +also may be scheduled from buffered write endio + +Reviewed-by: Jan Kara <jack@suse.cz> +Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> +Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> +(cherry picked from commit e27f41e1b789e60e7d8cc9c81fd93ca49ef31f13) +--- + fs/ext4/ext4.h | 4 ++-- + fs/ext4/file.c | 6 +++--- + fs/ext4/page-io.c | 2 +- + fs/ext4/super.c | 2 +- + 4 files changed, 7 insertions(+), 7 deletions(-) + +diff --git 
a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 80afc8f..28dfd9b 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -912,7 +912,7 @@ struct ext4_inode_info { + struct list_head i_completed_io_list; + spinlock_t i_completed_io_lock; + atomic_t i_ioend_count; /* Number of outstanding io_end structs */ +- atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */ ++ atomic_t i_unwritten; /* Nr. of inflight conversions pending */ + + spinlock_t i_block_reservation_lock; + +@@ -1326,7 +1326,7 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode, + { + if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { + io_end->flag |= EXT4_IO_END_UNWRITTEN; +- atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); ++ atomic_inc(&EXT4_I(inode)->i_unwritten); + } + } + +diff --git a/fs/ext4/file.c b/fs/ext4/file.c +index 3b0e3bd..39335bd 100644 +--- a/fs/ext4/file.c ++++ b/fs/ext4/file.c +@@ -55,11 +55,11 @@ static int ext4_release_file(struct inode *inode, struct file *filp) + return 0; + } + +-static void ext4_aiodio_wait(struct inode *inode) ++static void ext4_unwritten_wait(struct inode *inode) + { + wait_queue_head_t *wq = ext4_ioend_wq(inode); + +- wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0)); ++ wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0)); + } + + /* +@@ -116,7 +116,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, + "performance will be poor.", + inode->i_ino, current->comm); + mutex_lock(ext4_aio_mutex(inode)); +- ext4_aiodio_wait(inode); ++ ext4_unwritten_wait(inode); + } + + BUG_ON(iocb->ki_pos != pos); +diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c +index dcdeef1..de77e31 100644 +--- a/fs/ext4/page-io.c ++++ b/fs/ext4/page-io.c +@@ -113,7 +113,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io) + if (io->flag & EXT4_IO_END_DIRECT) + inode_dio_done(inode); + /* Wake up anyone waiting on unwritten extent conversion */ +- if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) ++ if 
(atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) + wake_up_all(ext4_ioend_wq(io->inode)); + return ret; + } +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 270e58f..1b6b425 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -959,7 +959,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) + ei->i_sync_tid = 0; + ei->i_datasync_tid = 0; + atomic_set(&ei->i_ioend_count, 0); +- atomic_set(&ei->i_aiodio_unwritten, 0); ++ atomic_set(&ei->i_unwritten, 0); + + return &ei->vfs_inode; + } +-- +1.7.12.rc0.22.gcdd159b + diff --git a/freed-ora/current/f17/0003-ext4-fix-unwritten-counter-leakage.patch b/freed-ora/current/f17/0003-ext4-fix-unwritten-counter-leakage.patch new file mode 100644 index 000000000..2f1d0d813 --- /dev/null +++ b/freed-ora/current/f17/0003-ext4-fix-unwritten-counter-leakage.patch @@ -0,0 +1,112 @@ +From 6a0e905bb7320571ed5fdd2d5efa3d642630b4f7 Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov <dmonakhov@openvz.org> +Date: Fri, 28 Sep 2012 23:36:25 -0400 +Subject: [PATCH 03/13] ext4: fix unwritten counter leakage + +ext4_set_io_unwritten_flag() will increment i_unwritten counter, so +once we mark end_io with EXT4_END_IO_UNWRITTEN we have to revert it back +on error path. + + - add missed error checks to prevent counter leakage + - ext4_end_io_nolock() will clear EXT4_END_IO_UNWRITTEN flag to signal + that conversion finished. + - add BUG_ON to ext4_free_end_io() to prevent similar leakage in future. 
+ +Visible effect of this bug is that unaligned aio_stress may deadlock + +Reviewed-by: Jan Kara <jack@suse.cz> +Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> +Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> +(cherry picked from commit 82e54229118785badffb4ef5ba4803df25fe007f) +--- + fs/ext4/extents.c | 21 ++++++++++++++------- + fs/ext4/page-io.c | 6 +++++- + 2 files changed, 19 insertions(+), 8 deletions(-) + +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index 51fbef1..e04eb4f 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -3615,6 +3615,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, + if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { + ret = ext4_split_unwritten_extents(handle, inode, map, + path, flags); ++ if (ret <= 0) ++ goto out; + /* + * Flag the inode(non aio case) or end_io struct (aio case) + * that this IO needs to conversion to written when IO is +@@ -3860,6 +3862,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_allocation_request ar; + ext4_io_end_t *io = ext4_inode_aio(inode); + ext4_lblk_t cluster_offset; ++ int set_unwritten = 0; + + ext_debug("blocks %u/%u requested for inode %lu\n", + map->m_lblk, map->m_len, inode->i_ino); +@@ -4082,13 +4085,8 @@ got_allocated_blocks: + * For non asycn direct IO case, flag the inode state + * that we need to perform conversion when IO is done. 
+ */ +- if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { +- if (io) +- ext4_set_io_unwritten_flag(inode, io); +- else +- ext4_set_inode_state(inode, +- EXT4_STATE_DIO_UNWRITTEN); +- } ++ if ((flags & EXT4_GET_BLOCKS_PRE_IO)) ++ set_unwritten = 1; + if (ext4_should_dioread_nolock(inode)) + map->m_flags |= EXT4_MAP_UNINIT; + } +@@ -4100,6 +4098,15 @@ got_allocated_blocks: + if (!err) + err = ext4_ext_insert_extent(handle, inode, path, + &newex, flags); ++ ++ if (!err && set_unwritten) { ++ if (io) ++ ext4_set_io_unwritten_flag(inode, io); ++ else ++ ext4_set_inode_state(inode, ++ EXT4_STATE_DIO_UNWRITTEN); ++ } ++ + if (err && free_on_err) { + int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? + EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; +diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c +index de77e31..9970022 100644 +--- a/fs/ext4/page-io.c ++++ b/fs/ext4/page-io.c +@@ -71,6 +71,8 @@ void ext4_free_io_end(ext4_io_end_t *io) + int i; + + BUG_ON(!io); ++ BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); ++ + if (io->page) + put_page(io->page); + for (i = 0; i < io->num_io_pages; i++) +@@ -94,6 +96,8 @@ int ext4_end_io_nolock(ext4_io_end_t *io) + ssize_t size = io->size; + int ret = 0; + ++ BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN)); ++ + ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," + "list->prev 0x%p\n", + io, inode->i_ino, io->list.next, io->list.prev); +@@ -106,7 +110,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io) + "(inode %lu, offset %llu, size %zd, error %d)", + inode->i_ino, offset, size, ret); + } +- ++ io->flag &= ~EXT4_IO_END_UNWRITTEN; + if (io->iocb) + aio_complete(io->iocb, io->result, 0); + +-- +1.7.12.rc0.22.gcdd159b + diff --git a/freed-ora/current/f17/0004-ext4-completed_io-locking-cleanup.patch b/freed-ora/current/f17/0004-ext4-completed_io-locking-cleanup.patch new file mode 100644 index 000000000..a358a7952 --- /dev/null +++ b/freed-ora/current/f17/0004-ext4-completed_io-locking-cleanup.patch @@ -0,0 +1,520 @@ +From 
e23394806df0768ed2dac87484590d2f3a730d55 Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov <dmonakhov@openvz.org> +Date: Sat, 29 Sep 2012 00:14:55 -0400 +Subject: [PATCH 04/13] ext4: completed_io locking cleanup + +Current unwritten extent conversion state-machine is very fuzzy. +- For unknown reason it performs conversion under i_mutex. What for? + My diagnosis: + We already protect extent tree with i_data_sem, truncate and punch_hole + should wait for DIO, so the only data we have to protect is end_io->flags + modification, but only flush_completed_IO and end_io_work modified this + flags and we can serialize them via i_completed_io_lock. + + Currently all these games with mutex_trylock result in the following deadlock + truncate: kworker: + ext4_setattr ext4_end_io_work + mutex_lock(i_mutex) + inode_dio_wait(inode) ->BLOCK + DEADLOCK<- mutex_trylock() + inode_dio_done() + #TEST_CASE1_BEGIN + MNT=/mnt_scrach + unlink $MNT/file + fallocate -l $((1024*1024*1024)) $MNT/file + aio-stress -I 100000 -O -s 100m -n -t 1 -c 10 -o 2 -o 3 $MNT/file + sleep 2 + truncate -s 0 $MNT/file + #TEST_CASE1_END + +Or use 286's xfstests https://github.com/dmonakhov/xfstests/blob/devel/286 + +This patch makes state machine simple and clean: + +(1) xxx_end_io schedule final extent conversion simply by calling + ext4_add_complete_io(), which append it to ei->i_completed_io_list + NOTE1: because of (2A) work should be queued only if + ->i_completed_io_list was empty, otherwise the work is scheduled already. + +(2) ext4_flush_completed_IO is responsible for handling all pending + end_io from ei->i_completed_io_list + Flushing sequence consists of following stages: + A) LOCKED: Atomically drain completed_io_list to local_list + B) Perform extents conversion + C) LOCKED: move converted io's to to_free list for final deletion + This logic depends on context which we was called from. 
+ D) Final end_io context destruction + NOTE1: i_mutex is no longer required because end_io->flags modification + is protected by ei->ext4_complete_io_lock + +Full list of changes: +- Move all completion end_io related routines to page-io.c in order to improve + logic locality +- Move open coded logic from various xx_end_xx routines to ext4_add_complete_io() +- remove EXT4_IO_END_FSYNC +- Improve SMP scalability by removing useless i_mutex which does not + protect io->flags anymore. +- Reduce lock contention on i_completed_io_lock by optimizing list walk. +- Rename ext4_end_io_nolock to end4_end_io and make it static +- Check flush completion status to ext4_ext_punch_hole(). Because it is + not good idea to punch blocks from corrupted inode. + +Changes since V3 (in request to Jan's comments): + Fall back to active flush_completed_IO() approach in order to prevent + performance issues with nolocked DIO reads. +Changes since V2: + Fix use-after-free caused by race truncate vs end_io_work + +Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> +Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> +(cherry picked from commit 28a535f9a0df060569dcc786e5bc2e1de43d7dc7) +--- + fs/ext4/ext4.h | 3 +- + fs/ext4/extents.c | 4 +- + fs/ext4/fsync.c | 81 ------------------------- + fs/ext4/indirect.c | 6 +- + fs/ext4/inode.c | 25 +------- + fs/ext4/page-io.c | 171 +++++++++++++++++++++++++++++++++++------------------ + 6 files changed, 121 insertions(+), 169 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 28dfd9b..7687d15 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -186,7 +186,6 @@ struct mpage_da_data { + #define EXT4_IO_END_ERROR 0x0002 + #define EXT4_IO_END_QUEUED 0x0004 + #define EXT4_IO_END_DIRECT 0x0008 +-#define EXT4_IO_END_IN_FSYNC 0x0010 + + struct ext4_io_page { + struct page *p_page; +@@ -2408,11 +2407,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, + + /* page-io.c */ + extern int __init ext4_init_pageio(void); 
++extern void ext4_add_complete_io(ext4_io_end_t *io_end); + extern void ext4_exit_pageio(void); + extern void ext4_ioend_wait(struct inode *); + extern void ext4_free_io_end(ext4_io_end_t *io); + extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); +-extern int ext4_end_io_nolock(ext4_io_end_t *io); + extern void ext4_io_submit(struct ext4_io_submit *io); + extern int ext4_bio_write_page(struct ext4_io_submit *io, + struct page *page, +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index e04eb4f..1fbf2ff 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -4815,7 +4815,9 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) + } + + /* finish any pending end_io work */ +- ext4_flush_completed_IO(inode); ++ err = ext4_flush_completed_IO(inode); ++ if (err) ++ return err; + + credits = ext4_writepage_trans_blocks(inode); + handle = ext4_journal_start(inode, credits); +diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c +index 2a1dcea..520b058 100644 +--- a/fs/ext4/fsync.c ++++ b/fs/ext4/fsync.c +@@ -34,87 +34,6 @@ + + #include <trace/events/ext4.h> + +-static void dump_completed_IO(struct inode * inode) +-{ +-#ifdef EXT4FS_DEBUG +- struct list_head *cur, *before, *after; +- ext4_io_end_t *io, *io0, *io1; +- unsigned long flags; +- +- if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ +- ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); +- return; +- } +- +- ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); +- spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); +- list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ +- cur = &io->list; +- before = cur->prev; +- io0 = container_of(before, ext4_io_end_t, list); +- after = cur->next; +- io1 = container_of(after, ext4_io_end_t, list); +- +- ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", +- io, inode->i_ino, io0, io1); +- } +- spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); 
+-#endif +-} +- +-/* +- * This function is called from ext4_sync_file(). +- * +- * When IO is completed, the work to convert unwritten extents to +- * written is queued on workqueue but may not get immediately +- * scheduled. When fsync is called, we need to ensure the +- * conversion is complete before fsync returns. +- * The inode keeps track of a list of pending/completed IO that +- * might needs to do the conversion. This function walks through +- * the list and convert the related unwritten extents for completed IO +- * to written. +- * The function return the number of pending IOs on success. +- */ +-int ext4_flush_completed_IO(struct inode *inode) +-{ +- ext4_io_end_t *io; +- struct ext4_inode_info *ei = EXT4_I(inode); +- unsigned long flags; +- int ret = 0; +- int ret2 = 0; +- +- dump_completed_IO(inode); +- spin_lock_irqsave(&ei->i_completed_io_lock, flags); +- while (!list_empty(&ei->i_completed_io_list)){ +- io = list_entry(ei->i_completed_io_list.next, +- ext4_io_end_t, list); +- list_del_init(&io->list); +- io->flag |= EXT4_IO_END_IN_FSYNC; +- /* +- * Calling ext4_end_io_nolock() to convert completed +- * IO to written. +- * +- * When ext4_sync_file() is called, run_queue() may already +- * about to flush the work corresponding to this io structure. +- * It will be upset if it founds the io structure related +- * to the work-to-be schedule is freed. +- * +- * Thus we need to keep the io structure still valid here after +- * conversion finished. The io structure has a flag to +- * avoid double converting from both fsync and background work +- * queue work. +- */ +- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); +- ret = ext4_end_io_nolock(io); +- if (ret < 0) +- ret2 = ret; +- spin_lock_irqsave(&ei->i_completed_io_lock, flags); +- io->flag &= ~EXT4_IO_END_IN_FSYNC; +- } +- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); +- return (ret2 < 0) ? 
ret2 : 0; +-} +- + /* + * If we're not journaling and this is a just-created file, we have to + * sync our parent directory (if it was freshly created) since +diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c +index 830e1b2..61f13e5 100644 +--- a/fs/ext4/indirect.c ++++ b/fs/ext4/indirect.c +@@ -807,11 +807,9 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, + + retry: + if (rw == READ && ext4_should_dioread_nolock(inode)) { +- if (unlikely(!list_empty(&ei->i_completed_io_list))) { +- mutex_lock(&inode->i_mutex); ++ if (unlikely(!list_empty(&ei->i_completed_io_list))) + ext4_flush_completed_IO(inode); +- mutex_unlock(&inode->i_mutex); +- } ++ + ret = __blockdev_direct_IO(rw, iocb, inode, + inode->i_sb->s_bdev, iov, + offset, nr_segs, +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index acadd2b..dd3fd23 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -2879,9 +2879,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, + { + struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + ext4_io_end_t *io_end = iocb->private; +- struct workqueue_struct *wq; +- unsigned long flags; +- struct ext4_inode_info *ei; + + /* if not async direct IO or dio with 0 bytes write, just return */ + if (!io_end || !size) +@@ -2910,24 +2907,14 @@ out: + io_end->iocb = iocb; + io_end->result = ret; + } +- wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; +- +- /* Add the io_end to per-inode completed aio dio list*/ +- ei = EXT4_I(io_end->inode); +- spin_lock_irqsave(&ei->i_completed_io_lock, flags); +- list_add_tail(&io_end->list, &ei->i_completed_io_list); +- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); + +- /* queue the work to convert unwritten extents to written */ +- queue_work(wq, &io_end->work); ++ ext4_add_complete_io(io_end); + } + + static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) + { + ext4_io_end_t *io_end = bh->b_private; +- struct workqueue_struct *wq; + struct inode *inode; +- unsigned long 
flags; + + if (!test_clear_buffer_uninit(bh) || !io_end) + goto out; +@@ -2946,15 +2933,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) + */ + inode = io_end->inode; + ext4_set_io_unwritten_flag(inode, io_end); +- +- /* Add the io_end to per-inode completed io list*/ +- spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); +- list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); +- spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); +- +- wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; +- /* queue the work to convert unwritten extents to written */ +- queue_work(wq, &io_end->work); ++ ext4_add_complete_io(io_end); + out: + bh->b_private = NULL; + bh->b_end_io = NULL; +diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c +index 9970022..5b24c40 100644 +--- a/fs/ext4/page-io.c ++++ b/fs/ext4/page-io.c +@@ -71,6 +71,7 @@ void ext4_free_io_end(ext4_io_end_t *io) + int i; + + BUG_ON(!io); ++ BUG_ON(!list_empty(&io->list)); + BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); + + if (io->page) +@@ -83,21 +84,14 @@ void ext4_free_io_end(ext4_io_end_t *io) + kmem_cache_free(io_end_cachep, io); + } + +-/* +- * check a range of space and convert unwritten extents to written. +- * +- * Called with inode->i_mutex; we depend on this when we manipulate +- * io->flag, since we could otherwise race with ext4_flush_completed_IO() +- */ +-int ext4_end_io_nolock(ext4_io_end_t *io) ++/* check a range of space and convert unwritten extents to written. 
*/ ++static int ext4_end_io(ext4_io_end_t *io) + { + struct inode *inode = io->inode; + loff_t offset = io->offset; + ssize_t size = io->size; + int ret = 0; + +- BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN)); +- + ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," + "list->prev 0x%p\n", + io, inode->i_ino, io->list.next, io->list.prev); +@@ -110,7 +104,6 @@ int ext4_end_io_nolock(ext4_io_end_t *io) + "(inode %lu, offset %llu, size %zd, error %d)", + inode->i_ino, offset, size, ret); + } +- io->flag &= ~EXT4_IO_END_UNWRITTEN; + if (io->iocb) + aio_complete(io->iocb, io->result, 0); + +@@ -122,51 +115,122 @@ int ext4_end_io_nolock(ext4_io_end_t *io) + return ret; + } + +-/* +- * work on completed aio dio IO, to convert unwritten extents to extents +- */ +-static void ext4_end_io_work(struct work_struct *work) ++static void dump_completed_IO(struct inode *inode) ++{ ++#ifdef EXT4FS_DEBUG ++ struct list_head *cur, *before, *after; ++ ext4_io_end_t *io, *io0, *io1; ++ unsigned long flags; ++ ++ if (list_empty(&EXT4_I(inode)->i_completed_io_list)) { ++ ext4_debug("inode %lu completed_io list is empty\n", ++ inode->i_ino); ++ return; ++ } ++ ++ ext4_debug("Dump inode %lu completed_io list\n", inode->i_ino); ++ list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) { ++ cur = &io->list; ++ before = cur->prev; ++ io0 = container_of(before, ext4_io_end_t, list); ++ after = cur->next; ++ io1 = container_of(after, ext4_io_end_t, list); ++ ++ ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", ++ io, inode->i_ino, io0, io1); ++ } ++#endif ++} ++ ++/* Add the io_end to per-inode completed end_io list. 
*/ ++void ext4_add_complete_io(ext4_io_end_t *io_end) + { +- ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); +- struct inode *inode = io->inode; +- struct ext4_inode_info *ei = EXT4_I(inode); +- unsigned long flags; ++ struct ext4_inode_info *ei = EXT4_I(io_end->inode); ++ struct workqueue_struct *wq; ++ unsigned long flags; ++ ++ BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); ++ wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; + + spin_lock_irqsave(&ei->i_completed_io_lock, flags); +- if (io->flag & EXT4_IO_END_IN_FSYNC) +- goto requeue; +- if (list_empty(&io->list)) { +- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); +- goto free; ++ if (list_empty(&ei->i_completed_io_list)) { ++ io_end->flag |= EXT4_IO_END_QUEUED; ++ queue_work(wq, &io_end->work); + } ++ list_add_tail(&io_end->list, &ei->i_completed_io_list); ++ spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); ++} + +- if (!mutex_trylock(&inode->i_mutex)) { +- bool was_queued; +-requeue: +- was_queued = !!(io->flag & EXT4_IO_END_QUEUED); +- io->flag |= EXT4_IO_END_QUEUED; +- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); +- /* +- * Requeue the work instead of waiting so that the work +- * items queued after this can be processed. +- */ +- queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work); +- /* +- * To prevent the ext4-dio-unwritten thread from keeping +- * requeueing end_io requests and occupying cpu for too long, +- * yield the cpu if it sees an end_io request that has already +- * been requeued. 
+- */ +- if (was_queued) +- yield(); +- return; ++static int ext4_do_flush_completed_IO(struct inode *inode, ++ ext4_io_end_t *work_io) ++{ ++ ext4_io_end_t *io; ++ struct list_head unwritten, complete, to_free; ++ unsigned long flags; ++ struct ext4_inode_info *ei = EXT4_I(inode); ++ int err, ret = 0; ++ ++ INIT_LIST_HEAD(&complete); ++ INIT_LIST_HEAD(&to_free); ++ ++ spin_lock_irqsave(&ei->i_completed_io_lock, flags); ++ dump_completed_IO(inode); ++ list_replace_init(&ei->i_completed_io_list, &unwritten); ++ spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); ++ ++ while (!list_empty(&unwritten)) { ++ io = list_entry(unwritten.next, ext4_io_end_t, list); ++ BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN)); ++ list_del_init(&io->list); ++ ++ err = ext4_end_io(io); ++ if (unlikely(!ret && err)) ++ ret = err; ++ ++ list_add_tail(&io->list, &complete); ++ } ++ /* It is important to update all flags for all end_io in one shot w/o ++ * dropping the lock.*/ ++ spin_lock_irqsave(&ei->i_completed_io_lock, flags); ++ while (!list_empty(&complete)) { ++ io = list_entry(complete.next, ext4_io_end_t, list); ++ io->flag &= ~EXT4_IO_END_UNWRITTEN; ++ /* end_io context can not be destroyed now because it still ++ * used by queued worker. 
Worker thread will destroy it later */ ++ if (io->flag & EXT4_IO_END_QUEUED) ++ list_del_init(&io->list); ++ else ++ list_move(&io->list, &to_free); ++ } ++ /* If we are called from worker context, it is time to clear queued ++ * flag, and destroy it's end_io if it was converted already */ ++ if (work_io) { ++ work_io->flag &= ~EXT4_IO_END_QUEUED; ++ if (!(work_io->flag & EXT4_IO_END_UNWRITTEN)) ++ list_add_tail(&work_io->list, &to_free); + } +- list_del_init(&io->list); + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); +- (void) ext4_end_io_nolock(io); +- mutex_unlock(&inode->i_mutex); +-free: +- ext4_free_io_end(io); ++ ++ while (!list_empty(&to_free)) { ++ io = list_entry(to_free.next, ext4_io_end_t, list); ++ list_del_init(&io->list); ++ ext4_free_io_end(io); ++ } ++ return ret; ++} ++ ++/* ++ * work on completed aio dio IO, to convert unwritten extents to extents ++ */ ++static void ext4_end_io_work(struct work_struct *work) ++{ ++ ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); ++ ext4_do_flush_completed_IO(io->inode, io); ++} ++ ++int ext4_flush_completed_IO(struct inode *inode) ++{ ++ return ext4_do_flush_completed_IO(inode, NULL); + } + + ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) +@@ -199,9 +263,7 @@ static void buffer_io_error(struct buffer_head *bh) + static void ext4_end_bio(struct bio *bio, int error) + { + ext4_io_end_t *io_end = bio->bi_private; +- struct workqueue_struct *wq; + struct inode *inode; +- unsigned long flags; + int i; + sector_t bi_sector = bio->bi_sector; + +@@ -259,14 +321,7 @@ static void ext4_end_bio(struct bio *bio, int error) + return; + } + +- /* Add the io_end to per-inode completed io list*/ +- spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); +- list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); +- spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); +- +- wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; +- /* queue the work to convert 
unwritten extents to written */ +- queue_work(wq, &io_end->work); ++ ext4_add_complete_io(io_end); + } + + void ext4_io_submit(struct ext4_io_submit *io) +-- +1.7.12.rc0.22.gcdd159b + diff --git a/freed-ora/current/f17/0005-ext4-serialize-dio-nonlocked-reads-with-defrag-worke.patch b/freed-ora/current/f17/0005-ext4-serialize-dio-nonlocked-reads-with-defrag-worke.patch new file mode 100644 index 000000000..cf53b8dab --- /dev/null +++ b/freed-ora/current/f17/0005-ext4-serialize-dio-nonlocked-reads-with-defrag-worke.patch @@ -0,0 +1,144 @@ +From 994f567b2e99c82913a279ff438269c771b68a4b Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov <dmonakhov@openvz.org> +Date: Sat, 29 Sep 2012 00:41:21 -0400 +Subject: [PATCH 05/13] ext4: serialize dio nonlocked reads with defrag + workers + +Inode's block defrag and ext4_change_inode_journal_flag() may +affect nonlocked DIO reads result, so proper synchronization +required. + +- Add missed inode_dio_wait() calls where appropriate +- Check inode state under extra i_dio_count reference. 
+ +Reviewed-by: Jan Kara <jack@suse.cz> +Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> +Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> +(cherry picked from commit 17335dcc471199717839b2fa3492ca36f70f1168) + +Conflicts: + fs/ext4/move_extent.c +--- + fs/ext4/ext4.h | 17 +++++++++++++++++ + fs/ext4/indirect.c | 14 ++++++++++++++ + fs/ext4/inode.c | 5 +++++ + fs/ext4/move_extent.c | 8 ++++++++ + 4 files changed, 44 insertions(+) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 7687d15..3e740e9 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1352,6 +1352,8 @@ enum { + EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ + EXT4_STATE_NEWENTRY, /* File just added to dir */ + EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */ ++ EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read ++ nolocking */ + }; + + #define EXT4_INODE_BIT_FNS(name, field, offset) \ +@@ -2459,6 +2461,21 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) + set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); + } + ++/* ++ * Disable DIO read nolock optimization, so new dioreaders will be forced ++ * to grab i_mutex ++ */ ++static inline void ext4_inode_block_unlocked_dio(struct inode *inode) ++{ ++ ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); ++ smp_mb(); ++} ++static inline void ext4_inode_resume_unlocked_dio(struct inode *inode) ++{ ++ smp_mb(); ++ ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); ++} ++ + #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + + /* For ioend & aio unwritten conversion wait queues */ +diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c +index 61f13e5..8d849da 100644 +--- a/fs/ext4/indirect.c ++++ b/fs/ext4/indirect.c +@@ -810,11 +810,25 @@ retry: + if (unlikely(!list_empty(&ei->i_completed_io_list))) + ext4_flush_completed_IO(inode); + ++ /* ++ * Nolock dioread optimization may be dynamically disabled ++ * via ext4_inode_block_unlocked_dio(). 
Check inode's state ++ * while holding extra i_dio_count ref. ++ */ ++ atomic_inc(&inode->i_dio_count); ++ smp_mb(); ++ if (unlikely(ext4_test_inode_state(inode, ++ EXT4_STATE_DIOREAD_LOCK))) { ++ inode_dio_done(inode); ++ goto locked; ++ } + ret = __blockdev_direct_IO(rw, iocb, inode, + inode->i_sb->s_bdev, iov, + offset, nr_segs, + ext4_get_block, NULL, NULL, 0); ++ inode_dio_done(inode); + } else { ++locked: + ret = blockdev_direct_IO(rw, iocb, inode, iov, + offset, nr_segs, ext4_get_block); + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index dd3fd23..2bd7526 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -4706,6 +4706,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) + return err; + } + ++ /* Wait for all existing dio workers */ ++ ext4_inode_block_unlocked_dio(inode); ++ inode_dio_wait(inode); ++ + jbd2_journal_lock_updates(journal); + + /* +@@ -4725,6 +4729,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) + ext4_set_aops(inode); + + jbd2_journal_unlock_updates(journal); ++ ext4_inode_resume_unlocked_dio(inode); + + /* Finally we can mark the inode as dirty. 
*/ + +diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c +index c5826c6..fd1e32e 100644 +--- a/fs/ext4/move_extent.c ++++ b/fs/ext4/move_extent.c +@@ -1214,6 +1214,12 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, + /* Protect orig and donor inodes against a truncate */ + mext_inode_double_lock(orig_inode, donor_inode); + ++ /* Wait for all existing dio workers */ ++ ext4_inode_block_unlocked_dio(orig_inode); ++ ext4_inode_block_unlocked_dio(donor_inode); ++ inode_dio_wait(orig_inode); ++ inode_dio_wait(donor_inode); ++ + /* Protect extent tree against block allocations via delalloc */ + double_down_write_data_sem(orig_inode, donor_inode); + /* Check the filesystem environment whether move_extent can be done */ +@@ -1413,6 +1419,8 @@ out: + kfree(holecheck_path); + } + double_up_write_data_sem(orig_inode, donor_inode); ++ ext4_inode_resume_unlocked_dio(orig_inode); ++ ext4_inode_resume_unlocked_dio(donor_inode); + mext_inode_double_unlock(orig_inode, donor_inode); + + return ret; +-- +1.7.12.rc0.22.gcdd159b + diff --git a/freed-ora/current/f17/0006-ext4-serialize-unlocked-dio-reads-with-truncate.patch b/freed-ora/current/f17/0006-ext4-serialize-unlocked-dio-reads-with-truncate.patch new file mode 100644 index 000000000..bddcc6024 --- /dev/null +++ b/freed-ora/current/f17/0006-ext4-serialize-unlocked-dio-reads-with-truncate.patch @@ -0,0 +1,65 @@ +From 4c4679fc02744ec3955e88faf5e8b6844fa8cbd3 Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov <dmonakhov@openvz.org> +Date: Sat, 29 Sep 2012 00:55:23 -0400 +Subject: [PATCH 06/13] ext4: serialize unlocked dio reads with truncate + +Current serialization will works only for DIO which holds +i_mutex, but nonlocked DIO following race is possible: + +dio_nolock_read_task truncate_task + ->ext4_setattr() + ->inode_dio_wait() +->ext4_ext_direct_IO + ->ext4_ind_direct_IO + ->__blockdev_direct_IO + ->ext4_get_block + ->truncate_setsize() + ->ext4_truncate() + #alloc truncated blocks + #to other inode + 
->submit_io() + #INFORMATION LEAK + +In order to serialize with unlocked DIO reads we have to +rearrange wait sequence +1) update i_size first +2) if i_size about to be reduced wait for outstanding DIO requests +3) and only after that truncate inode blocks + +Reviewed-by: Jan Kara <jack@suse.cz> +Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> +Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> +(cherry picked from commit 1c9114f9c0f10f58dd7e568a7152025af47b27e5) +--- + fs/ext4/inode.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 2bd7526..b84322d 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -4277,7 +4277,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) + } + + if (attr->ia_valid & ATTR_SIZE) { +- inode_dio_wait(inode); + + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); +@@ -4326,8 +4325,12 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) + } + + if (attr->ia_valid & ATTR_SIZE) { +- if (attr->ia_size != i_size_read(inode)) ++ if (attr->ia_size != i_size_read(inode)) { + truncate_setsize(inode, attr->ia_size); ++ /* Inode size will be reduced, wait for dio in flight */ ++ if (orphan) ++ inode_dio_wait(inode); ++ } + ext4_truncate(inode); + } + +-- +1.7.12.rc0.22.gcdd159b + diff --git a/freed-ora/current/f17/0007-ext4-endless-truncate-due-to-nonlocked-dio-readers.patch b/freed-ora/current/f17/0007-ext4-endless-truncate-due-to-nonlocked-dio-readers.patch new file mode 100644 index 000000000..768215f48 --- /dev/null +++ b/freed-ora/current/f17/0007-ext4-endless-truncate-due-to-nonlocked-dio-readers.patch @@ -0,0 +1,41 @@ +From ab7b8a329e12369d58e5fa59ba2e2c90370f12ef Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov <dmonakhov@openvz.org> +Date: Sat, 29 Sep 2012 00:56:15 -0400 +Subject: [PATCH 07/13] ext4: endless truncate due to nonlocked dio readers + +If we have enough aggressive DIO readers, 
truncate and other dio +waiters will wait forever inside inode_dio_wait(). It is reasonable +to disable nonlock DIO read optimization during truncate. + +Reviewed-by: Jan Kara <jack@suse.cz> +Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> +Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> +(cherry picked from commit 1b65007e9870e0021397b548e8cd6bbc584f9152) +--- + fs/ext4/inode.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index b84322d..3b03dd6 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -4327,9 +4327,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) + if (attr->ia_valid & ATTR_SIZE) { + if (attr->ia_size != i_size_read(inode)) { + truncate_setsize(inode, attr->ia_size); +- /* Inode size will be reduced, wait for dio in flight */ +- if (orphan) ++ /* Inode size will be reduced, wait for dio in flight. ++ * Temporarily disable dioread_nolock to prevent ++ * livelock. */ ++ if (orphan) { ++ ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); ++ ext4_inode_resume_unlocked_dio(inode); ++ } + } + ext4_truncate(inode); + } +-- +1.7.12.rc0.22.gcdd159b + diff --git a/freed-ora/current/f17/0008-ext4-serialize-truncate-with-owerwrite-DIO-workers.patch b/freed-ora/current/f17/0008-ext4-serialize-truncate-with-owerwrite-DIO-workers.patch new file mode 100644 index 000000000..c7733ed74 --- /dev/null +++ b/freed-ora/current/f17/0008-ext4-serialize-truncate-with-owerwrite-DIO-workers.patch @@ -0,0 +1,61 @@ +From 69e4026a2d104ffcf1b935bc889f8abcbfbb29ec Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov <dmonakhov@openvz.org> +Date: Sat, 29 Sep 2012 00:58:26 -0400 +Subject: [PATCH 08/13] ext4: serialize truncate with owerwrite DIO workers + +Jan Kara have spotted interesting issue: +There are potential data corruption issue with direct IO overwrites +racing with truncate: + Like: + dio write truncate_task + ->ext4_ext_direct_IO + ->overwrite == 1 + 
->down_read(&EXT4_I(inode)->i_data_sem); + ->mutex_unlock(&inode->i_mutex); + ->ext4_setattr() + ->inode_dio_wait() + ->truncate_setsize() + ->ext4_truncate() + ->down_write(&EXT4_I(inode)->i_data_sem); + ->__blockdev_direct_IO + ->ext4_get_block + ->submit_io() + ->up_read(&EXT4_I(inode)->i_data_sem); + # truncate data blocks, allocate them to + # other inode - bad stuff happens because + # dio is still in flight. + +In order to serialize with truncate dio worker should grab extra i_dio_count +reference before drop i_mutex. + +Reviewed-by: Jan Kara <jack@suse.cz> +Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> +Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> +(cherry picked from commit 1f555cfa29e8f787d675e8390f88ce517a37271a) +--- + fs/ext4/inode.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 3b03dd6..484a327 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -3008,6 +3008,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, + overwrite = *((int *)iocb->private); + + if (overwrite) { ++ atomic_inc(&inode->i_dio_count); + down_read(&EXT4_I(inode)->i_data_sem); + mutex_unlock(&inode->i_mutex); + } +@@ -3105,6 +3106,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, + retake_lock: + /* take i_mutex locking again if we do a ovewrite dio */ + if (overwrite) { ++ inode_dio_done(inode); + up_read(&EXT4_I(inode)->i_data_sem); + mutex_lock(&inode->i_mutex); + } +-- +1.7.12.rc0.22.gcdd159b + diff --git a/freed-ora/current/f17/0009-ext4-punch_hole-should-wait-for-DIO-writers.patch b/freed-ora/current/f17/0009-ext4-punch_hole-should-wait-for-DIO-writers.patch new file mode 100644 index 000000000..4d7636668 --- /dev/null +++ b/freed-ora/current/f17/0009-ext4-punch_hole-should-wait-for-DIO-writers.patch @@ -0,0 +1,125 @@ +From 71a6398a4b59ddcf920dfb68872b5a771c606e3a Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov <dmonakhov@openvz.org> +Date: Sun, 30 Sep 2012 23:03:42 -0400 
+Subject: [PATCH 09/13] ext4: punch_hole should wait for DIO writers + +punch_hole is the place where we have to wait for all existing writers +(writeback, aio, dio), but currently we simply flush pending end_io requests, +which is not sufficient. Another issue is that punch_hole is performed w/o i_mutex +held, which obviously results in dangerous data corruption due to +write-after-free. + +This patch performs the following changes: +- Guard punch_hole with i_mutex +- Recheck inode flags under i_mutex +- Block all new dio readers in order to prevent information leak caused by + read-after-free pattern. +- punch_hole now waits for all writers in flight + NOTE: XXX write-after-free race is still possible because new dirty pages + may appear due to mmap(), and currently there is no easy way to stop + writeback while punch_hole is in progress. + +[ Fixed error return from ext4_ext_punch_hole() to make sure that we + release i_mutex before returning EPERM or ETXTBSY -- Ted ] + +Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> +Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> +(cherry picked from commit 02d262dffcf4c74e5c4612ee736bdb94f18ed5b9) +--- + fs/ext4/extents.c | 53 ++++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 36 insertions(+), 17 deletions(-) + +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index 1fbf2ff..202eb4d 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -4776,9 +4776,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) + loff_t first_page_offset, last_page_offset; + int credits, err = 0; + ++ /* ++ * Write out all dirty pages to avoid race conditions ++ * Then release them. 
++ */ ++ if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { ++ err = filemap_write_and_wait_range(mapping, ++ offset, offset + length - 1); ++ ++ if (err) ++ return err; ++ } ++ ++ mutex_lock(&inode->i_mutex); ++ /* It's not possible punch hole on append only file */ ++ if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { ++ err = -EPERM; ++ goto out_mutex; ++ } ++ if (IS_SWAPFILE(inode)) { ++ err = -ETXTBSY; ++ goto out_mutex; ++ } ++ + /* No need to punch hole beyond i_size */ + if (offset >= inode->i_size) +- return 0; ++ goto out_mutex; + + /* + * If the hole extends beyond i_size, set the hole +@@ -4796,33 +4819,25 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) + first_page_offset = first_page << PAGE_CACHE_SHIFT; + last_page_offset = last_page << PAGE_CACHE_SHIFT; + +- /* +- * Write out all dirty pages to avoid race conditions +- * Then release them. +- */ +- if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { +- err = filemap_write_and_wait_range(mapping, +- offset, offset + length - 1); +- +- if (err) +- return err; +- } +- + /* Now release the pages */ + if (last_page_offset > first_page_offset) { + truncate_pagecache_range(inode, first_page_offset, + last_page_offset - 1); + } + +- /* finish any pending end_io work */ ++ /* Wait all existing dio workers, newcomers will block on i_mutex */ ++ ext4_inode_block_unlocked_dio(inode); ++ inode_dio_wait(inode); + err = ext4_flush_completed_IO(inode); + if (err) +- return err; ++ goto out_dio; + + credits = ext4_writepage_trans_blocks(inode); + handle = ext4_journal_start(inode, credits); +- if (IS_ERR(handle)) +- return PTR_ERR(handle); ++ if (IS_ERR(handle)) { ++ err = PTR_ERR(handle); ++ goto out_dio; ++ } + + err = ext4_orphan_add(handle, inode); + if (err) +@@ -4916,6 +4931,10 @@ out: + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); ++out_dio: ++ 
ext4_inode_resume_unlocked_dio(inode); ++out_mutex: ++ mutex_unlock(&inode->i_mutex); + return err; + } + int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, +-- +1.7.12.rc0.22.gcdd159b + diff --git a/freed-ora/current/f17/0010-ext4-fix-ext_remove_space-for-punch_hole-case.patch b/freed-ora/current/f17/0010-ext4-fix-ext_remove_space-for-punch_hole-case.patch new file mode 100644 index 000000000..d161bb765 --- /dev/null +++ b/freed-ora/current/f17/0010-ext4-fix-ext_remove_space-for-punch_hole-case.patch @@ -0,0 +1,60 @@ +From 66d08dd92b82dabfd64853aa4edde1547fdf9ef7 Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov <dmonakhov@openvz.org> +Date: Sun, 30 Sep 2012 23:03:50 -0400 +Subject: [PATCH 10/13] ext4: fix ext_remove_space for punch_hole case + +Inode is allowed to have empty leaf only if it this is blockless inode. + +Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> +Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> +(cherry picked from commit 6f2080e64487b9963f9c6ff8a252e1abce98f2d4) +--- + fs/ext4/extents.c | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index 202eb4d..b1c92c0 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -2572,7 +2572,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, + struct ext4_ext_path *path = NULL; + ext4_fsblk_t partial_cluster = 0; + handle_t *handle; +- int i = 0, err; ++ int i = 0, err = 0; + + ext_debug("truncate since %u to %u\n", start, end); + +@@ -2604,12 +2604,16 @@ again: + return PTR_ERR(path); + } + depth = ext_depth(inode); ++ /* Leaf not may not exist only if inode has no blocks at all */ + ex = path[depth].p_ext; + if (!ex) { +- ext4_ext_drop_refs(path); +- kfree(path); +- path = NULL; +- goto cont; ++ if (depth) { ++ EXT4_ERROR_INODE(inode, ++ "path[%d].p_hdr == NULL", ++ depth); ++ err = -EIO; ++ } ++ goto out; + } + + ee_block = le32_to_cpu(ex->ee_block); +@@ -2641,8 +2645,6 @@ 
again: + goto out; + } + } +-cont: +- + /* + * We start scanning from right side, freeing all the blocks + * after i_size and walking into the tree depth-wise. +-- +1.7.12.rc0.22.gcdd159b + diff --git a/freed-ora/current/f17/0011-ext4-fix-ext4_flush_completed_IO-wait-semantics.patch b/freed-ora/current/f17/0011-ext4-fix-ext4_flush_completed_IO-wait-semantics.patch new file mode 100644 index 000000000..517b20129 --- /dev/null +++ b/freed-ora/current/f17/0011-ext4-fix-ext4_flush_completed_IO-wait-semantics.patch @@ -0,0 +1,176 @@ +From ca6d3910cbf8854f3f3b9846391f669733899101 Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov <dmonakhov@openvz.org> +Date: Fri, 5 Oct 2012 11:31:55 -0400 +Subject: [PATCH 11/13] ext4: fix ext4_flush_completed_IO wait semantics + +BUG #1) All places where we call ext4_flush_completed_IO are broken + because buffered io and DIO/AIO go through three stages + 1) submitted io, + 2) completed io (in i_completed_io_list) conversion pending + 3) finished io (conversion done) + And by calling ext4_flush_completed_IO we will flush only + requests which were in (2) stage, which is wrong because: + 1) punch_hole and truncate _must_ wait for all outstanding unwritten io + regardless of its state. 
+ 2) fsync and nolock_dio_read should also wait because there is + a time window between end_page_writeback() and ext4_add_complete_io() + As a result, integrity fsync is broken in case of buffered write + to fallocated region: + fsync blkdev_completion + ->filemap_write_and_wait_range + ->ext4_end_bio + ->end_page_writeback + <-- filemap_write_and_wait_range return + ->ext4_flush_completed_IO + sees empty i_completed_io_list but pending + conversion still exists + ->ext4_add_complete_io + +BUG #2) Race window becomes wider due to the 'ext4: completed_io +locking cleanup V4' patch series + +This patch makes the following changes: +1) ext4_flush_completed_io() now first tries to flush completed io and then + waits for any outstanding unwritten io via ext4_unwritten_wait() +2) Rename the function to a more appropriate name. +3) Assert that all callers of ext4_flush_unwritten_io should hold i_mutex to + prevent endless wait + +Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> +Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> +Reviewed-by: Jan Kara <jack@suse.cz> +(cherry picked from commit c278531d39f3158bfee93dc67da0b77e09776de2) +--- + fs/ext4/ext4.h | 3 ++- + fs/ext4/extents.c | 6 +++--- + fs/ext4/file.c | 2 +- + fs/ext4/fsync.c | 2 +- + fs/ext4/indirect.c | 8 +++++--- + fs/ext4/page-io.c | 11 +++++++---- + 6 files changed, 19 insertions(+), 13 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 3e740e9..7f13292 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1941,7 +1941,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p); + + /* fsync.c */ + extern int ext4_sync_file(struct file *, loff_t, loff_t, int); +-extern int ext4_flush_completed_IO(struct inode *); ++extern int ext4_flush_unwritten_io(struct inode *); + + /* hash.c */ + extern int ext4fs_dirhash(const char *name, int len, struct +@@ -2361,6 +2361,7 @@ extern const struct file_operations ext4_dir_operations; + extern const struct inode_operations ext4_file_inode_operations; + 
extern const struct file_operations ext4_file_operations; + extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); ++extern void ext4_unwritten_wait(struct inode *inode); + + /* namei.c */ + extern const struct inode_operations ext4_dir_inode_operations; +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index b1c92c0..37f46eb 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -4250,7 +4250,7 @@ void ext4_ext_truncate(struct inode *inode) + * finish any pending end_io work so we won't run the risk of + * converting any truncated blocks to initialized later + */ +- ext4_flush_completed_IO(inode); ++ ext4_flush_unwritten_io(inode); + + /* + * probably first extent we're gonna free will be last in block +@@ -4829,10 +4829,10 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) + + /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); +- inode_dio_wait(inode); +- err = ext4_flush_completed_IO(inode); ++ err = ext4_flush_unwritten_io(inode); + if (err) + goto out_dio; ++ inode_dio_wait(inode); + + credits = ext4_writepage_trans_blocks(inode); + handle = ext4_journal_start(inode, credits); +diff --git a/fs/ext4/file.c b/fs/ext4/file.c +index 39335bd..ca6f07a 100644 +--- a/fs/ext4/file.c ++++ b/fs/ext4/file.c +@@ -55,7 +55,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp) + return 0; + } + +-static void ext4_unwritten_wait(struct inode *inode) ++void ext4_unwritten_wait(struct inode *inode) + { + wait_queue_head_t *wq = ext4_ioend_wq(inode); + +diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c +index 520b058..76051c6 100644 +--- a/fs/ext4/fsync.c ++++ b/fs/ext4/fsync.c +@@ -138,7 +138,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) + if (inode->i_sb->s_flags & MS_RDONLY) + goto out; + +- ret = ext4_flush_completed_IO(inode); ++ ret = ext4_flush_unwritten_io(inode); + if (ret < 0) + goto out; + +diff --git 
a/fs/ext4/indirect.c b/fs/ext4/indirect.c +index 8d849da..792e388 100644 +--- a/fs/ext4/indirect.c ++++ b/fs/ext4/indirect.c +@@ -807,9 +807,11 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, + + retry: + if (rw == READ && ext4_should_dioread_nolock(inode)) { +- if (unlikely(!list_empty(&ei->i_completed_io_list))) +- ext4_flush_completed_IO(inode); +- ++ if (unlikely(atomic_read(&EXT4_I(inode)->i_unwritten))) { ++ mutex_lock(&inode->i_mutex); ++ ext4_flush_unwritten_io(inode); ++ mutex_unlock(&inode->i_mutex); ++ } + /* + * Nolock dioread optimization may be dynamically disabled + * via ext4_inode_block_unlocked_dio(). Check inode's state +diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c +index 5b24c40..68e896e 100644 +--- a/fs/ext4/page-io.c ++++ b/fs/ext4/page-io.c +@@ -189,8 +189,6 @@ static int ext4_do_flush_completed_IO(struct inode *inode, + + list_add_tail(&io->list, &complete); + } +- /* It is important to update all flags for all end_io in one shot w/o +- * dropping the lock.*/ + spin_lock_irqsave(&ei->i_completed_io_lock, flags); + while (!list_empty(&complete)) { + io = list_entry(complete.next, ext4_io_end_t, list); +@@ -228,9 +226,14 @@ static void ext4_end_io_work(struct work_struct *work) + ext4_do_flush_completed_IO(io->inode, io); + } + +-int ext4_flush_completed_IO(struct inode *inode) ++int ext4_flush_unwritten_io(struct inode *inode) + { +- return ext4_do_flush_completed_IO(inode, NULL); ++ int ret; ++ WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) && ++ !(inode->i_state & I_FREEING)); ++ ret = ext4_do_flush_completed_IO(inode, NULL); ++ ext4_unwritten_wait(inode); ++ return ret; + } + + ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) +-- +1.7.12.rc0.22.gcdd159b + diff --git a/freed-ora/current/f17/0012-ext4-serialize-fallocate-with-ext4_convert_unwritten.patch b/freed-ora/current/f17/0012-ext4-serialize-fallocate-with-ext4_convert_unwritten.patch new file mode 100644 index 000000000..3fcaef1f2 --- /dev/null +++ 
b/freed-ora/current/f17/0012-ext4-serialize-fallocate-with-ext4_convert_unwritten.patch @@ -0,0 +1,46 @@ +From 9f00d109efeaf4d12d56c8e46cd13af80e344f97 Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov <dmonakhov@openvz.org> +Date: Fri, 5 Oct 2012 11:32:02 -0400 +Subject: [PATCH 12/13] ext4: serialize fallocate with + ext4_convert_unwritten_extents + +Fallocate should wait for pending ext4_convert_unwritten_extents() +otherwise the following race may happen: + +ftruncate( ,12288); +fallocate( ,0, 4096) +io_submit( ,0, 4096); /* Write to fallocated area, split extent if needed */ +fallocate( ,0, 8192); /* Grow extent and break the assumption about extent */ + +Later kwork completion will do: + ->ext4_convert_unwritten_extents (0, 4096) + ->ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_IO_CONVERT_EXT); + ->ext4_ext_map_blocks() /* Will find new extent: ex = [0,2] !!!!!! */ + ->ext4_ext_handle_uninitialized_extents() + ->ext4_convert_unwritten_extents_endio() + /* convert [0,2] extent to initialized, but only [0,1] was written */ + +Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> +Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> +(cherry picked from commit 60d4616f3dc63371b3dc367e5e88fd4b4f037f65) +--- + fs/ext4/extents.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index 37f46eb..ea2db86 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -4410,6 +4410,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) + */ + if (len <= EXT_UNINIT_MAX_LEN << blkbits) + flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; ++ ++ /* Prevent race condition between unwritten */ ++ ext4_flush_unwritten_io(inode); + retry: + while (ret >= 0 && ret < max_blocks) { + map.m_lblk = map.m_lblk + ret; +-- +1.7.12.rc0.22.gcdd159b + diff --git a/freed-ora/current/f17/config-arm-generic b/freed-ora/current/f17/config-arm-generic index 8e5cb9ded..213ef786e 100644 --- a/freed-ora/current/f17/config-arm-generic +++ 
b/freed-ora/current/f17/config-arm-generic @@ -292,6 +292,8 @@ CONFIG_REGULATOR_TPS6507X=m CONFIG_CHARGER_MANAGER=y CONFIG_EXTCON_GPIO=m +# CONFIG_VFIO is not set + # CONFIG_XIP_KERNEL is not set # CONFIG_CPU_ICACHE_DISABLE is not set # CONFIG_CPU_DCACHE_DISABLE is not set diff --git a/freed-ora/current/f17/deblob-check b/freed-ora/current/f17/deblob-check index 8c4baace6..248da50c5 100755 --- a/freed-ora/current/f17/deblob-check +++ b/freed-ora/current/f17/deblob-check @@ -1,6 +1,6 @@ #! /bin/sh -# deblob-check version 2012-09-26 +# deblob-check version 2012-10-29 # Inspired in gNewSense's find-firmware script. # Written by Alexandre Oliva <lxoliva@fsfla.org> @@ -3153,6 +3153,11 @@ set_except () { accept 'P[13]\([\n]#[^\n]*\)*[\n]*\([\n][0-9 ]*\)\+' drivers/video/logo/logo_libre_clut224.ppm ;; + */patch-3.6*) + # Present in patch for 3.6.4. + accept 'MODULE_FIRMWARE[(]["]keyspan_pda[/]\(keyspan_pda\|xircom_pgs\)\.fw["][)][;]' drivers/usb/serial/keyspan_pda.c + ;; + */patch-3.5*) accept '[;][/][*]@@[ ]-[0-9]*,[0-9]*[ ][+][0-9]*,[0-9]*[ ]@@[ ]static[ ]void[ ]b43_request_firmware[(]' drivers/net/wireless/b43/main.c accept '[ ][*][ ][ ][ ]3[ ]3[ ]2[ ]2[ ]2[ ]2[ ]2[ ]2[ ]2[ ]2[ ]2[ ]2[ ]1[ ]1[ ]1[ ]1[ ]1[ ]1[ ]1[ ]1[ ]1[ ]1[\n][ ][*][ ][ ][ ]1[ ]0[ ]9[ ]8[ ]7[ ]6[ ]5[ ]4[ ]3[ ]2[ ]1[ ]0[ ]9[ ]8[ ]7[ ]6[ ]5[ ]4[ ]3[ ]2[ ]1[ ]0[ ]9[ ]8[ ]7[ ]6[ ]5[ ]4[ ]3[ ]2[ ]1[ ]0' arch/arm/include/asm/pgtable.h diff --git a/freed-ora/current/f17/dont-call-cifs_lookup-on-hashed-negative-dentry.patch b/freed-ora/current/f17/dont-call-cifs_lookup-on-hashed-negative-dentry.patch new file mode 100644 index 000000000..88b35e2f8 --- /dev/null +++ b/freed-ora/current/f17/dont-call-cifs_lookup-on-hashed-negative-dentry.patch @@ -0,0 +1,21 @@ +@@ -, +, @@ + negative dentry + BUG_ON(!d_unhashed(entry)); + fs/cifs/dir.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) +--- a/fs/cifs/dir.c ++++ a/fs/cifs/dir.c +@@ -398,7 +398,12 @@ cifs_atomic_open(struct inode *inode, struct dentry 
*direntry, + * in network traffic in the other paths. + */ + if (!(oflags & O_CREAT)) { +- struct dentry *res = cifs_lookup(inode, direntry, 0); ++ struct dentry *res; ++ ++ if (!direntry->d_inode) ++ return -ENOENT; ++ ++ res = cifs_lookup(inode, direntry, 0); + if (IS_ERR(res)) + return PTR_ERR(res); + diff --git a/freed-ora/current/f17/fix-stack-memory-content-leak-via-UNAME26.patch b/freed-ora/current/f17/fix-stack-memory-content-leak-via-UNAME26.patch deleted file mode 100644 index 5121ca06b..000000000 --- a/freed-ora/current/f17/fix-stack-memory-content-leak-via-UNAME26.patch +++ /dev/null @@ -1,98 +0,0 @@ -From 2702b1526c7278c4d65d78de209a465d4de2885e Mon Sep 17 00:00:00 2001 -From: Kees Cook <keescook@chromium.org> -Date: Fri, 19 Oct 2012 13:56:51 -0700 -Subject: [PATCH 1/2] kernel/sys.c: fix stack memory content leak via UNAME26 - -Calling uname() with the UNAME26 personality set allows a leak of kernel -stack contents. This fixes it by defensively calculating the length of -copy_to_user() call, making the len argument unsigned, and initializing -the stack buffer to zero (now technically unneeded, but hey, overkill). - -CVE-2012-0957 - -Reported-by: PaX Team <pageexec@freemail.hu> -Signed-off-by: Kees Cook <keescook@chromium.org> -Cc: Andi Kleen <ak@linux.intel.com> -Cc: PaX Team <pageexec@freemail.hu> -Cc: Brad Spengler <spender@grsecurity.net> -Cc: <stable@vger.kernel.org> -Signed-off-by: Andrew Morton <akpm@linux-foundation.org> -Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> ---- - kernel/sys.c | 12 +++++++----- - 1 file changed, 7 insertions(+), 5 deletions(-) - -diff --git a/kernel/sys.c b/kernel/sys.c -index c5cb5b9..01865c6 100644 ---- a/kernel/sys.c -+++ b/kernel/sys.c -@@ -1265,15 +1265,16 @@ DECLARE_RWSEM(uts_sem); - * Work around broken programs that cannot handle "Linux 3.0". - * Instead we map 3.x to 2.6.40+x, so e.g. 
3.0 would be 2.6.40 - */ --static int override_release(char __user *release, int len) -+static int override_release(char __user *release, size_t len) - { - int ret = 0; -- char buf[65]; - - if (current->personality & UNAME26) { -- char *rest = UTS_RELEASE; -+ const char *rest = UTS_RELEASE; -+ char buf[65] = { 0 }; - int ndots = 0; - unsigned v; -+ size_t copy; - - while (*rest) { - if (*rest == '.' && ++ndots >= 3) -@@ -1283,8 +1284,9 @@ static int override_release(char __user *release, int len) - rest++; - } - v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; -- snprintf(buf, len, "2.6.%u%s", v, rest); -- ret = copy_to_user(release, buf, len); -+ copy = min(sizeof(buf), max_t(size_t, 1, len)); -+ copy = scnprintf(buf, copy, "2.6.%u%s", v, rest); -+ ret = copy_to_user(release, buf, copy + 1); - } - return ret; - } --- -1.7.12.1 - - -From 31fd84b95eb211d5db460a1dda85e004800a7b52 Mon Sep 17 00:00:00 2001 -From: Kees Cook <keescook@chromium.org> -Date: Fri, 19 Oct 2012 18:45:53 -0700 -Subject: [PATCH 2/2] use clamp_t in UNAME26 fix - -The min/max call needed to have explicit types on some architectures -(e.g. mn10300). 
Use clamp_t instead to avoid the warning: - - kernel/sys.c: In function 'override_release': - kernel/sys.c:1287:10: warning: comparison of distinct pointer types lacks a cast [enabled by default] - -Reported-by: Fengguang Wu <fengguang.wu@intel.com> -Signed-off-by: Kees Cook <keescook@chromium.org> -Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> ---- - kernel/sys.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/kernel/sys.c b/kernel/sys.c -index 01865c6..e6e0ece 100644 ---- a/kernel/sys.c -+++ b/kernel/sys.c -@@ -1284,7 +1284,7 @@ static int override_release(char __user *release, size_t len) - rest++; - } - v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; -- copy = min(sizeof(buf), max_t(size_t, 1, len)); -+ copy = clamp_t(size_t, len, 1, sizeof(buf)); - copy = scnprintf(buf, copy, "2.6.%u%s", v, rest); - ret = copy_to_user(release, buf, copy + 1); - } --- -1.7.12.1 - diff --git a/freed-ora/current/f17/kernel.spec b/freed-ora/current/f17/kernel.spec index ed51f208d..bac9c5b98 100644 --- a/freed-ora/current/f17/kernel.spec +++ b/freed-ora/current/f17/kernel.spec @@ -104,7 +104,7 @@ Summary: The Linux kernel %if 0%{?released_kernel} # Do we have a -stable update to apply? -%define stable_update 3 +%define stable_update 4 # Is it a -stable RC? 
%define stable_rc 0 # Set rpm version accordingly @@ -805,19 +805,30 @@ Patch22070: net-tcp-bz857324.patch #rhbz 770484 Patch22071: iwlwifi-fix-6000-ch-switch.patch +Patch22072: linux-3.6-arm-build-fixup.patch + #rhbz 862168 Patch22073: mac80211_local_deauth_v3.6.patch #rhbz 866013 Patch22074: mac80211-connect-with-HT20-if-HT40-is-not-permitted.patch -#rhbz 856863 -Patch22075: rt2x00-usb-fix-reset-resume.patch - -#rhbz 862877 864824 CVE-2012-0957 -Patch22076: fix-stack-memory-content-leak-via-UNAME26.patch - -Patch22072: linux-3.6-arm-build-fixup.patch +#rhbz 867344 +Patch22077: dont-call-cifs_lookup-on-hashed-negative-dentry.patch + +#rhbz 869904 869909 CVE-2012-4508 +Patch22080: 0001-ext4-ext4_inode_info-diet.patch +Patch22081: 0002-ext4-give-i_aiodio_unwritten-a-more-appropriate-name.patch +Patch22082: 0003-ext4-fix-unwritten-counter-leakage.patch +Patch22083: 0004-ext4-completed_io-locking-cleanup.patch +Patch22084: 0005-ext4-serialize-dio-nonlocked-reads-with-defrag-worke.patch +Patch22085: 0006-ext4-serialize-unlocked-dio-reads-with-truncate.patch +Patch22086: 0007-ext4-endless-truncate-due-to-nonlocked-dio-readers.patch +Patch22087: 0008-ext4-serialize-truncate-with-owerwrite-DIO-workers.patch +Patch22088: 0009-ext4-punch_hole-should-wait-for-DIO-writers.patch +Patch22089: 0010-ext4-fix-ext_remove_space-for-punch_hole-case.patch +Patch22090: 0011-ext4-fix-ext4_flush_completed_IO-wait-semantics.patch +Patch22091: 0012-ext4-serialize-fallocate-with-ext4_convert_unwritten.patch # END OF PATCH DEFINITIONS @@ -1589,11 +1600,22 @@ ApplyPatch mac80211_local_deauth_v3.6.patch #rhbz 866013 ApplyPatch mac80211-connect-with-HT20-if-HT40-is-not-permitted.patch -#rhbz 856863 -ApplyPatch rt2x00-usb-fix-reset-resume.patch - -#rhbz 862877 864824 CVE-2012-0957 -ApplyPatch fix-stack-memory-content-leak-via-UNAME26.patch +#rhbz 867344 +ApplyPatch dont-call-cifs_lookup-on-hashed-negative-dentry.patch + +#rhbz 869904 869909 CVE-2012-4508 +ApplyPatch 
0001-ext4-ext4_inode_info-diet.patch +ApplyPatch 0002-ext4-give-i_aiodio_unwritten-a-more-appropriate-name.patch +ApplyPatch 0003-ext4-fix-unwritten-counter-leakage.patch +ApplyPatch 0004-ext4-completed_io-locking-cleanup.patch +ApplyPatch 0005-ext4-serialize-dio-nonlocked-reads-with-defrag-worke.patch +ApplyPatch 0006-ext4-serialize-unlocked-dio-reads-with-truncate.patch +ApplyPatch 0007-ext4-endless-truncate-due-to-nonlocked-dio-readers.patch +ApplyPatch 0008-ext4-serialize-truncate-with-owerwrite-DIO-workers.patch +ApplyPatch 0009-ext4-punch_hole-should-wait-for-DIO-writers.patch +ApplyPatch 0010-ext4-fix-ext_remove_space-for-punch_hole-case.patch +ApplyPatch 0011-ext4-fix-ext4_flush_completed_IO-wait-semantics.patch +ApplyPatch 0012-ext4-serialize-fallocate-with-ext4_convert_unwritten.patch # END OF PATCH APPLICATIONS @@ -2469,6 +2491,25 @@ fi # '-' | | # '-' %changelog +* Tue Oct 30 2012 Alexandre Oliva <lxoliva@fsfla.org> -libre +- GNU Linux-libre 3.6.4-gnu. + +* Mon Oct 29 2012 Justin M. Forbes <jforbes@redhat.com> 3.6.4-1 +- Linux 3.6.4 + +* Thu Oct 25 2012 Justin M. Forbes <jforbes@redhat.com> +- CVE-2012-4508: ext4: AIO vs fallocate stale data exposure (rhbz 869904 869909) + +* Wed Oct 24 2012 Josh Boyer <jwboyer@redhat.com> +- Remove patch added for rhbz 856863 +- Add patch to fix corrupted text with i915 (rhbz 852210) + +* Mon Oct 22 2012 Peter Robinson <pbrobinson@fedoraproject.org> +- VIFO fails on ARM at the moment so disable it for the time being + +* Mon Oct 22 2012 Josh Boyer <jwboyer@redhat.com> +- Add patch to fix CIFS oops from Jeff Layton (rhbz 867344) + * Mon Oct 22 2012 Alexandre Oliva <lxoliva@fsfla.org> -libre - GNU Linux-libre 3.6.3-gnu. 
diff --git a/freed-ora/current/f17/patch-3.6-gnu-3.6.3-gnu.xz.sign b/freed-ora/current/f17/patch-3.6-gnu-3.6.3-gnu.xz.sign deleted file mode 100644 index 053509ba2..000000000 --- a/freed-ora/current/f17/patch-3.6-gnu-3.6.3-gnu.xz.sign +++ /dev/null @@ -1,7 +0,0 @@ ------BEGIN PGP SIGNATURE----- -Version: GnuPG v2.0.18 (GNU/Linux) - -iEYEABECAAYFAlCEa+UACgkQvLfPh359R6fbgACgqgQmBiVcDYszqL24/52dQrRo -YScAoKFqylne7plL/wZPWAbBiPRoK1G4 -=B2Dv ------END PGP SIGNATURE----- diff --git a/freed-ora/current/f17/patch-3.6-gnu-3.6.4-gnu.xz.sign b/freed-ora/current/f17/patch-3.6-gnu-3.6.4-gnu.xz.sign new file mode 100644 index 000000000..ada040f2b --- /dev/null +++ b/freed-ora/current/f17/patch-3.6-gnu-3.6.4-gnu.xz.sign @@ -0,0 +1,7 @@ +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v2.0.18 (GNU/Linux) + +iEYEABECAAYFAlCNtIYACgkQvLfPh359R6f6LgCghgcmXXAKM/4f+w28gZEGjsdX +t5IAn1ZcTMcZdVl5O89zwCCgNlYJao33 +=HXqy +-----END PGP SIGNATURE----- diff --git a/freed-ora/current/f17/rt2x00-usb-fix-reset-resume.patch b/freed-ora/current/f17/rt2x00-usb-fix-reset-resume.patch deleted file mode 100644 index 07d2b4c0e..000000000 --- a/freed-ora/current/f17/rt2x00-usb-fix-reset-resume.patch +++ /dev/null @@ -1,70 +0,0 @@ -Patch fixes warnings like below happened on resume: - -WARNING: at net/mac80211/driver-ops.h:12 check_sdata_in_driver+0x32/0x34() - -Problem is that in __ieee80211_susped() we remove sdata (i.e wlan0 -interface) and then during resume we call usb_unbind_interface() -> -ieee80211_unregister_hw() with sdata removed. - -Patch fixes problem by adding .reset_resume calback, hence we do not -unbind usb device on resume. This callback can be the same as normal -.resume callback, sice we do all needed initalization during interface -start, which is performed on resume [ ieee80211_resume() -> -ieee80211_reconfig() -> rt2x00mac_start() -> rt2x00lib_start ]. 
- -Resolves: -https://bugzilla.kernel.org/show_bug.cgi?id=48041 - -Reported-by: David Herrmann <dh.herrmann@googlemail.com> -Reported-and-tested-by: Stephen Boyd <bebarino@gmail.com> -Cc: stable@vger.kernel.org -Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com> ---- - drivers/net/wireless/rt2x00/rt2500usb.c | 1 + - drivers/net/wireless/rt2x00/rt2800usb.c | 1 + - drivers/net/wireless/rt2x00/rt73usb.c | 1 + - 3 files changed, 3 insertions(+), 0 deletions(-) - -diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c -index a12e84f..6b2e1e4 100644 ---- a/drivers/net/wireless/rt2x00/rt2500usb.c -+++ b/drivers/net/wireless/rt2x00/rt2500usb.c -@@ -1988,6 +1988,7 @@ static struct usb_driver rt2500usb_driver = { - .disconnect = rt2x00usb_disconnect, - .suspend = rt2x00usb_suspend, - .resume = rt2x00usb_resume, -+ .reset_resume = rt2x00usb_resume, - .disable_hub_initiated_lpm = 1, - }; - -diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c -index c9e9370..3b8fb5a 100644 ---- a/drivers/net/wireless/rt2x00/rt2800usb.c -+++ b/drivers/net/wireless/rt2x00/rt2800usb.c -@@ -1282,6 +1282,7 @@ static struct usb_driver rt2800usb_driver = { - .disconnect = rt2x00usb_disconnect, - .suspend = rt2x00usb_suspend, - .resume = rt2x00usb_resume, -+ .reset_resume = rt2x00usb_resume, - .disable_hub_initiated_lpm = 1, - }; - -diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c -index e5eb43b..24eec66 100644 ---- a/drivers/net/wireless/rt2x00/rt73usb.c -+++ b/drivers/net/wireless/rt2x00/rt73usb.c -@@ -2535,6 +2535,7 @@ static struct usb_driver rt73usb_driver = { - .disconnect = rt2x00usb_disconnect, - .suspend = rt2x00usb_suspend, - .resume = rt2x00usb_resume, -+ .reset_resume = rt2x00usb_resume, - .disable_hub_initiated_lpm = 1, - }; - --- -1.7.1 - --- -To unsubscribe from this list: send the line "unsubscribe linux-wireless" in -the body of a message to 
majordomo@vger.kernel.org -More majordomo info at http://vger.kernel.org/majordomo-info.html
\ No newline at end of file diff --git a/freed-ora/current/f17/sources b/freed-ora/current/f17/sources index 66b8ecee1..d78296bbc 100644 --- a/freed-ora/current/f17/sources +++ b/freed-ora/current/f17/sources @@ -1,2 +1,2 @@ a2312edd0265b5b07bd4b50afae2b380 linux-libre-3.6-gnu.tar.xz -844bd14e9f58225274e096c31bbb8a8f patch-3.6-gnu-3.6.3-gnu.xz +71451d3fdb67db17581393a061bf9516 patch-3.6-gnu-3.6.4-gnu.xz |

