summaryrefslogtreecommitdiffstats
path: root/fs/direct-io.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-01-12 20:42:54 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-12 20:42:54 -0800
commit099469502f62fbe0d7e4f0b83a2f22538367f734 (patch)
tree5229c3818b2e6e09d35026d49314047121130536 /fs/direct-io.c
parent7c17d86a8502c2e30c2eea777ed1b830aa3b447b (diff)
parent35f1526845a9d804206883e19bd257d3dcef758f (diff)
downloadtalos-op-linux-099469502f62fbe0d7e4f0b83a2f22538367f734.tar.gz
talos-op-linux-099469502f62fbe0d7e4f0b83a2f22538367f734.zip
Merge branch 'akpm' (aka "Andrew's patch-bomb, take two")
Andrew explains: - various misc stuff - Most of the rest of MM: memcg, threaded hugepages, others. - cpumask - kexec - kdump - some direct-io performance tweaking - radix-tree optimisations - new selftests code A note on this: often people will develop a new userspace-visible feature and will develop userspace code to exercise/test that feature. Then they merge the patch and the selftest code dies. Sometimes we paste it into the changelog. Sometimes the code gets thrown into Documentation/(!). This saddens me. So this patch creates a bare-bones framework which will henceforth allow me to ask people to include their test apps in the kernel tree so we can keep them alive. Then when people enhance or fix the feature, I can ask them to update the test app too. The infrastruture is terribly trivial at present - let's see how it evolves. - checkpoint/restart feature work. A note on this: this is a project by various mad Russians to perform c/r mainly from userspace, with various oddball helper code added into the kernel where the need is demonstrated. So rather than some large central lump of code, what we have is little bits and pieces popping up in various places which either expose something new or which permit something which is normally kernel-private to be modified. The overall project is an ongoing thing. I've judged that the size and scope of the thing means that we're more likely to be successful with it if we integrate the support into mainline piecemeal rather than allowing it all to develop out-of-tree. However I'm less confident than the developers that it will all eventually work! So what I'm asking them to do is to wrap each piece of new code inside CONFIG_CHECKPOINT_RESTORE. So if it all eventually comes to tears and the project as a whole fails, it should be a simple matter to go through and delete all trace of it. This lot pretty much wraps up the -rc1 merge for me. * akpm: (96 commits) unlzo: fix input buffer free ramoops: update parameters only after successful init ramoops: fix use of rounddown_pow_of_two() c/r: prctl: add PR_SET_MM codes to set up mm_struct entries c/r: procfs: add start_data, end_data, start_brk members to /proc/$pid/stat v4 c/r: introduce CHECKPOINT_RESTORE symbol selftests: new x86 breakpoints selftest selftests: new very basic kernel selftests directory radix_tree: take radix_tree_path off stack radix_tree: remove radix_tree_indirect_to_ptr() dio: optimize cache misses in the submission path vfs: cache request_queue in struct block_device fs/direct-io.c: calculate fs_count correctly in get_more_blocks() drivers/parport/parport_pc.c: fix warnings panic: don't print redundant backtraces on oops sysctl: add the kernel.ns_last_pid control kdump: add udev events for memory online/offline include/linux/crash_dump.h needs elf.h kdump: fix crash_kexec()/smp_send_stop() race in panic() kdump: crashk_res init check for /sys/kernel/kexec_crash_size ...
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r--fs/direct-io.c57
1 files changed, 41 insertions, 16 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index d740ab67ff6e..4a588dbd11bf 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -36,6 +36,7 @@
#include <linux/rwsem.h>
#include <linux/uio.h>
#include <linux/atomic.h>
+#include <linux/prefetch.h>
/*
* How many user pages to map in one call to get_user_pages(). This determines
@@ -580,9 +581,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
{
int ret;
sector_t fs_startblk; /* Into file, in filesystem-sized blocks */
+ sector_t fs_endblk; /* Into file, in filesystem-sized blocks */
unsigned long fs_count; /* Number of filesystem-sized blocks */
- unsigned long dio_count;/* Number of dio_block-sized blocks */
- unsigned long blkmask;
int create;
/*
@@ -593,11 +593,9 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
if (ret == 0) {
BUG_ON(sdio->block_in_file >= sdio->final_block_in_request);
fs_startblk = sdio->block_in_file >> sdio->blkfactor;
- dio_count = sdio->final_block_in_request - sdio->block_in_file;
- fs_count = dio_count >> sdio->blkfactor;
- blkmask = (1 << sdio->blkfactor) - 1;
- if (dio_count & blkmask)
- fs_count++;
+ fs_endblk = (sdio->final_block_in_request - 1) >>
+ sdio->blkfactor;
+ fs_count = fs_endblk - fs_startblk + 1;
map_bh->b_state = 0;
map_bh->b_size = fs_count << dio->inode->i_blkbits;
@@ -1090,8 +1088,8 @@ static inline int drop_refcount(struct dio *dio)
* individual fields and will generate much worse code. This is important
* for the whole file.
*/
-ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+static inline ssize_t
+do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset,
unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
dio_submit_t submit_io, int flags)
@@ -1100,7 +1098,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
size_t size;
unsigned long addr;
unsigned blkbits = inode->i_blkbits;
- unsigned bdev_blkbits = 0;
unsigned blocksize_mask = (1 << blkbits) - 1;
ssize_t retval = -EINVAL;
loff_t end = offset;
@@ -1113,12 +1110,14 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
if (rw & WRITE)
rw = WRITE_ODIRECT;
- if (bdev)
- bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
+ /*
+ * Avoid references to bdev if not absolutely needed to give
+ * the early prefetch in the caller enough time.
+ */
if (offset & blocksize_mask) {
if (bdev)
- blkbits = bdev_blkbits;
+ blkbits = blksize_bits(bdev_logical_block_size(bdev));
blocksize_mask = (1 << blkbits) - 1;
if (offset & blocksize_mask)
goto out;
@@ -1129,11 +1128,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
addr = (unsigned long)iov[seg].iov_base;
size = iov[seg].iov_len;
end += size;
- if ((addr & blocksize_mask) || (size & blocksize_mask)) {
+ if (unlikely((addr & blocksize_mask) ||
+ (size & blocksize_mask))) {
if (bdev)
- blkbits = bdev_blkbits;
+ blkbits = blksize_bits(
+ bdev_logical_block_size(bdev));
blocksize_mask = (1 << blkbits) - 1;
- if ((addr & blocksize_mask) || (size & blocksize_mask))
+ if ((addr & blocksize_mask) || (size & blocksize_mask))
goto out;
}
}
@@ -1316,6 +1317,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
out:
return retval;
}
+
+ssize_t
+__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+ struct block_device *bdev, const struct iovec *iov, loff_t offset,
+ unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+ dio_submit_t submit_io, int flags)
+{
+ /*
+ * The block device state is needed in the end to finally
+ * submit everything. Since it's likely to be cache cold
+ * prefetch it here as first thing to hide some of the
+ * latency.
+ *
+ * Attempt to prefetch the pieces we likely need later.
+ */
+ prefetch(&bdev->bd_disk->part_tbl);
+ prefetch(bdev->bd_queue);
+ prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
+
+ return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
+ nr_segs, get_block, end_io,
+ submit_io, flags);
+}
+
EXPORT_SYMBOL(__blockdev_direct_IO);
static __init int dio_init(void)
OpenPOWER on IntegriCloud