summaryrefslogtreecommitdiffstats
path: root/freed-ora/tags/f16/3.6.8-2.fc16.gnu/block-fix-a-crash-when-block-device-is.patch
blob: af992830e0f9906adaf26a3c1ef5d58dcccebd35 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
Fix a crash when block device is read and block size is changed at the same time

commit b87570f5d349661814b262dd5fc40787700f80d6
Author: Mikulas Patocka <mpatocka@redhat.com>
Date:   Wed Sep 26 07:46:40 2012 +0200

    Fix a crash when block device is read and block size is changed at the same time
    
    The kernel may crash when block size is changed and I/O is issued
    simultaneously.
    
    Because some subsystems (udev or lvm) may read any block device anytime,
    the bug actually puts any code that changes a block device size in
    jeopardy.
    
    The crash can be reproduced if you place "msleep(1000)" to
    blkdev_get_blocks just before "bh->b_size = max_blocks <<
    inode->i_blkbits;".
    Then, run "dd if=/dev/ram0 of=/dev/null bs=4k count=1 iflag=direct"
    While it is waiting in msleep, run "blockdev --setbsz 2048 /dev/ram0"
    You get a BUG.
    
    The direct and non-direct I/O is written with the assumption that block
    size does not change. It doesn't seem practical to fix these crashes
    one-by-one there may be many crash possibilities when block size changes
    at a certain place and it is impossible to find them all and verify the
    code.
    
    This patch introduces a new rw-lock bd_block_size_semaphore. The lock is
    taken for read during I/O. It is taken for write when changing block
    size. Consequently, block size can't be changed while I/O is being
    submitted.
    
    For asynchronous I/O, the patch only prevents block size change while
    the I/O is being submitted. The block size can change when the I/O is in
    progress or when the I/O is being finished. This is acceptable because
    there are no accesses to block size when asynchronous I/O is being
    finished.
    
    The patch prevents block size changing while the device is mapped with
    mmap.
    
    Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
    Signed-off-by: Jens Axboe <axboe@kernel.dk>

Index: linux-3.6.x86_64/drivers/char/raw.c
===================================================================
--- linux-3.6.x86_64.orig/drivers/char/raw.c	2012-11-16 17:12:35.127010280 -0500
+++ linux-3.6.x86_64/drivers/char/raw.c	2012-11-16 17:12:37.381002516 -0500
@@ -285,7 +285,7 @@
 
 static const struct file_operations raw_fops = {
 	.read		= do_sync_read,
-	.aio_read	= generic_file_aio_read,
+	.aio_read	= blkdev_aio_read,
 	.write		= do_sync_write,
 	.aio_write	= blkdev_aio_write,
 	.fsync		= blkdev_fsync,
Index: linux-3.6.x86_64/fs/block_dev.c
===================================================================
--- linux-3.6.x86_64.orig/fs/block_dev.c	2012-11-16 17:12:35.127010280 -0500
+++ linux-3.6.x86_64/fs/block_dev.c	2012-11-16 17:12:37.381002516 -0500
@@ -116,6 +116,8 @@
 
 int set_blocksize(struct block_device *bdev, int size)
 {
+	struct address_space *mapping;
+
 	/* Size must be a power of two, and between 512 and PAGE_SIZE */
 	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
 		return -EINVAL;
@@ -124,6 +126,20 @@
 	if (size < bdev_logical_block_size(bdev))
 		return -EINVAL;
 
+	/* Prevent starting I/O or mapping the device */
+	down_write(&bdev->bd_block_size_semaphore);
+
+	/* Check that the block device is not memory mapped */
+	mapping = bdev->bd_inode->i_mapping;
+	mutex_lock(&mapping->i_mmap_mutex);
+	if (!prio_tree_empty(&mapping->i_mmap) ||
+	    !list_empty(&mapping->i_mmap_nonlinear)) {
+		mutex_unlock(&mapping->i_mmap_mutex);
+		up_write(&bdev->bd_block_size_semaphore);
+		return -EBUSY;
+	}
+	mutex_unlock(&mapping->i_mmap_mutex);
+
 	/* Don't change the size if it is same as current */
 	if (bdev->bd_block_size != size) {
 		sync_blockdev(bdev);
@@ -131,6 +147,9 @@
 		bdev->bd_inode->i_blkbits = blksize_bits(size);
 		kill_bdev(bdev);
 	}
+
+	up_write(&bdev->bd_block_size_semaphore);
+
 	return 0;
 }
 
@@ -472,6 +491,7 @@
 	inode_init_once(&ei->vfs_inode);
 	/* Initialize mutex for freeze. */
 	mutex_init(&bdev->bd_fsfreeze_mutex);
+	init_rwsem(&bdev->bd_block_size_semaphore);
 }
 
 static inline void __bd_forget(struct inode *inode)
@@ -1567,6 +1587,22 @@
 	return blkdev_ioctl(bdev, mode, cmd, arg);
 }
 
+ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
+			unsigned long nr_segs, loff_t pos)
+{
+	ssize_t ret;
+	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
+
+	down_read(&bdev->bd_block_size_semaphore);
+
+	ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
+
+	up_read(&bdev->bd_block_size_semaphore);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blkdev_aio_read);
+
 /*
  * Write data to the block device.  Only intended for the block device itself
  * and the raw driver which basically is a fake block device.
@@ -1578,12 +1614,16 @@
 			 unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
+	struct block_device *bdev = I_BDEV(file->f_mapping->host);
 	struct blk_plug plug;
 	ssize_t ret;
 
 	BUG_ON(iocb->ki_pos != pos);
 
 	blk_start_plug(&plug);
+
+	down_read(&bdev->bd_block_size_semaphore);
+
 	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
 	if (ret > 0 || ret == -EIOCBQUEUED) {
 		ssize_t err;
@@ -1592,11 +1632,29 @@
 		if (err < 0 && ret > 0)
 			ret = err;
 	}
+
+	up_read(&bdev->bd_block_size_semaphore);
+
 	blk_finish_plug(&plug);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(blkdev_aio_write);
 
+int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int ret;
+	struct block_device *bdev = I_BDEV(file->f_mapping->host);
+
+	down_read(&bdev->bd_block_size_semaphore);
+
+	ret = generic_file_mmap(file, vma);
+
+	up_read(&bdev->bd_block_size_semaphore);
+
+	return ret;
+}
+
 /*
  * Try to release a page associated with block device when the system
  * is under memory pressure.
@@ -1627,9 +1685,9 @@
 	.llseek		= block_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
-  	.aio_read	= generic_file_aio_read,
+  	.aio_read	= blkdev_aio_read,
 	.aio_write	= blkdev_aio_write,
-	.mmap		= generic_file_mmap,
+	.mmap		= blkdev_mmap,
 	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= block_ioctl,
 #ifdef CONFIG_COMPAT
Index: linux-3.6.x86_64/include/linux/fs.h
===================================================================
--- linux-3.6.x86_64.orig/include/linux/fs.h	2012-11-16 17:12:35.127010280 -0500
+++ linux-3.6.x86_64/include/linux/fs.h	2012-11-16 17:12:37.424002387 -0500
@@ -724,6 +724,8 @@
 	int			bd_fsfreeze_count;
 	/* Mutex for freeze */
 	struct mutex		bd_fsfreeze_mutex;
+	/* A semaphore that prevents I/O while block size is being changed */
+	struct rw_semaphore	bd_block_size_semaphore;
 };
 
 /*
@@ -2564,6 +2566,8 @@
 		unsigned long *nr_segs, size_t *count, int access_flags);
 
 /* fs/block_dev.c */
+extern ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
+			       unsigned long nr_segs, loff_t pos);
 extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
 				unsigned long nr_segs, loff_t pos);
 extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
OpenPOWER on IntegriCloud