summaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/DAC960.c12
-rw-r--r--drivers/block/Kconfig3
-rw-r--r--drivers/block/brd.c11
-rw-r--r--drivers/block/drbd/drbd_actlog.c2
-rw-r--r--drivers/block/drbd/drbd_bitmap.c2
-rw-r--r--drivers/block/drbd/drbd_int.h31
-rw-r--r--drivers/block/drbd/drbd_main.c113
-rw-r--r--drivers/block/drbd/drbd_nl.c60
-rw-r--r--drivers/block/drbd/drbd_proc.c10
-rw-r--r--drivers/block/drbd/drbd_receiver.c60
-rw-r--r--drivers/block/drbd/drbd_req.c86
-rw-r--r--drivers/block/drbd/drbd_req.h6
-rw-r--r--drivers/block/drbd/drbd_state.c48
-rw-r--r--drivers/block/drbd/drbd_state.h8
-rw-r--r--drivers/block/drbd/drbd_worker.c48
-rw-r--r--drivers/block/floppy.c2
-rw-r--r--drivers/block/loop.c56
-rw-r--r--drivers/block/loop.h1
-rw-r--r--drivers/block/nbd.c15
-rw-r--r--drivers/block/null_blk.c1311
-rw-r--r--drivers/block/pktcdvd.c11
-rw-r--r--drivers/block/ps3vram.c10
-rw-r--r--drivers/block/rsxx/dev.c6
-rw-r--r--drivers/block/skd_main.c3164
-rw-r--r--drivers/block/skd_s1120.h38
-rw-r--r--drivers/block/sunvdc.c61
-rw-r--r--drivers/block/virtio_blk.c18
-rw-r--r--drivers/block/xen-blkback/blkback.c9
-rw-r--r--drivers/block/xen-blkback/xenbus.c13
-rw-r--r--drivers/block/xen-blkfront.c8
-rw-r--r--drivers/block/zram/Kconfig12
-rw-r--r--drivers/block/zram/zram_drv.c549
-rw-r--r--drivers/block/zram/zram_drv.h11
33 files changed, 2986 insertions, 2809 deletions
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 245a879b036e..255591ab3716 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -1678,9 +1678,12 @@ static bool DAC960_V1_ReadControllerConfiguration(DAC960_Controller_T
Enquiry2->FirmwareID.FirmwareType = '0';
Enquiry2->FirmwareID.TurnID = 0;
}
- sprintf(Controller->FirmwareVersion, "%d.%02d-%c-%02d",
- Enquiry2->FirmwareID.MajorVersion, Enquiry2->FirmwareID.MinorVersion,
- Enquiry2->FirmwareID.FirmwareType, Enquiry2->FirmwareID.TurnID);
+ snprintf(Controller->FirmwareVersion, sizeof(Controller->FirmwareVersion),
+ "%d.%02d-%c-%02d",
+ Enquiry2->FirmwareID.MajorVersion,
+ Enquiry2->FirmwareID.MinorVersion,
+ Enquiry2->FirmwareID.FirmwareType,
+ Enquiry2->FirmwareID.TurnID);
if (!((Controller->FirmwareVersion[0] == '5' &&
strcmp(Controller->FirmwareVersion, "5.06") >= 0) ||
(Controller->FirmwareVersion[0] == '4' &&
@@ -6588,7 +6591,8 @@ static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller)
&dac960_proc_fops);
}
- sprintf(Controller->ControllerName, "c%d", Controller->ControllerNumber);
+ snprintf(Controller->ControllerName, sizeof(Controller->ControllerName),
+ "c%d", Controller->ControllerNumber);
ControllerProcEntry = proc_mkdir(Controller->ControllerName,
DAC960_ProcDirectoryEntry);
proc_create_data("initial_status", 0, ControllerProcEntry, &dac960_initial_status_proc_fops, Controller);
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 898e4861488b..4a438b8abe27 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -17,6 +17,7 @@ if BLK_DEV
config BLK_DEV_NULL_BLK
tristate "Null test block driver"
+ depends on CONFIGFS_FS
config BLK_DEV_FD
tristate "Normal floppy disk support"
@@ -443,7 +444,7 @@ config VIRTIO_BLK
depends on VIRTIO
---help---
This is the virtual block driver for virtio. It can be used with
- lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
+ QEMU based VMMs (like KVM or Xen). Say Y or M.
config VIRTIO_BLK_SCSI
bool "SCSI passthrough request for the Virtio block driver"
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 104b71c0490d..bbd0d186cfc0 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -294,14 +294,13 @@ out:
static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
{
- struct block_device *bdev = bio->bi_bdev;
- struct brd_device *brd = bdev->bd_disk->private_data;
+ struct brd_device *brd = bio->bi_disk->private_data;
struct bio_vec bvec;
sector_t sector;
struct bvec_iter iter;
sector = bio->bi_iter.bi_sector;
- if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
+ if (bio_end_sector(bio) > get_capacity(bio->bi_disk))
goto io_error;
bio_for_each_segment(bvec, bio, iter) {
@@ -326,7 +325,11 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, bool is_write)
{
struct brd_device *brd = bdev->bd_disk->private_data;
- int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector);
+ int err;
+
+ if (PageTransHuge(page))
+ return -ENOTSUPP;
+ err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector);
page_endio(page, is_write, err);
return err;
}
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index e02c45cd3c5a..5f0eaee8c8a7 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -151,7 +151,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
op_flags |= REQ_SYNC;
bio = bio_alloc_drbd(GFP_NOIO);
- bio->bi_bdev = bdev->md_bdev;
+ bio_set_dev(bio, bdev->md_bdev);
bio->bi_iter.bi_sector = sector;
err = -EIO;
if (bio_add_page(bio, device->md_io.page, size, 0) != size)
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 809fd245c3dc..bd97908c766f 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1019,7 +1019,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
bm_store_page_idx(page, page_nr);
} else
page = b->bm_pages[page_nr];
- bio->bi_bdev = device->ldev->md_bdev;
+ bio_set_dev(bio, device->ldev->md_bdev);
bio->bi_iter.bi_sector = on_disk_sector;
/* bio_add_page of a single page to an empty bio will always succeed,
* according to api. Do we want to assert that? */
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d17b6e6393c7..7e8589ce631c 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -63,19 +63,15 @@
# define __must_hold(x)
#endif
-/* module parameter, defined in drbd_main.c */
-extern unsigned int minor_count;
-extern bool disable_sendpage;
-extern bool allow_oos;
-void tl_abort_disk_io(struct drbd_device *device);
-
+/* shared module parameters, defined in drbd_main.c */
#ifdef CONFIG_DRBD_FAULT_INJECTION
-extern int enable_faults;
-extern int fault_rate;
-extern int fault_devs;
+extern int drbd_enable_faults;
+extern int drbd_fault_rate;
#endif
-extern char usermode_helper[];
+extern unsigned int drbd_minor_count;
+extern char drbd_usermode_helper[];
+extern int drbd_proc_details;
/* This is used to stop/restart our threads.
@@ -181,8 +177,8 @@ _drbd_insert_fault(struct drbd_device *device, unsigned int type);
static inline int
drbd_insert_fault(struct drbd_device *device, unsigned int type) {
#ifdef CONFIG_DRBD_FAULT_INJECTION
- return fault_rate &&
- (enable_faults & (1<<type)) &&
+ return drbd_fault_rate &&
+ (drbd_enable_faults & (1<<type)) &&
_drbd_insert_fault(device, type);
#else
return 0;
@@ -745,6 +741,8 @@ struct drbd_connection {
unsigned current_tle_writes; /* writes seen within this tl epoch */
unsigned long last_reconnect_jif;
+ /* empty member on older kernels without blk_start_plug() */
+ struct blk_plug receiver_plug;
struct drbd_thread receiver;
struct drbd_thread worker;
struct drbd_thread ack_receiver;
@@ -1131,7 +1129,8 @@ extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_sta
extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct drbd_peer_request *);
extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev);
extern void drbd_device_cleanup(struct drbd_device *device);
-void drbd_print_uuids(struct drbd_device *device, const char *text);
+extern void drbd_print_uuids(struct drbd_device *device, const char *text);
+extern void drbd_queue_unplug(struct drbd_device *device);
extern void conn_md_sync(struct drbd_connection *connection);
extern void drbd_md_write(struct drbd_device *device, void *buffer);
@@ -1463,8 +1462,6 @@ extern struct drbd_resource *drbd_find_resource(const char *name);
extern void drbd_destroy_resource(struct kref *kref);
extern void conn_free_crypto(struct drbd_connection *connection);
-extern int proc_details;
-
/* drbd_req */
extern void do_submit(struct work_struct *ws);
extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long);
@@ -1628,8 +1625,8 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
int fault_type, struct bio *bio)
{
__release(local);
- if (!bio->bi_bdev) {
- drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
+ if (!bio->bi_disk) {
+ drbd_err(device, "drbd_generic_make_request: bio->bi_disk == NULL\n");
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index e2ed28d45ce1..8cb3791898ae 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -77,41 +77,41 @@ MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices ("
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
#include <linux/moduleparam.h>
-/* allow_open_on_secondary */
-MODULE_PARM_DESC(allow_oos, "DONT USE!");
/* thanks to these macros, if compiled into the kernel (not-module),
- * this becomes the boot parameter drbd.minor_count */
-module_param(minor_count, uint, 0444);
-module_param(disable_sendpage, bool, 0644);
-module_param(allow_oos, bool, 0);
-module_param(proc_details, int, 0644);
+ * these become boot parameters (e.g., drbd.minor_count) */
#ifdef CONFIG_DRBD_FAULT_INJECTION
-int enable_faults;
-int fault_rate;
-static int fault_count;
-int fault_devs;
+int drbd_enable_faults;
+int drbd_fault_rate;
+static int drbd_fault_count;
+static int drbd_fault_devs;
/* bitmap of enabled faults */
-module_param(enable_faults, int, 0664);
+module_param_named(enable_faults, drbd_enable_faults, int, 0664);
/* fault rate % value - applies to all enabled faults */
-module_param(fault_rate, int, 0664);
+module_param_named(fault_rate, drbd_fault_rate, int, 0664);
/* count of faults inserted */
-module_param(fault_count, int, 0664);
+module_param_named(fault_count, drbd_fault_count, int, 0664);
/* bitmap of devices to insert faults on */
-module_param(fault_devs, int, 0644);
+module_param_named(fault_devs, drbd_fault_devs, int, 0644);
#endif
-/* module parameter, defined */
-unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
-bool disable_sendpage;
-bool allow_oos;
-int proc_details; /* Detail level in proc drbd*/
-
+/* module parameters we can keep static */
+static bool drbd_allow_oos; /* allow_open_on_secondary */
+static bool drbd_disable_sendpage;
+MODULE_PARM_DESC(allow_oos, "DONT USE!");
+module_param_named(allow_oos, drbd_allow_oos, bool, 0);
+module_param_named(disable_sendpage, drbd_disable_sendpage, bool, 0644);
+
+/* module parameters we share */
+int drbd_proc_details; /* Detail level in proc drbd*/
+module_param_named(proc_details, drbd_proc_details, int, 0644);
+/* module parameters shared with defaults */
+unsigned int drbd_minor_count = DRBD_MINOR_COUNT_DEF;
/* Module parameter for setting the user mode helper program
* to run. Default is /sbin/drbdadm */
-char usermode_helper[80] = "/sbin/drbdadm";
-
-module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);
+char drbd_usermode_helper[80] = "/sbin/drbdadm";
+module_param_named(minor_count, drbd_minor_count, uint, 0444);
+module_param_string(usermode_helper, drbd_usermode_helper, sizeof(drbd_usermode_helper), 0644);
/* in 2.6.x, our device mapping and config info contains our virtual gendisks
* as member "struct gendisk *vdisk;"
@@ -923,7 +923,9 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
}
/* communicated if (agreed_features & DRBD_FF_WSAME) */
-void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct request_queue *q)
+static void
+assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p,
+ struct request_queue *q)
{
if (q) {
p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
@@ -1560,7 +1562,7 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa
* put_page(); and would cause either a VM_BUG directly, or
* __page_cache_release a page that would actually still be referenced
* by someone, leading to some obscure delayed Oops somewhere else. */
- if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
+ if (drbd_disable_sendpage || (page_count(page) < 1) || PageSlab(page))
return _drbd_no_send_page(peer_device, page, offset, size, msg_flags);
msg_flags |= MSG_NOSIGNAL;
@@ -1932,7 +1934,7 @@ static int drbd_open(struct block_device *bdev, fmode_t mode)
if (device->state.role != R_PRIMARY) {
if (mode & FMODE_WRITE)
rv = -EROFS;
- else if (!allow_oos)
+ else if (!drbd_allow_oos)
rv = -EMEDIUMTYPE;
}
@@ -1952,6 +1954,19 @@ static void drbd_release(struct gendisk *gd, fmode_t mode)
mutex_unlock(&drbd_main_mutex);
}
+/* need to hold resource->req_lock */
+void drbd_queue_unplug(struct drbd_device *device)
+{
+ if (device->state.pdsk >= D_INCONSISTENT && device->state.conn >= C_CONNECTED) {
+ D_ASSERT(device, device->state.role == R_PRIMARY);
+ if (test_and_clear_bit(UNPLUG_REMOTE, &device->flags)) {
+ drbd_queue_work_if_unqueued(
+ &first_peer_device(device)->connection->sender_work,
+ &device->unplug_work);
+ }
+ }
+}
+
static void drbd_set_defaults(struct drbd_device *device)
{
/* Beware! The actual layout differs
@@ -2008,18 +2023,14 @@ void drbd_init_set_defaults(struct drbd_device *device)
device->unplug_work.cb = w_send_write_hint;
device->bm_io_work.w.cb = w_bitmap_io;
- init_timer(&device->resync_timer);
- init_timer(&device->md_sync_timer);
- init_timer(&device->start_resync_timer);
- init_timer(&device->request_timer);
- device->resync_timer.function = resync_timer_fn;
- device->resync_timer.data = (unsigned long) device;
- device->md_sync_timer.function = md_sync_timer_fn;
- device->md_sync_timer.data = (unsigned long) device;
- device->start_resync_timer.function = start_resync_timer_fn;
- device->start_resync_timer.data = (unsigned long) device;
- device->request_timer.function = request_timer_fn;
- device->request_timer.data = (unsigned long) device;
+ setup_timer(&device->resync_timer, resync_timer_fn,
+ (unsigned long)device);
+ setup_timer(&device->md_sync_timer, md_sync_timer_fn,
+ (unsigned long)device);
+ setup_timer(&device->start_resync_timer, start_resync_timer_fn,
+ (unsigned long)device);
+ setup_timer(&device->request_timer, request_timer_fn,
+ (unsigned long)device);
init_waitqueue_head(&device->misc_wait);
init_waitqueue_head(&device->state_wait);
@@ -2131,7 +2142,7 @@ static void drbd_destroy_mempools(void)
static int drbd_create_mempools(void)
{
struct page *page;
- const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
+ const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count;
int i;
/* prepare our caches and mempools */
@@ -2167,13 +2178,12 @@ static int drbd_create_mempools(void)
goto Enomem;
/* mempools */
- drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER);
+ drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
if (drbd_io_bio_set == NULL)
goto Enomem;
drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
- BIOSET_NEED_BVECS |
- BIOSET_NEED_RESCUER);
+ BIOSET_NEED_BVECS);
if (drbd_md_io_bio_set == NULL)
goto Enomem;
@@ -2409,7 +2419,6 @@ static void drbd_cleanup(void)
destroy_workqueue(retry.wq);
drbd_genl_unregister();
- drbd_debugfs_cleanup();
idr_for_each_entry(&drbd_devices, device, i)
drbd_delete_device(device);
@@ -2420,6 +2429,8 @@ static void drbd_cleanup(void)
drbd_free_resource(resource);
}
+ drbd_debugfs_cleanup();
+
drbd_destroy_mempools();
unregister_blkdev(DRBD_MAJOR, "drbd");
@@ -2972,12 +2983,12 @@ static int __init drbd_init(void)
{
int err;
- if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
- pr_err("invalid minor_count (%d)\n", minor_count);
+ if (drbd_minor_count < DRBD_MINOR_COUNT_MIN || drbd_minor_count > DRBD_MINOR_COUNT_MAX) {
+ pr_err("invalid minor_count (%d)\n", drbd_minor_count);
#ifdef MODULE
return -EINVAL;
#else
- minor_count = DRBD_MINOR_COUNT_DEF;
+ drbd_minor_count = DRBD_MINOR_COUNT_DEF;
#endif
}
@@ -3900,12 +3911,12 @@ _drbd_insert_fault(struct drbd_device *device, unsigned int type)
static struct fault_random_state rrs = {0, 0};
unsigned int ret = (
- (fault_devs == 0 ||
- ((1 << device_to_minor(device)) & fault_devs) != 0) &&
- (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));
+ (drbd_fault_devs == 0 ||
+ ((1 << device_to_minor(device)) & drbd_fault_devs) != 0) &&
+ (((_drbd_fault_random(&rrs) % 100) + 1) <= drbd_fault_rate));
if (ret) {
- fault_count++;
+ drbd_fault_count++;
if (__ratelimit(&drbd_ratelimit_state))
drbd_warn(device, "***Simulating %s failure\n",
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index ad0fcb43e45c..a12f77e6891e 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -344,7 +344,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd)
(char[60]) { }, /* address */
NULL };
char mb[14];
- char *argv[] = {usermode_helper, cmd, mb, NULL };
+ char *argv[] = {drbd_usermode_helper, cmd, mb, NULL };
struct drbd_connection *connection = first_peer_device(device)->connection;
struct sib_info sib;
int ret;
@@ -359,19 +359,19 @@ int drbd_khelper(struct drbd_device *device, char *cmd)
* write out any unsynced meta data changes now */
drbd_md_sync(device);
- drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
+ drbd_info(device, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, mb);
sib.sib_reason = SIB_HELPER_PRE;
sib.helper_name = cmd;
drbd_bcast_event(device, &sib);
notify_helper(NOTIFY_CALL, device, connection, cmd, 0);
- ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
+ ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
if (ret)
drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
- usermode_helper, cmd, mb,
+ drbd_usermode_helper, cmd, mb,
(ret >> 8) & 0xff, ret);
else
drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
- usermode_helper, cmd, mb,
+ drbd_usermode_helper, cmd, mb,
(ret >> 8) & 0xff, ret);
sib.sib_reason = SIB_HELPER_POST;
sib.helper_exit_code = ret;
@@ -396,24 +396,24 @@ enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd)
(char[60]) { }, /* address */
NULL };
char *resource_name = connection->resource->name;
- char *argv[] = {usermode_helper, cmd, resource_name, NULL };
+ char *argv[] = {drbd_usermode_helper, cmd, resource_name, NULL };
int ret;
setup_khelper_env(connection, envp);
conn_md_sync(connection);
- drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
+ drbd_info(connection, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, resource_name);
/* TODO: conn_bcast_event() ?? */
notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0);
- ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
+ ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
if (ret)
drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
- usermode_helper, cmd, resource_name,
+ drbd_usermode_helper, cmd, resource_name,
(ret >> 8) & 0xff, ret);
else
drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
- usermode_helper, cmd, resource_name,
+ drbd_usermode_helper, cmd, resource_name,
(ret >> 8) & 0xff, ret);
/* TODO: conn_bcast_event() ?? */
notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret);
@@ -1236,12 +1236,18 @@ static void fixup_discard_if_not_supported(struct request_queue *q)
static void decide_on_write_same_support(struct drbd_device *device,
struct request_queue *q,
- struct request_queue *b, struct o_qlim *o)
+ struct request_queue *b, struct o_qlim *o,
+ bool disable_write_same)
{
struct drbd_peer_device *peer_device = first_peer_device(device);
struct drbd_connection *connection = peer_device->connection;
bool can_do = b ? b->limits.max_write_same_sectors : true;
+ if (can_do && disable_write_same) {
+ can_do = false;
+ drbd_info(peer_device, "WRITE_SAME disabled by config\n");
+ }
+
if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_WSAME)) {
can_do = false;
drbd_info(peer_device, "peer does not support WRITE_SAME\n");
@@ -1302,6 +1308,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
struct request_queue *b = NULL;
struct disk_conf *dc;
bool discard_zeroes_if_aligned = true;
+ bool disable_write_same = false;
if (bdev) {
b = bdev->backing_bdev->bd_disk->queue;
@@ -1311,6 +1318,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
dc = rcu_dereference(device->ldev->disk_conf);
max_segments = dc->max_bio_bvecs;
discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned;
+ disable_write_same = dc->disable_write_same;
rcu_read_unlock();
blk_set_stacking_limits(&q->limits);
@@ -1321,7 +1329,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
blk_queue_segment_boundary(q, PAGE_SIZE-1);
decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
- decide_on_write_same_support(device, q, b, o);
+ decide_on_write_same_support(device, q, b, o, disable_write_same);
if (b) {
blk_queue_stack_limits(q, b);
@@ -1612,7 +1620,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
if (write_ordering_changed(old_disk_conf, new_disk_conf))
drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);
- if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned)
+ if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned
+ || old_disk_conf->disable_write_same != new_disk_conf->disable_write_same)
drbd_reconsider_queue_parameters(device, device->ldev, NULL);
drbd_md_sync(device);
@@ -2140,34 +2149,13 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
static int adm_detach(struct drbd_device *device, int force)
{
- enum drbd_state_rv retcode;
- void *buffer;
- int ret;
-
if (force) {
set_bit(FORCE_DETACH, &device->flags);
drbd_force_state(device, NS(disk, D_FAILED));
- retcode = SS_SUCCESS;
- goto out;
+ return SS_SUCCESS;
}
- drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
- buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
- if (buffer) {
- retcode = drbd_request_state(device, NS(disk, D_FAILED));
- drbd_md_put_buffer(device);
- } else /* already <= D_FAILED */
- retcode = SS_NOTHING_TO_DO;
- /* D_FAILED will transition to DISKLESS. */
- drbd_resume_io(device);
- ret = wait_event_interruptible(device->misc_wait,
- device->state.disk != D_FAILED);
- if ((int)retcode == (int)SS_IS_DISKLESS)
- retcode = SS_NOTHING_TO_DO;
- if (ret)
- retcode = ERR_INTR;
-out:
- return retcode;
+ return drbd_request_detach_interruptible(device);
}
/* Detaching the disk is a process in multiple stages. First we need to lock
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 8378142f7a55..582caeb0de86 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -127,7 +127,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
seq_putc(seq, '=');
seq_putc(seq, '>');
for (i = 0; i < y; i++)
- seq_printf(seq, ".");
+ seq_putc(seq, '.');
seq_puts(seq, "] ");
if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
@@ -179,7 +179,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
seq_printf_with_thousands_grouping(seq, dbdt);
seq_puts(seq, " (");
/* ------------------------- ~3s average ------------------------ */
- if (proc_details >= 1) {
+ if (drbd_proc_details >= 1) {
/* this is what drbd_rs_should_slow_down() uses */
i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
dt = (jiffies - device->rs_mark_time[i]) / HZ;
@@ -209,7 +209,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
}
seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");
- if (proc_details >= 1) {
+ if (drbd_proc_details >= 1) {
/* 64 bit:
* we convert to sectors in the display below. */
unsigned long bm_bits = drbd_bm_bits(device);
@@ -332,13 +332,13 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
state.conn == C_VERIFY_T)
drbd_syncer_progress(device, seq, state);
- if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
+ if (drbd_proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
lc_seq_printf_stats(seq, device->resync);
lc_seq_printf_stats(seq, device->act_log);
put_ldev(device);
}
- if (proc_details >= 2)
+ if (drbd_proc_details >= 2)
seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt));
}
rcu_read_unlock();
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c7e95e6380fb..796eaf347dc0 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -332,7 +332,7 @@ static void drbd_free_pages(struct drbd_device *device, struct page *page, int i
if (page == NULL)
return;
- if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
+ if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count)
i = page_chain_free(page);
else {
struct page *tmp;
@@ -1100,7 +1100,10 @@ randomize:
idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
mutex_lock(peer_device->device->state_mutex);
+ /* avoid a race with conn_request_state( C_DISCONNECTING ) */
+ spin_lock_irq(&connection->resource->req_lock);
set_bit(STATE_SENT, &connection->flags);
+ spin_unlock_irq(&connection->resource->req_lock);
idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
mutex_unlock(peer_device->device->state_mutex);
@@ -1194,6 +1197,14 @@ static int decode_header(struct drbd_connection *connection, void *header, struc
return 0;
}
+static void drbd_unplug_all_devices(struct drbd_connection *connection)
+{
+ if (current->plug == &connection->receiver_plug) {
+ blk_finish_plug(&connection->receiver_plug);
+ blk_start_plug(&connection->receiver_plug);
+ } /* else: maybe just schedule() ?? */
+}
+
static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
{
void *buffer = connection->data.rbuf;
@@ -1209,6 +1220,36 @@ static int drbd_recv_header(struct drbd_connection *connection, struct packet_in
return err;
}
+static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi)
+{
+ void *buffer = connection->data.rbuf;
+ unsigned int size = drbd_header_size(connection);
+ int err;
+
+ err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT);
+ if (err != size) {
+ /* If we have nothing in the receive buffer now, to reduce
+ * application latency, try to drain the backend queues as
+ * quickly as possible, and let remote TCP know what we have
+ * received so far. */
+ if (err == -EAGAIN) {
+ drbd_tcp_quickack(connection->data.socket);
+ drbd_unplug_all_devices(connection);
+ }
+ if (err > 0) {
+ buffer += err;
+ size -= err;
+ }
+ err = drbd_recv_all_warn(connection, buffer, size);
+ if (err)
+ return err;
+ }
+
+ err = decode_header(connection, connection->data.rbuf, pi);
+ connection->last_received = jiffies;
+
+ return err;
+}
/* This is blkdev_issue_flush, but asynchronous.
* We want to submit to all component volumes in parallel,
* then wait for all completions.
@@ -1223,7 +1264,7 @@ struct one_flush_context {
struct issue_flush_context *ctx;
};
-void one_flush_endio(struct bio *bio)
+static void one_flush_endio(struct bio *bio)
{
struct one_flush_context *octx = bio->bi_private;
struct drbd_device *device = octx->device;
@@ -1265,7 +1306,7 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont
octx->device = device;
octx->ctx = ctx;
- bio->bi_bdev = device->ldev->backing_bdev;
+ bio_set_dev(bio, device->ldev->backing_bdev);
bio->bi_private = octx;
bio->bi_end_io = one_flush_endio;
bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
@@ -1548,7 +1589,7 @@ next_bio:
}
/* > peer_req->i.sector, unless this is the first bio */
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = device->ldev->backing_bdev;
+ bio_set_dev(bio, device->ldev->backing_bdev);
bio_set_op_attrs(bio, op, op_flags);
bio->bi_private = peer_req;
bio->bi_end_io = drbd_peer_request_endio;
@@ -4085,7 +4126,7 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info
return config_unknown_volume(connection, pi);
device = peer_device->device;
- p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
+ p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO);
if (!p_uuid) {
drbd_err(device, "kmalloc of p_uuid failed\n");
return false;
@@ -4882,8 +4923,8 @@ static void drbdd(struct drbd_connection *connection)
struct data_cmd const *cmd;
drbd_thread_current_set_cpu(&connection->receiver);
- update_receiver_timing_details(connection, drbd_recv_header);
- if (drbd_recv_header(connection, &pi))
+ update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug);
+ if (drbd_recv_header_maybe_unplug(connection, &pi))
goto err_out;
cmd = &drbd_cmd_handler[pi.cmd];
@@ -5375,8 +5416,11 @@ int drbd_receiver(struct drbd_thread *thi)
}
} while (h == 0);
- if (h > 0)
+ if (h > 0) {
+ blk_start_plug(&connection->receiver_plug);
drbdd(connection);
+ blk_finish_plug(&connection->receiver_plug);
+ }
conn_disconnect(connection);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index f6e865b2d543..de8566e55334 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -36,14 +36,18 @@ static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector,
/* Update disk stats at start of I/O request */
static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request *req)
{
- generic_start_io_acct(bio_data_dir(req->master_bio), req->i.size >> 9,
- &device->vdisk->part0);
+ struct request_queue *q = device->rq_queue;
+
+ generic_start_io_acct(q, bio_data_dir(req->master_bio),
+ req->i.size >> 9, &device->vdisk->part0);
}
/* Update disk stats when completing request upwards */
static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req)
{
- generic_end_io_acct(bio_data_dir(req->master_bio),
+ struct request_queue *q = device->rq_queue;
+
+ generic_end_io_acct(q, bio_data_dir(req->master_bio),
&device->vdisk->part0, req->start_jif);
}
@@ -1175,7 +1179,7 @@ drbd_submit_req_private_bio(struct drbd_request *req)
else
type = DRBD_FAULT_DT_RD;
- bio->bi_bdev = device->ldev->backing_bdev;
+ bio_set_dev(bio, device->ldev->backing_bdev);
/* State may have changed since we grabbed our reference on the
* ->ldev member. Double check, and short-circuit to endio.
@@ -1275,6 +1279,57 @@ static bool may_do_writes(struct drbd_device *device)
return s.disk == D_UP_TO_DATE || s.pdsk == D_UP_TO_DATE;
}
+struct drbd_plug_cb {
+ struct blk_plug_cb cb;
+ struct drbd_request *most_recent_req;
+ /* do we need more? */
+};
+
+static void drbd_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+ struct drbd_plug_cb *plug = container_of(cb, struct drbd_plug_cb, cb);
+ struct drbd_resource *resource = plug->cb.data;
+ struct drbd_request *req = plug->most_recent_req;
+
+ kfree(cb);
+ if (!req)
+ return;
+
+ spin_lock_irq(&resource->req_lock);
+ /* In case the sender did not process it yet, raise the flag to
+ * have it followed with P_UNPLUG_REMOTE just after. */
+ req->rq_state |= RQ_UNPLUG;
+ /* but also queue a generic unplug */
+ drbd_queue_unplug(req->device);
+ kref_put(&req->kref, drbd_req_destroy);
+ spin_unlock_irq(&resource->req_lock);
+}
+
+static struct drbd_plug_cb* drbd_check_plugged(struct drbd_resource *resource)
+{
+ /* A lot of text to say
+ * return (struct drbd_plug_cb*)blk_check_plugged(); */
+ struct drbd_plug_cb *plug;
+ struct blk_plug_cb *cb = blk_check_plugged(drbd_unplug, resource, sizeof(*plug));
+
+ if (cb)
+ plug = container_of(cb, struct drbd_plug_cb, cb);
+ else
+ plug = NULL;
+ return plug;
+}
+
+static void drbd_update_plug(struct drbd_plug_cb *plug, struct drbd_request *req)
+{
+ struct drbd_request *tmp = plug->most_recent_req;
+ /* Will be sent to some peer.
+ * Remember to tag it with UNPLUG_REMOTE on unplug */
+ kref_get(&req->kref);
+ plug->most_recent_req = req;
+ if (tmp)
+ kref_put(&tmp->kref, drbd_req_destroy);
+}
+
static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req)
{
struct drbd_resource *resource = device->resource;
@@ -1347,6 +1402,12 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
no_remote = true;
}
+ if (no_remote == false) {
+ struct drbd_plug_cb *plug = drbd_check_plugged(resource);
+ if (plug)
+ drbd_update_plug(plug, req);
+ }
+
/* If it took the fast path in drbd_request_prepare, add it here.
* The slow path has added it already. */
if (list_empty(&req->req_pending_master_completion))
@@ -1395,7 +1456,10 @@ void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned l
static void submit_fast_path(struct drbd_device *device, struct list_head *incoming)
{
+ struct blk_plug plug;
struct drbd_request *req, *tmp;
+
+ blk_start_plug(&plug);
list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
const int rw = bio_data_dir(req->master_bio);
@@ -1413,6 +1477,7 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
list_del_init(&req->tl_requests);
drbd_send_and_submit(device, req);
}
+ blk_finish_plug(&plug);
}
static bool prepare_al_transaction_nonblock(struct drbd_device *device,
@@ -1420,12 +1485,12 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device,
struct list_head *pending,
struct list_head *later)
{
- struct drbd_request *req, *tmp;
+ struct drbd_request *req;
int wake = 0;
int err;
spin_lock_irq(&device->al_lock);
- list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
+ while ((req = list_first_entry_or_null(incoming, struct drbd_request, tl_requests))) {
err = drbd_al_begin_io_nonblock(device, &req->i);
if (err == -ENOBUFS)
break;
@@ -1442,17 +1507,20 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device,
return !list_empty(pending);
}
-void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
+static void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
{
- struct drbd_request *req, *tmp;
+ struct blk_plug plug;
+ struct drbd_request *req;
- list_for_each_entry_safe(req, tmp, pending, tl_requests) {
+ blk_start_plug(&plug);
+ while ((req = list_first_entry_or_null(pending, struct drbd_request, tl_requests))) {
req->rq_state |= RQ_IN_ACT_LOG;
req->in_actlog_jif = jiffies;
atomic_dec(&device->ap_actlog_cnt);
list_del_init(&req->tl_requests);
drbd_send_and_submit(device, req);
}
+ blk_finish_plug(&plug);
}
void do_submit(struct work_struct *ws)
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 9e1866ab238f..a2254f825601 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -212,6 +212,11 @@ enum drbd_req_state_bits {
/* Should call drbd_al_complete_io() for this request... */
__RQ_IN_ACT_LOG,
+ /* This was the most recent request during some blk_finish_plug()
+ * or its implicit from-schedule equivalent.
+ * We may use it as hint to send a P_UNPLUG_REMOTE */
+ __RQ_UNPLUG,
+
/* The peer has sent a retry ACK */
__RQ_POSTPONED,
@@ -249,6 +254,7 @@ enum drbd_req_state_bits {
#define RQ_WSAME (1UL << __RQ_WSAME)
#define RQ_UNMAP (1UL << __RQ_UNMAP)
#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG)
+#define RQ_UNPLUG (1UL << __RQ_UNPLUG)
#define RQ_POSTPONED (1UL << __RQ_POSTPONED)
#define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP)
#define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK)
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index eea0c4aec978..0813c654c893 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -346,7 +346,7 @@ static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
enum drbd_role conn_highest_role(struct drbd_connection *connection)
{
- enum drbd_role role = R_UNKNOWN;
+ enum drbd_role role = R_SECONDARY;
struct drbd_peer_device *peer_device;
int vnr;
@@ -579,11 +579,14 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
unsigned long flags;
union drbd_state os, ns;
enum drbd_state_rv rv;
+ void *buffer = NULL;
init_completion(&done);
if (f & CS_SERIALIZE)
mutex_lock(device->state_mutex);
+ if (f & CS_INHIBIT_MD_IO)
+ buffer = drbd_md_get_buffer(device, __func__);
spin_lock_irqsave(&device->resource->req_lock, flags);
os = drbd_read_state(device);
@@ -636,6 +639,8 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
}
abort:
+ if (buffer)
+ drbd_md_put_buffer(device);
if (f & CS_SERIALIZE)
mutex_unlock(device->state_mutex);
@@ -664,6 +669,47 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask,
return rv;
}
+/*
+ * We grab drbd_md_get_buffer(), because we don't want to "fail" the disk while
+ * there is IO in-flight: the transition into D_FAILED for detach purposes
+ * may get misinterpreted as actual IO error in a confused endio function.
+ *
+ * We wrap it all into wait_event(), to retry in case the drbd_req_state()
+ * returns SS_IN_TRANSIENT_STATE.
+ *
+ * To avoid potential deadlock with e.g. the receiver thread trying to grab
+ * drbd_md_get_buffer() while trying to get out of the "transient state", we
+ * need to grab and release the meta data buffer inside of that wait_event loop.
+ */
+static enum drbd_state_rv
+request_detach(struct drbd_device *device)
+{
+ return drbd_req_state(device, NS(disk, D_FAILED),
+ CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO);
+}
+
+enum drbd_state_rv
+drbd_request_detach_interruptible(struct drbd_device *device)
+{
+ enum drbd_state_rv rv;
+ int ret;
+
+ drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
+ wait_event_interruptible(device->state_wait,
+ (rv = request_detach(device)) != SS_IN_TRANSIENT_STATE);
+ drbd_resume_io(device);
+
+ ret = wait_event_interruptible(device->misc_wait,
+ device->state.disk != D_FAILED);
+
+ if (rv == SS_IS_DISKLESS)
+ rv = SS_NOTHING_TO_DO;
+ if (ret)
+ rv = ERR_INTR;
+
+ return rv;
+}
+
enum drbd_state_rv
_drbd_request_state_holding_state_mutex(struct drbd_device *device, union drbd_state mask,
union drbd_state val, enum chg_state_flags f)
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h
index 6c9d5d4a8a75..0276c98fbbdd 100644
--- a/drivers/block/drbd/drbd_state.h
+++ b/drivers/block/drbd/drbd_state.h
@@ -71,6 +71,10 @@ enum chg_state_flags {
CS_DC_SUSP = 1 << 10,
CS_DC_MASK = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK,
CS_IGN_OUTD_FAIL = 1 << 11,
+
+ /* Make sure no meta data IO is in flight, by calling
+ * drbd_md_get_buffer(). Used for graceful detach. */
+ CS_INHIBIT_MD_IO = 1 << 12,
};
/* drbd_dev_state and drbd_state are different types. This is to stress the
@@ -156,6 +160,10 @@ static inline int drbd_request_state(struct drbd_device *device,
return _drbd_request_state(device, mask, val, CS_VERBOSE + CS_ORDERED);
}
+/* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */
+enum drbd_state_rv
+drbd_request_detach_interruptible(struct drbd_device *device);
+
enum drbd_role conn_highest_role(struct drbd_connection *connection);
enum drbd_role conn_highest_peer(struct drbd_connection *connection);
enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection);
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 1d8726a8df34..03471b3fce86 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -65,6 +65,11 @@ void drbd_md_endio(struct bio *bio)
device = bio->bi_private;
device->md_io.error = blk_status_to_errno(bio->bi_status);
+ /* special case: drbd_md_read() during drbd_adm_attach() */
+ if (device->ldev)
+ put_ldev(device);
+ bio_put(bio);
+
/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
* to timeout on the lower level device, and eventually detach from it.
* If this io completion runs after that timeout expired, this
@@ -79,9 +84,6 @@ void drbd_md_endio(struct bio *bio)
drbd_md_put_buffer(device);
device->md_io.done = 1;
wake_up(&device->misc_wait);
- bio_put(bio);
- if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
- put_ldev(device);
}
/* reads on behalf of the partner,
@@ -128,6 +130,14 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
block_id = peer_req->block_id;
peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
+ if (peer_req->flags & EE_WAS_ERROR) {
+ /* In protocol != C, we usually do not send write acks.
+ * In case of a write error, send the neg ack anyways. */
+ if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags))
+ inc_unacked(device);
+ drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
+ }
+
spin_lock_irqsave(&device->resource->req_lock, flags);
device->writ_cnt += peer_req->i.size >> 9;
list_move_tail(&peer_req->w.list, &device->done_ee);
@@ -195,7 +205,8 @@ void drbd_peer_request_endio(struct bio *bio)
}
}
-void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
+static void
+drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
{
panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
device->minor, device->resource->name, device->vnr);
@@ -1382,18 +1393,22 @@ static int drbd_send_barrier(struct drbd_connection *connection)
return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
}
+static int pd_send_unplug_remote(struct drbd_peer_device *pd)
+{
+ struct drbd_socket *sock = &pd->connection->data;
+ if (!drbd_prepare_command(pd, sock))
+ return -EIO;
+ return drbd_send_command(pd, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
+}
+
int w_send_write_hint(struct drbd_work *w, int cancel)
{
struct drbd_device *device =
container_of(w, struct drbd_device, unplug_work);
- struct drbd_socket *sock;
if (cancel)
return 0;
- sock = &first_peer_device(device)->connection->data;
- if (!drbd_prepare_command(first_peer_device(device), sock))
- return -EIO;
- return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
+ return pd_send_unplug_remote(first_peer_device(device));
}
static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
@@ -1455,6 +1470,7 @@ int w_send_dblock(struct drbd_work *w, int cancel)
struct drbd_device *device = req->device;
struct drbd_peer_device *const peer_device = first_peer_device(device);
struct drbd_connection *connection = peer_device->connection;
+ bool do_send_unplug = req->rq_state & RQ_UNPLUG;
int err;
if (unlikely(cancel)) {
@@ -1470,6 +1486,9 @@ int w_send_dblock(struct drbd_work *w, int cancel)
err = drbd_send_dblock(peer_device, req);
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
+ if (do_send_unplug && !err)
+ pd_send_unplug_remote(peer_device);
+
return err;
}
@@ -1484,6 +1503,7 @@ int w_send_read_req(struct drbd_work *w, int cancel)
struct drbd_device *device = req->device;
struct drbd_peer_device *const peer_device = first_peer_device(device);
struct drbd_connection *connection = peer_device->connection;
+ bool do_send_unplug = req->rq_state & RQ_UNPLUG;
int err;
if (unlikely(cancel)) {
@@ -1501,6 +1521,9 @@ int w_send_read_req(struct drbd_work *w, int cancel)
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
+ if (do_send_unplug && !err)
+ pd_send_unplug_remote(peer_device);
+
return err;
}
@@ -1513,7 +1536,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
drbd_al_begin_io(device, &req->i);
drbd_req_make_private_bio(req, req->master_bio);
- req->private_bio->bi_bdev = device->ldev->backing_bdev;
+ bio_set_dev(req->private_bio, device->ldev->backing_bdev);
generic_make_request(req->private_bio);
return 0;
@@ -1733,6 +1756,11 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
return;
}
+ if (!connection) {
+ drbd_err(device, "No connection to peer, aborting!\n");
+ return;
+ }
+
if (!test_bit(B_RS_H_DONE, &device->flags)) {
if (side == C_SYNC_TARGET) {
/* Since application IO was locked out during C_WF_BITMAP_T and
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 9c00f29e40c1..60c086a53609 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4134,7 +4134,7 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
cbdata.drive = drive;
bio_init(&bio, &bio_vec, 1);
- bio.bi_bdev = bdev;
+ bio_set_dev(&bio, bdev);
bio_add_page(&bio, page, size, 0);
bio.bi_iter.bi_sector = 0;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ef8334949b42..407cb172d6e3 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -221,8 +221,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
}
static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
- loff_t logical_blocksize)
+figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
{
loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
sector_t x = (sector_t)size;
@@ -234,12 +233,6 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
lo->lo_offset = offset;
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
- if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
- lo->lo_logical_blocksize = logical_blocksize;
- blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
- blk_queue_logical_block_size(lo->lo_queue,
- lo->lo_logical_blocksize);
- }
set_capacity(lo->lo_disk, x);
bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
/* let user-space know about the new size */
@@ -820,7 +813,6 @@ static void loop_config_discard(struct loop_device *lo)
struct file *file = lo->lo_backing_file;
struct inode *inode = file->f_mapping->host;
struct request_queue *q = lo->lo_queue;
- int lo_bits = 9;
/*
* We use punch hole to reclaim the free space used by the
@@ -840,11 +832,9 @@ static void loop_config_discard(struct loop_device *lo)
q->limits.discard_granularity = inode->i_sb->s_blocksize;
q->limits.discard_alignment = 0;
- if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
- lo_bits = blksize_bits(lo->lo_logical_blocksize);
- blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits);
- blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits);
+ blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
+ blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
@@ -938,7 +928,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
lo->use_dio = false;
lo->lo_blocksize = lo_blocksize;
- lo->lo_logical_blocksize = 512;
lo->lo_device = bdev;
lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
@@ -1104,7 +1093,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
- int lo_flags = lo->lo_flags;
if (lo->lo_encrypt_key_size &&
!uid_eq(lo->lo_key_owner, uid) &&
@@ -1137,26 +1125,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (err)
goto exit;
- if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
- if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE))
- lo->lo_logical_blocksize = 512;
- lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
- if (LO_INFO_BLOCKSIZE(info) != 512 &&
- LO_INFO_BLOCKSIZE(info) != 1024 &&
- LO_INFO_BLOCKSIZE(info) != 2048 &&
- LO_INFO_BLOCKSIZE(info) != 4096)
- return -EINVAL;
- if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize)
- return -EINVAL;
- }
-
if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit ||
- lo->lo_flags != lo_flags ||
- ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
- lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) {
- if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
- LO_INFO_BLOCKSIZE(info))) {
+ lo->lo_sizelimit != info->lo_sizelimit) {
+ if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
err = -EFBIG;
goto exit;
}
@@ -1348,8 +1319,7 @@ static int loop_set_capacity(struct loop_device *lo)
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
- return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
- lo->lo_logical_blocksize);
+ return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
}
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
@@ -1996,10 +1966,6 @@ static int __init loop_init(void)
struct loop_device *lo;
int err;
- err = misc_register(&loop_misc);
- if (err < 0)
- return err;
-
part_shift = 0;
if (max_part > 0) {
part_shift = fls(max_part);
@@ -2017,12 +1983,12 @@ static int __init loop_init(void)
if ((1UL << part_shift) > DISK_MAX_PARTS) {
err = -EINVAL;
- goto misc_out;
+ goto err_out;
}
if (max_loop > 1UL << (MINORBITS - part_shift)) {
err = -EINVAL;
- goto misc_out;
+ goto err_out;
}
/*
@@ -2041,6 +2007,11 @@ static int __init loop_init(void)
range = 1UL << MINORBITS;
}
+ err = misc_register(&loop_misc);
+ if (err < 0)
+ goto err_out;
+
+
if (register_blkdev(LOOP_MAJOR, "loop")) {
err = -EIO;
goto misc_out;
@@ -2060,6 +2031,7 @@ static int __init loop_init(void)
misc_out:
misc_deregister(&loop_misc);
+err_out:
return err;
}
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 2c096b9a17b8..fecd3f97ef8c 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -49,7 +49,6 @@ struct loop_device {
struct file * lo_backing_file;
struct block_device *lo_device;
unsigned lo_blocksize;
- unsigned lo_logical_blocksize;
void *key_data;
gfp_t old_gfp_mask;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5bdf923294a5..2aa87cbdede0 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -128,7 +128,7 @@ static struct dentry *nbd_dbg_dir;
#define NBD_MAGIC 0x68797548
static unsigned int nbds_max = 16;
-static int max_part;
+static int max_part = 16;
static struct workqueue_struct *recv_workqueue;
static int part_shift;
@@ -165,7 +165,7 @@ static ssize_t pid_show(struct device *dev,
return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
}
-static struct device_attribute pid_attr = {
+static const struct device_attribute pid_attr = {
.attr = { .name = "pid", .mode = S_IRUGO},
.show = pid_show,
};
@@ -1584,6 +1584,15 @@ again:
}
} else {
nbd = idr_find(&nbd_index_idr, index);
+ if (!nbd) {
+ ret = nbd_dev_add(index);
+ if (ret < 0) {
+ mutex_unlock(&nbd_index_mutex);
+ printk(KERN_ERR "nbd: failed to add new device\n");
+ return ret;
+ }
+ nbd = idr_find(&nbd_index_idr, index);
+ }
}
if (!nbd) {
printk(KERN_ERR "nbd: couldn't find device at index %d\n",
@@ -2137,4 +2146,4 @@ MODULE_LICENSE("GPL");
module_param(nbds_max, int, 0444);
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
module_param(max_part, int, 0444);
-MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)");
+MODULE_PARM_DESC(max_part, "number of partitions per device (default: 16)");
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 85c24cace973..8042c26ea9e6 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -1,3 +1,7 @@
+/*
+ * Add configfs and memory store: Kyungchan Koh <kkc6196@fb.com> and
+ * Shaohua Li <shli@fb.com>
+ */
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -9,27 +13,110 @@
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>
#include <linux/lightnvm.h>
+#include <linux/configfs.h>
+#include <linux/badblocks.h>
+
+#define SECTOR_SHIFT 9
+#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
+#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+#define SECTOR_MASK (PAGE_SECTORS - 1)
+
+#define FREE_BATCH 16
+
+#define TICKS_PER_SEC 50ULL
+#define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC)
+
+static inline u64 mb_per_tick(int mbps)
+{
+ return (1 << 20) / TICKS_PER_SEC * ((u64) mbps);
+}
struct nullb_cmd {
struct list_head list;
struct llist_node ll_list;
- struct call_single_data csd;
+ call_single_data_t csd;
struct request *rq;
struct bio *bio;
unsigned int tag;
struct nullb_queue *nq;
struct hrtimer timer;
+ blk_status_t error;
};
struct nullb_queue {
unsigned long *tag_map;
wait_queue_head_t wait;
unsigned int queue_depth;
+ struct nullb_device *dev;
struct nullb_cmd *cmds;
};
+/*
+ * Status flags for nullb_device.
+ *
+ * CONFIGURED: Device has been configured and turned on. Cannot reconfigure.
+ * UP: Device is currently on and visible in userspace.
+ * THROTTLED: Device is being throttled.
+ * CACHE: Device is using a write-back cache.
+ */
+enum nullb_device_flags {
+ NULLB_DEV_FL_CONFIGURED = 0,
+ NULLB_DEV_FL_UP = 1,
+ NULLB_DEV_FL_THROTTLED = 2,
+ NULLB_DEV_FL_CACHE = 3,
+};
+
+/*
+ * nullb_page is a page in memory for nullb devices.
+ *
+ * @page: The page holding the data.
+ * @bitmap: The bitmap represents which sector in the page has data.
+ * Each bit represents one block size. For example, sector 8
+ * will use the 7th bit
+ * The highest 2 bits of bitmap are for special purpose. LOCK means the cache
+ * page is being flushing to storage. FREE means the cache page is freed and
+ * should be skipped from flushing to storage. Please see
+ * null_make_cache_space
+ */
+struct nullb_page {
+ struct page *page;
+ unsigned long bitmap;
+};
+#define NULLB_PAGE_LOCK (sizeof(unsigned long) * 8 - 1)
+#define NULLB_PAGE_FREE (sizeof(unsigned long) * 8 - 2)
+
+struct nullb_device {
+ struct nullb *nullb;
+ struct config_item item;
+ struct radix_tree_root data; /* data stored in the disk */
+ struct radix_tree_root cache; /* disk cache data */
+ unsigned long flags; /* device flags */
+ unsigned int curr_cache;
+ struct badblocks badblocks;
+
+ unsigned long size; /* device size in MB */
+ unsigned long completion_nsec; /* time in ns to complete a request */
+ unsigned long cache_size; /* disk cache size in MB */
+ unsigned int submit_queues; /* number of submission queues */
+ unsigned int home_node; /* home node for the device */
+ unsigned int queue_mode; /* block interface */
+ unsigned int blocksize; /* block size */
+ unsigned int irqmode; /* IRQ completion handler */
+ unsigned int hw_queue_depth; /* queue depth */
+ unsigned int index; /* index of the disk, only valid with a disk */
+ unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */
+ bool use_lightnvm; /* register as a LightNVM device */
+ bool blocking; /* blocking blk-mq device */
+ bool use_per_node_hctx; /* use per-node allocation for hardware context */
+ bool power; /* power on/off the device */
+ bool memory_backed; /* if data is stored in memory */
+ bool discard; /* if support discard */
+};
+
struct nullb {
+ struct nullb_device *dev;
struct list_head list;
unsigned int index;
struct request_queue *q;
@@ -37,8 +124,10 @@ struct nullb {
struct nvm_dev *ndev;
struct blk_mq_tag_set *tag_set;
struct blk_mq_tag_set __tag_set;
- struct hrtimer timer;
unsigned int queue_depth;
+ atomic_long_t cur_bytes;
+ struct hrtimer bw_timer;
+ unsigned long cache_flush_pos;
spinlock_t lock;
struct nullb_queue *queues;
@@ -49,7 +138,7 @@ struct nullb {
static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
-static int nullb_indexes;
+static DEFINE_IDA(nullb_indexes);
static struct kmem_cache *ppa_cache;
static struct blk_mq_tag_set tag_set;
@@ -65,15 +154,15 @@ enum {
NULL_Q_MQ = 2,
};
-static int submit_queues;
-module_param(submit_queues, int, S_IRUGO);
+static int g_submit_queues = 1;
+module_param_named(submit_queues, g_submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");
-static int home_node = NUMA_NO_NODE;
-module_param(home_node, int, S_IRUGO);
+static int g_home_node = NUMA_NO_NODE;
+module_param_named(home_node, g_home_node, int, S_IRUGO);
MODULE_PARM_DESC(home_node, "Home node for the device");
-static int queue_mode = NULL_Q_MQ;
+static int g_queue_mode = NULL_Q_MQ;
static int null_param_store_val(const char *str, int *val, int min, int max)
{
@@ -92,7 +181,7 @@ static int null_param_store_val(const char *str, int *val, int min, int max)
static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
{
- return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ);
+ return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ);
}
static const struct kernel_param_ops null_queue_mode_param_ops = {
@@ -100,38 +189,38 @@ static const struct kernel_param_ops null_queue_mode_param_ops = {
.get = param_get_int,
};
-device_param_cb(queue_mode, &null_queue_mode_param_ops, &queue_mode, S_IRUGO);
+device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, S_IRUGO);
MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
-static int gb = 250;
-module_param(gb, int, S_IRUGO);
+static int g_gb = 250;
+module_param_named(gb, g_gb, int, S_IRUGO);
MODULE_PARM_DESC(gb, "Size in GB");
-static int bs = 512;
-module_param(bs, int, S_IRUGO);
+static int g_bs = 512;
+module_param_named(bs, g_bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");
static int nr_devices = 1;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
-static bool use_lightnvm;
-module_param(use_lightnvm, bool, S_IRUGO);
+static bool g_use_lightnvm;
+module_param_named(use_lightnvm, g_use_lightnvm, bool, S_IRUGO);
MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");
-static bool blocking;
-module_param(blocking, bool, S_IRUGO);
+static bool g_blocking;
+module_param_named(blocking, g_blocking, bool, S_IRUGO);
MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
static bool shared_tags;
module_param(shared_tags, bool, S_IRUGO);
MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
-static int irqmode = NULL_IRQ_SOFTIRQ;
+static int g_irqmode = NULL_IRQ_SOFTIRQ;
static int null_set_irqmode(const char *str, const struct kernel_param *kp)
{
- return null_param_store_val(str, &irqmode, NULL_IRQ_NONE,
+ return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE,
NULL_IRQ_TIMER);
}
@@ -140,21 +229,358 @@ static const struct kernel_param_ops null_irqmode_param_ops = {
.get = param_get_int,
};
-device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO);
+device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
-static unsigned long completion_nsec = 10000;
-module_param(completion_nsec, ulong, S_IRUGO);
+static unsigned long g_completion_nsec = 10000;
+module_param_named(completion_nsec, g_completion_nsec, ulong, S_IRUGO);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
-static int hw_queue_depth = 64;
-module_param(hw_queue_depth, int, S_IRUGO);
+static int g_hw_queue_depth = 64;
+module_param_named(hw_queue_depth, g_hw_queue_depth, int, S_IRUGO);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
-static bool use_per_node_hctx = false;
-module_param(use_per_node_hctx, bool, S_IRUGO);
+static bool g_use_per_node_hctx;
+module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, S_IRUGO);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
+static struct nullb_device *null_alloc_dev(void);
+static void null_free_dev(struct nullb_device *dev);
+static void null_del_dev(struct nullb *nullb);
+static int null_add_dev(struct nullb_device *dev);
+static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
+
+static inline struct nullb_device *to_nullb_device(struct config_item *item)
+{
+ return item ? container_of(item, struct nullb_device, item) : NULL;
+}
+
+static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", val);
+}
+
+static inline ssize_t nullb_device_ulong_attr_show(unsigned long val,
+ char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%lu\n", val);
+}
+
+static inline ssize_t nullb_device_bool_attr_show(bool val, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", val);
+}
+
+static ssize_t nullb_device_uint_attr_store(unsigned int *val,
+ const char *page, size_t count)
+{
+ unsigned int tmp;
+ int result;
+
+ result = kstrtouint(page, 0, &tmp);
+ if (result)
+ return result;
+
+ *val = tmp;
+ return count;
+}
+
+static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
+ const char *page, size_t count)
+{
+ int result;
+ unsigned long tmp;
+
+ result = kstrtoul(page, 0, &tmp);
+ if (result)
+ return result;
+
+ *val = tmp;
+ return count;
+}
+
+static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
+ size_t count)
+{
+ bool tmp;
+ int result;
+
+ result = kstrtobool(page, &tmp);
+ if (result)
+ return result;
+
+ *val = tmp;
+ return count;
+}
+
+/* The following macro should only be used with TYPE = {uint, ulong, bool}. */
+#define NULLB_DEVICE_ATTR(NAME, TYPE) \
+static ssize_t \
+nullb_device_##NAME##_show(struct config_item *item, char *page) \
+{ \
+ return nullb_device_##TYPE##_attr_show( \
+ to_nullb_device(item)->NAME, page); \
+} \
+static ssize_t \
+nullb_device_##NAME##_store(struct config_item *item, const char *page, \
+ size_t count) \
+{ \
+ if (test_bit(NULLB_DEV_FL_CONFIGURED, &to_nullb_device(item)->flags)) \
+ return -EBUSY; \
+ return nullb_device_##TYPE##_attr_store( \
+ &to_nullb_device(item)->NAME, page, count); \
+} \
+CONFIGFS_ATTR(nullb_device_, NAME);
+
+NULLB_DEVICE_ATTR(size, ulong);
+NULLB_DEVICE_ATTR(completion_nsec, ulong);
+NULLB_DEVICE_ATTR(submit_queues, uint);
+NULLB_DEVICE_ATTR(home_node, uint);
+NULLB_DEVICE_ATTR(queue_mode, uint);
+NULLB_DEVICE_ATTR(blocksize, uint);
+NULLB_DEVICE_ATTR(irqmode, uint);
+NULLB_DEVICE_ATTR(hw_queue_depth, uint);
+NULLB_DEVICE_ATTR(index, uint);
+NULLB_DEVICE_ATTR(use_lightnvm, bool);
+NULLB_DEVICE_ATTR(blocking, bool);
+NULLB_DEVICE_ATTR(use_per_node_hctx, bool);
+NULLB_DEVICE_ATTR(memory_backed, bool);
+NULLB_DEVICE_ATTR(discard, bool);
+NULLB_DEVICE_ATTR(mbps, uint);
+NULLB_DEVICE_ATTR(cache_size, ulong);
+
+static ssize_t nullb_device_power_show(struct config_item *item, char *page)
+{
+ return nullb_device_bool_attr_show(to_nullb_device(item)->power, page);
+}
+
+static ssize_t nullb_device_power_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nullb_device *dev = to_nullb_device(item);
+ bool newp = false;
+ ssize_t ret;
+
+ ret = nullb_device_bool_attr_store(&newp, page, count);
+ if (ret < 0)
+ return ret;
+
+ if (!dev->power && newp) {
+ if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
+ return count;
+ if (null_add_dev(dev)) {
+ clear_bit(NULLB_DEV_FL_UP, &dev->flags);
+ return -ENOMEM;
+ }
+
+ set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
+ dev->power = newp;
+ } else if (dev->power && !newp) {
+ mutex_lock(&lock);
+ dev->power = newp;
+ null_del_dev(dev->nullb);
+ mutex_unlock(&lock);
+ clear_bit(NULLB_DEV_FL_UP, &dev->flags);
+ }
+
+ return count;
+}
+
+CONFIGFS_ATTR(nullb_device_, power);
+
+static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page)
+{
+ struct nullb_device *t_dev = to_nullb_device(item);
+
+ return badblocks_show(&t_dev->badblocks, page, 0);
+}
+
+static ssize_t nullb_device_badblocks_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nullb_device *t_dev = to_nullb_device(item);
+ char *orig, *buf, *tmp;
+ u64 start, end;
+ int ret;
+
+ orig = kstrndup(page, count, GFP_KERNEL);
+ if (!orig)
+ return -ENOMEM;
+
+ buf = strstrip(orig);
+
+ ret = -EINVAL;
+ if (buf[0] != '+' && buf[0] != '-')
+ goto out;
+ tmp = strchr(&buf[1], '-');
+ if (!tmp)
+ goto out;
+ *tmp = '\0';
+ ret = kstrtoull(buf + 1, 0, &start);
+ if (ret)
+ goto out;
+ ret = kstrtoull(tmp + 1, 0, &end);
+ if (ret)
+ goto out;
+ ret = -EINVAL;
+ if (start > end)
+ goto out;
+ /* enable badblocks */
+ cmpxchg(&t_dev->badblocks.shift, -1, 0);
+ if (buf[0] == '+')
+ ret = badblocks_set(&t_dev->badblocks, start,
+ end - start + 1, 1);
+ else
+ ret = badblocks_clear(&t_dev->badblocks, start,
+ end - start + 1);
+ if (ret == 0)
+ ret = count;
+out:
+ kfree(orig);
+ return ret;
+}
+CONFIGFS_ATTR(nullb_device_, badblocks);
+
+static struct configfs_attribute *nullb_device_attrs[] = {
+ &nullb_device_attr_size,
+ &nullb_device_attr_completion_nsec,
+ &nullb_device_attr_submit_queues,
+ &nullb_device_attr_home_node,
+ &nullb_device_attr_queue_mode,
+ &nullb_device_attr_blocksize,
+ &nullb_device_attr_irqmode,
+ &nullb_device_attr_hw_queue_depth,
+ &nullb_device_attr_index,
+ &nullb_device_attr_use_lightnvm,
+ &nullb_device_attr_blocking,
+ &nullb_device_attr_use_per_node_hctx,
+ &nullb_device_attr_power,
+ &nullb_device_attr_memory_backed,
+ &nullb_device_attr_discard,
+ &nullb_device_attr_mbps,
+ &nullb_device_attr_cache_size,
+ &nullb_device_attr_badblocks,
+ NULL,
+};
+
+static void nullb_device_release(struct config_item *item)
+{
+ struct nullb_device *dev = to_nullb_device(item);
+
+ badblocks_exit(&dev->badblocks);
+ null_free_device_storage(dev, false);
+ null_free_dev(dev);
+}
+
+static struct configfs_item_operations nullb_device_ops = {
+ .release = nullb_device_release,
+};
+
+static struct config_item_type nullb_device_type = {
+ .ct_item_ops = &nullb_device_ops,
+ .ct_attrs = nullb_device_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct
+config_item *nullb_group_make_item(struct config_group *group, const char *name)
+{
+ struct nullb_device *dev;
+
+ dev = null_alloc_dev();
+ if (!dev)
+ return ERR_PTR(-ENOMEM);
+
+ config_item_init_type_name(&dev->item, name, &nullb_device_type);
+
+ return &dev->item;
+}
+
+static void
+nullb_group_drop_item(struct config_group *group, struct config_item *item)
+{
+ struct nullb_device *dev = to_nullb_device(item);
+
+ if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
+ mutex_lock(&lock);
+ dev->power = false;
+ null_del_dev(dev->nullb);
+ mutex_unlock(&lock);
+ }
+
+ config_item_put(item);
+}
+
+static ssize_t memb_group_features_show(struct config_item *item, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks\n");
+}
+
+CONFIGFS_ATTR_RO(memb_group_, features);
+
+static struct configfs_attribute *nullb_group_attrs[] = {
+ &memb_group_attr_features,
+ NULL,
+};
+
+static struct configfs_group_operations nullb_group_ops = {
+ .make_item = nullb_group_make_item,
+ .drop_item = nullb_group_drop_item,
+};
+
+static struct config_item_type nullb_group_type = {
+ .ct_group_ops = &nullb_group_ops,
+ .ct_attrs = nullb_group_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem nullb_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "nullb",
+ .ci_type = &nullb_group_type,
+ },
+ },
+};
+
+static inline int null_cache_active(struct nullb *nullb)
+{
+ return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
+}
+
+static struct nullb_device *null_alloc_dev(void)
+{
+ struct nullb_device *dev;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return NULL;
+ INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
+ INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
+ if (badblocks_init(&dev->badblocks, 0)) {
+ kfree(dev);
+ return NULL;
+ }
+
+ dev->size = g_gb * 1024;
+ dev->completion_nsec = g_completion_nsec;
+ dev->submit_queues = g_submit_queues;
+ dev->home_node = g_home_node;
+ dev->queue_mode = g_queue_mode;
+ dev->blocksize = g_bs;
+ dev->irqmode = g_irqmode;
+ dev->hw_queue_depth = g_hw_queue_depth;
+ dev->use_lightnvm = g_use_lightnvm;
+ dev->blocking = g_blocking;
+ dev->use_per_node_hctx = g_use_per_node_hctx;
+ return dev;
+}
+
+static void null_free_dev(struct nullb_device *dev)
+{
+ kfree(dev);
+}
+
static void put_tag(struct nullb_queue *nq, unsigned int tag)
{
clear_bit_unlock(tag, nq->tag_map);
@@ -193,7 +619,7 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
cmd = &nq->cmds[tag];
cmd->tag = tag;
cmd->nq = nq;
- if (irqmode == NULL_IRQ_TIMER) {
+ if (nq->dev->irqmode == NULL_IRQ_TIMER) {
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
cmd->timer.function = null_cmd_timer_expired;
@@ -229,19 +655,21 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
static void end_cmd(struct nullb_cmd *cmd)
{
struct request_queue *q = NULL;
+ int queue_mode = cmd->nq->dev->queue_mode;
if (cmd->rq)
q = cmd->rq->q;
switch (queue_mode) {
case NULL_Q_MQ:
- blk_mq_end_request(cmd->rq, BLK_STS_OK);
+ blk_mq_end_request(cmd->rq, cmd->error);
return;
case NULL_Q_RQ:
INIT_LIST_HEAD(&cmd->rq->queuelist);
- blk_end_request_all(cmd->rq, BLK_STS_OK);
+ blk_end_request_all(cmd->rq, cmd->error);
break;
case NULL_Q_BIO:
+ cmd->bio->bi_status = cmd->error;
bio_endio(cmd->bio);
break;
}
@@ -267,25 +695,582 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
- ktime_t kt = completion_nsec;
+ ktime_t kt = cmd->nq->dev->completion_nsec;
hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
}
static void null_softirq_done_fn(struct request *rq)
{
- if (queue_mode == NULL_Q_MQ)
+ struct nullb *nullb = rq->q->queuedata;
+
+ if (nullb->dev->queue_mode == NULL_Q_MQ)
end_cmd(blk_mq_rq_to_pdu(rq));
else
end_cmd(rq->special);
}
-static inline void null_handle_cmd(struct nullb_cmd *cmd)
+static struct nullb_page *null_alloc_page(gfp_t gfp_flags)
+{
+ struct nullb_page *t_page;
+
+ t_page = kmalloc(sizeof(struct nullb_page), gfp_flags);
+ if (!t_page)
+ goto out;
+
+ t_page->page = alloc_pages(gfp_flags, 0);
+ if (!t_page->page)
+ goto out_freepage;
+
+ t_page->bitmap = 0;
+ return t_page;
+out_freepage:
+ kfree(t_page);
+out:
+ return NULL;
+}
+
+static void null_free_page(struct nullb_page *t_page)
+{
+ __set_bit(NULLB_PAGE_FREE, &t_page->bitmap);
+ if (test_bit(NULLB_PAGE_LOCK, &t_page->bitmap))
+ return;
+ __free_page(t_page->page);
+ kfree(t_page);
+}
+
+static void null_free_sector(struct nullb *nullb, sector_t sector,
+ bool is_cache)
+{
+ unsigned int sector_bit;
+ u64 idx;
+ struct nullb_page *t_page, *ret;
+ struct radix_tree_root *root;
+
+ root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
+ idx = sector >> PAGE_SECTORS_SHIFT;
+ sector_bit = (sector & SECTOR_MASK);
+
+ t_page = radix_tree_lookup(root, idx);
+ if (t_page) {
+ __clear_bit(sector_bit, &t_page->bitmap);
+
+ if (!t_page->bitmap) {
+ ret = radix_tree_delete_item(root, idx, t_page);
+ WARN_ON(ret != t_page);
+ null_free_page(ret);
+ if (is_cache)
+ nullb->dev->curr_cache -= PAGE_SIZE;
+ }
+ }
+}
+
+static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx,
+ struct nullb_page *t_page, bool is_cache)
+{
+ struct radix_tree_root *root;
+
+ root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
+
+ if (radix_tree_insert(root, idx, t_page)) {
+ null_free_page(t_page);
+ t_page = radix_tree_lookup(root, idx);
+ WARN_ON(!t_page || t_page->page->index != idx);
+ } else if (is_cache)
+ nullb->dev->curr_cache += PAGE_SIZE;
+
+ return t_page;
+}
+
+static void null_free_device_storage(struct nullb_device *dev, bool is_cache)
+{
+ unsigned long pos = 0;
+ int nr_pages;
+ struct nullb_page *ret, *t_pages[FREE_BATCH];
+ struct radix_tree_root *root;
+
+ root = is_cache ? &dev->cache : &dev->data;
+
+ do {
+ int i;
+
+ nr_pages = radix_tree_gang_lookup(root,
+ (void **)t_pages, pos, FREE_BATCH);
+
+ for (i = 0; i < nr_pages; i++) {
+ pos = t_pages[i]->page->index;
+ ret = radix_tree_delete_item(root, pos, t_pages[i]);
+ WARN_ON(ret != t_pages[i]);
+ null_free_page(ret);
+ }
+
+ pos++;
+ } while (nr_pages == FREE_BATCH);
+
+ if (is_cache)
+ dev->curr_cache = 0;
+}
+
+static struct nullb_page *__null_lookup_page(struct nullb *nullb,
+ sector_t sector, bool for_write, bool is_cache)
+{
+ unsigned int sector_bit;
+ u64 idx;
+ struct nullb_page *t_page;
+ struct radix_tree_root *root;
+
+ idx = sector >> PAGE_SECTORS_SHIFT;
+ sector_bit = (sector & SECTOR_MASK);
+
+ root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
+ t_page = radix_tree_lookup(root, idx);
+ WARN_ON(t_page && t_page->page->index != idx);
+
+ if (t_page && (for_write || test_bit(sector_bit, &t_page->bitmap)))
+ return t_page;
+
+ return NULL;
+}
+
+static struct nullb_page *null_lookup_page(struct nullb *nullb,
+ sector_t sector, bool for_write, bool ignore_cache)
+{
+ struct nullb_page *page = NULL;
+
+ if (!ignore_cache)
+ page = __null_lookup_page(nullb, sector, for_write, true);
+ if (page)
+ return page;
+ return __null_lookup_page(nullb, sector, for_write, false);
+}
+
+static struct nullb_page *null_insert_page(struct nullb *nullb,
+ sector_t sector, bool ignore_cache)
+{
+ u64 idx;
+ struct nullb_page *t_page;
+
+ t_page = null_lookup_page(nullb, sector, true, ignore_cache);
+ if (t_page)
+ return t_page;
+
+ spin_unlock_irq(&nullb->lock);
+
+ t_page = null_alloc_page(GFP_NOIO);
+ if (!t_page)
+ goto out_lock;
+
+ if (radix_tree_preload(GFP_NOIO))
+ goto out_freepage;
+
+ spin_lock_irq(&nullb->lock);
+ idx = sector >> PAGE_SECTORS_SHIFT;
+ t_page->page->index = idx;
+ t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache);
+ radix_tree_preload_end();
+
+ return t_page;
+out_freepage:
+ null_free_page(t_page);
+out_lock:
+ spin_lock_irq(&nullb->lock);
+ return null_lookup_page(nullb, sector, true, ignore_cache);
+}
+
+static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
+{
+ int i;
+ unsigned int offset;
+ u64 idx;
+ struct nullb_page *t_page, *ret;
+ void *dst, *src;
+
+ idx = c_page->page->index;
+
+ t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
+
+ __clear_bit(NULLB_PAGE_LOCK, &c_page->bitmap);
+ if (test_bit(NULLB_PAGE_FREE, &c_page->bitmap)) {
+ null_free_page(c_page);
+ if (t_page && t_page->bitmap == 0) {
+ ret = radix_tree_delete_item(&nullb->dev->data,
+ idx, t_page);
+ null_free_page(t_page);
+ }
+ return 0;
+ }
+
+ if (!t_page)
+ return -ENOMEM;
+
+ src = kmap_atomic(c_page->page);
+ dst = kmap_atomic(t_page->page);
+
+ for (i = 0; i < PAGE_SECTORS;
+ i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
+ if (test_bit(i, &c_page->bitmap)) {
+ offset = (i << SECTOR_SHIFT);
+ memcpy(dst + offset, src + offset,
+ nullb->dev->blocksize);
+ __set_bit(i, &t_page->bitmap);
+ }
+ }
+
+ kunmap_atomic(dst);
+ kunmap_atomic(src);
+
+ ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
+ null_free_page(ret);
+ nullb->dev->curr_cache -= PAGE_SIZE;
+
+ return 0;
+}
+
+static int null_make_cache_space(struct nullb *nullb, unsigned long n)
{
+ int i, err, nr_pages;
+ struct nullb_page *c_pages[FREE_BATCH];
+ unsigned long flushed = 0, one_round;
+
+again:
+ if ((nullb->dev->cache_size * 1024 * 1024) >
+ nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0)
+ return 0;
+
+ nr_pages = radix_tree_gang_lookup(&nullb->dev->cache,
+ (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH);
+ /*
+ * nullb_flush_cache_page could unlock before using the c_pages. To
+ * avoid race, we don't allow page free
+ */
+ for (i = 0; i < nr_pages; i++) {
+ nullb->cache_flush_pos = c_pages[i]->page->index;
+ /*
+ * We found the page which is being flushed to disk by other
+ * threads
+ */
+ if (test_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap))
+ c_pages[i] = NULL;
+ else
+ __set_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap);
+ }
+
+ one_round = 0;
+ for (i = 0; i < nr_pages; i++) {
+ if (c_pages[i] == NULL)
+ continue;
+ err = null_flush_cache_page(nullb, c_pages[i]);
+ if (err)
+ return err;
+ one_round++;
+ }
+ flushed += one_round << PAGE_SHIFT;
+
+ if (n > flushed) {
+ if (nr_pages == 0)
+ nullb->cache_flush_pos = 0;
+ if (one_round == 0) {
+ /* give other threads a chance */
+ spin_unlock_irq(&nullb->lock);
+ spin_lock_irq(&nullb->lock);
+ }
+ goto again;
+ }
+ return 0;
+}
+
+static int copy_to_nullb(struct nullb *nullb, struct page *source,
+ unsigned int off, sector_t sector, size_t n, bool is_fua)
+{
+ size_t temp, count = 0;
+ unsigned int offset;
+ struct nullb_page *t_page;
+ void *dst, *src;
+
+ while (count < n) {
+ temp = min_t(size_t, nullb->dev->blocksize, n - count);
+
+ if (null_cache_active(nullb) && !is_fua)
+ null_make_cache_space(nullb, PAGE_SIZE);
+
+ offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
+ t_page = null_insert_page(nullb, sector,
+ !null_cache_active(nullb) || is_fua);
+ if (!t_page)
+ return -ENOSPC;
+
+ src = kmap_atomic(source);
+ dst = kmap_atomic(t_page->page);
+ memcpy(dst + offset, src + off + count, temp);
+ kunmap_atomic(dst);
+ kunmap_atomic(src);
+
+ __set_bit(sector & SECTOR_MASK, &t_page->bitmap);
+
+ if (is_fua)
+ null_free_sector(nullb, sector, true);
+
+ count += temp;
+ sector += temp >> SECTOR_SHIFT;
+ }
+ return 0;
+}
+
+static int copy_from_nullb(struct nullb *nullb, struct page *dest,
+ unsigned int off, sector_t sector, size_t n)
+{
+ size_t temp, count = 0;
+ unsigned int offset;
+ struct nullb_page *t_page;
+ void *dst, *src;
+
+ while (count < n) {
+ temp = min_t(size_t, nullb->dev->blocksize, n - count);
+
+ offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
+ t_page = null_lookup_page(nullb, sector, false,
+ !null_cache_active(nullb));
+
+ dst = kmap_atomic(dest);
+ if (!t_page) {
+ memset(dst + off + count, 0, temp);
+ goto next;
+ }
+ src = kmap_atomic(t_page->page);
+ memcpy(dst + off + count, src + offset, temp);
+ kunmap_atomic(src);
+next:
+ kunmap_atomic(dst);
+
+ count += temp;
+ sector += temp >> SECTOR_SHIFT;
+ }
+ return 0;
+}
+
+static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n)
+{
+ size_t temp;
+
+ spin_lock_irq(&nullb->lock);
+ while (n > 0) {
+ temp = min_t(size_t, n, nullb->dev->blocksize);
+ null_free_sector(nullb, sector, false);
+ if (null_cache_active(nullb))
+ null_free_sector(nullb, sector, true);
+ sector += temp >> SECTOR_SHIFT;
+ n -= temp;
+ }
+ spin_unlock_irq(&nullb->lock);
+}
+
+static int null_handle_flush(struct nullb *nullb)
+{
+ int err;
+
+ if (!null_cache_active(nullb))
+ return 0;
+
+ spin_lock_irq(&nullb->lock);
+ while (true) {
+ err = null_make_cache_space(nullb,
+ nullb->dev->cache_size * 1024 * 1024);
+ if (err || nullb->dev->curr_cache == 0)
+ break;
+ }
+
+ WARN_ON(!radix_tree_empty(&nullb->dev->cache));
+ spin_unlock_irq(&nullb->lock);
+ return err;
+}
+
+static int null_transfer(struct nullb *nullb, struct page *page,
+ unsigned int len, unsigned int off, bool is_write, sector_t sector,
+ bool is_fua)
+{
+ int err = 0;
+
+ if (!is_write) {
+ err = copy_from_nullb(nullb, page, off, sector, len);
+ flush_dcache_page(page);
+ } else {
+ flush_dcache_page(page);
+ err = copy_to_nullb(nullb, page, off, sector, len, is_fua);
+ }
+
+ return err;
+}
+
+static int null_handle_rq(struct nullb_cmd *cmd)
+{
+ struct request *rq = cmd->rq;
+ struct nullb *nullb = cmd->nq->dev->nullb;
+ int err;
+ unsigned int len;
+ sector_t sector;
+ struct req_iterator iter;
+ struct bio_vec bvec;
+
+ sector = blk_rq_pos(rq);
+
+ if (req_op(rq) == REQ_OP_DISCARD) {
+ null_handle_discard(nullb, sector, blk_rq_bytes(rq));
+ return 0;
+ }
+
+ spin_lock_irq(&nullb->lock);
+ rq_for_each_segment(bvec, rq, iter) {
+ len = bvec.bv_len;
+ err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
+ op_is_write(req_op(rq)), sector,
+ req_op(rq) & REQ_FUA);
+ if (err) {
+ spin_unlock_irq(&nullb->lock);
+ return err;
+ }
+ sector += len >> SECTOR_SHIFT;
+ }
+ spin_unlock_irq(&nullb->lock);
+
+ return 0;
+}
+
+static int null_handle_bio(struct nullb_cmd *cmd)
+{
+ struct bio *bio = cmd->bio;
+ struct nullb *nullb = cmd->nq->dev->nullb;
+ int err;
+ unsigned int len;
+ sector_t sector;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
+
+ sector = bio->bi_iter.bi_sector;
+
+ if (bio_op(bio) == REQ_OP_DISCARD) {
+ null_handle_discard(nullb, sector,
+ bio_sectors(bio) << SECTOR_SHIFT);
+ return 0;
+ }
+
+ spin_lock_irq(&nullb->lock);
+ bio_for_each_segment(bvec, bio, iter) {
+ len = bvec.bv_len;
+ err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
+ op_is_write(bio_op(bio)), sector,
+ bio_op(bio) & REQ_FUA);
+ if (err) {
+ spin_unlock_irq(&nullb->lock);
+ return err;
+ }
+ sector += len >> SECTOR_SHIFT;
+ }
+ spin_unlock_irq(&nullb->lock);
+ return 0;
+}
+
+static void null_stop_queue(struct nullb *nullb)
+{
+ struct request_queue *q = nullb->q;
+
+ if (nullb->dev->queue_mode == NULL_Q_MQ)
+ blk_mq_stop_hw_queues(q);
+ else {
+ spin_lock_irq(q->queue_lock);
+ blk_stop_queue(q);
+ spin_unlock_irq(q->queue_lock);
+ }
+}
+
+static void null_restart_queue_async(struct nullb *nullb)
+{
+ struct request_queue *q = nullb->q;
+ unsigned long flags;
+
+ if (nullb->dev->queue_mode == NULL_Q_MQ)
+ blk_mq_start_stopped_hw_queues(q, true);
+ else {
+ spin_lock_irqsave(q->queue_lock, flags);
+ blk_start_queue_async(q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+ }
+}
+
+static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
+{
+ struct nullb_device *dev = cmd->nq->dev;
+ struct nullb *nullb = dev->nullb;
+ int err = 0;
+
+ if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
+ struct request *rq = cmd->rq;
+
+ if (!hrtimer_active(&nullb->bw_timer))
+ hrtimer_restart(&nullb->bw_timer);
+
+ if (atomic_long_sub_return(blk_rq_bytes(rq),
+ &nullb->cur_bytes) < 0) {
+ null_stop_queue(nullb);
+ /* race with timer */
+ if (atomic_long_read(&nullb->cur_bytes) > 0)
+ null_restart_queue_async(nullb);
+ if (dev->queue_mode == NULL_Q_RQ) {
+ struct request_queue *q = nullb->q;
+
+ spin_lock_irq(q->queue_lock);
+ rq->rq_flags |= RQF_DONTPREP;
+ blk_requeue_request(q, rq);
+ spin_unlock_irq(q->queue_lock);
+ return BLK_STS_OK;
+ } else
+ /* requeue request */
+ return BLK_STS_RESOURCE;
+ }
+ }
+
+ if (nullb->dev->badblocks.shift != -1) {
+ int bad_sectors;
+ sector_t sector, size, first_bad;
+ bool is_flush = true;
+
+ if (dev->queue_mode == NULL_Q_BIO &&
+ bio_op(cmd->bio) != REQ_OP_FLUSH) {
+ is_flush = false;
+ sector = cmd->bio->bi_iter.bi_sector;
+ size = bio_sectors(cmd->bio);
+ }
+ if (dev->queue_mode != NULL_Q_BIO &&
+ req_op(cmd->rq) != REQ_OP_FLUSH) {
+ is_flush = false;
+ sector = blk_rq_pos(cmd->rq);
+ size = blk_rq_sectors(cmd->rq);
+ }
+ if (!is_flush && badblocks_check(&nullb->dev->badblocks, sector,
+ size, &first_bad, &bad_sectors)) {
+ cmd->error = BLK_STS_IOERR;
+ goto out;
+ }
+ }
+
+ if (dev->memory_backed) {
+ if (dev->queue_mode == NULL_Q_BIO) {
+ if (bio_op(cmd->bio) == REQ_OP_FLUSH)
+ err = null_handle_flush(nullb);
+ else
+ err = null_handle_bio(cmd);
+ } else {
+ if (req_op(cmd->rq) == REQ_OP_FLUSH)
+ err = null_handle_flush(nullb);
+ else
+ err = null_handle_rq(cmd);
+ }
+ }
+ cmd->error = errno_to_blk_status(err);
+out:
/* Complete IO by inline, softirq or timer */
- switch (irqmode) {
+ switch (dev->irqmode) {
case NULL_IRQ_SOFTIRQ:
- switch (queue_mode) {
+ switch (dev->queue_mode) {
case NULL_Q_MQ:
blk_mq_complete_request(cmd->rq);
break;
@@ -307,6 +1292,34 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd)
null_cmd_end_timer(cmd);
break;
}
+ return BLK_STS_OK;
+}
+
+static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
+{
+ struct nullb *nullb = container_of(timer, struct nullb, bw_timer);
+ ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
+ unsigned int mbps = nullb->dev->mbps;
+
+ if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps))
+ return HRTIMER_NORESTART;
+
+ atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
+ null_restart_queue_async(nullb);
+
+ hrtimer_forward_now(&nullb->bw_timer, timer_interval);
+
+ return HRTIMER_RESTART;
+}
+
+static void nullb_setup_bwtimer(struct nullb *nullb)
+{
+ ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
+
+ hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ nullb->bw_timer.function = nullb_bwtimer_fn;
+ atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps));
+ hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
}
static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
@@ -366,20 +1379,20 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+ struct nullb_queue *nq = hctx->driver_data;
might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
- if (irqmode == NULL_IRQ_TIMER) {
+ if (nq->dev->irqmode == NULL_IRQ_TIMER) {
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cmd->timer.function = null_cmd_timer_expired;
}
cmd->rq = bd->rq;
- cmd->nq = hctx->driver_data;
+ cmd->nq = nq;
blk_mq_start_request(bd->rq);
- null_handle_cmd(cmd);
- return BLK_STS_OK;
+ return null_handle_cmd(cmd);
}
static const struct blk_mq_ops null_mq_ops = {
@@ -438,7 +1451,8 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
{
- sector_t size = gb * 1024 * 1024 * 1024ULL;
+ struct nullb *nullb = dev->q->queuedata;
+ sector_t size = (sector_t)nullb->dev->size * 1024 * 1024ULL;
sector_t blksize;
struct nvm_id_group *grp;
@@ -460,7 +1474,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
id->ppaf.ch_offset = 56;
id->ppaf.ch_len = 8;
- sector_div(size, bs); /* convert size to pages */
+ sector_div(size, nullb->dev->blocksize); /* convert size to pages */
size >>= 8; /* concert size to pgs pr blk */
grp = &id->grp;
grp->mtype = 0;
@@ -474,8 +1488,8 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
grp->num_blk = blksize;
grp->num_pln = 1;
- grp->fpg_sz = bs;
- grp->csecs = bs;
+ grp->fpg_sz = nullb->dev->blocksize;
+ grp->csecs = nullb->dev->blocksize;
grp->trdt = 25000;
grp->trdm = 25000;
grp->tprt = 500000;
@@ -483,7 +1497,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
grp->tbet = 1500000;
grp->tbem = 1500000;
grp->mpos = 0x010101; /* single plane rwe */
- grp->cpar = hw_queue_depth;
+ grp->cpar = nullb->dev->hw_queue_depth;
return 0;
}
@@ -568,19 +1582,44 @@ static void null_nvm_unregister(struct nullb *nullb) {}
static void null_del_dev(struct nullb *nullb)
{
+ struct nullb_device *dev = nullb->dev;
+
+ ida_simple_remove(&nullb_indexes, nullb->index);
+
list_del_init(&nullb->list);
- if (use_lightnvm)
+ if (dev->use_lightnvm)
null_nvm_unregister(nullb);
else
del_gendisk(nullb->disk);
+
+ if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
+ hrtimer_cancel(&nullb->bw_timer);
+ atomic_long_set(&nullb->cur_bytes, LONG_MAX);
+ null_restart_queue_async(nullb);
+ }
+
blk_cleanup_queue(nullb->q);
- if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+ if (dev->queue_mode == NULL_Q_MQ &&
+ nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
- if (!use_lightnvm)
+ if (!dev->use_lightnvm)
put_disk(nullb->disk);
cleanup_queues(nullb);
+ if (null_cache_active(nullb))
+ null_free_device_storage(nullb->dev, true);
kfree(nullb);
+ dev->nullb = NULL;
+}
+
+static void null_config_discard(struct nullb *nullb)
+{
+ if (nullb->dev->discard == false)
+ return;
+ nullb->q->limits.discard_granularity = nullb->dev->blocksize;
+ nullb->q->limits.discard_alignment = nullb->dev->blocksize;
+ blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nullb->q);
}
static int null_open(struct block_device *bdev, fmode_t mode)
@@ -605,6 +1644,7 @@ static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
init_waitqueue_head(&nq->wait);
nq->queue_depth = nullb->queue_depth;
+ nq->dev = nullb->dev;
}
static void null_init_queues(struct nullb *nullb)
@@ -652,13 +1692,13 @@ static int setup_commands(struct nullb_queue *nq)
static int setup_queues(struct nullb *nullb)
{
- nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
- GFP_KERNEL);
+ nullb->queues = kzalloc(nullb->dev->submit_queues *
+ sizeof(struct nullb_queue), GFP_KERNEL);
if (!nullb->queues)
return -ENOMEM;
nullb->nr_queues = 0;
- nullb->queue_depth = hw_queue_depth;
+ nullb->queue_depth = nullb->dev->hw_queue_depth;
return 0;
}
@@ -668,7 +1708,7 @@ static int init_driver_queues(struct nullb *nullb)
struct nullb_queue *nq;
int i, ret = 0;
- for (i = 0; i < submit_queues; i++) {
+ for (i = 0; i < nullb->dev->submit_queues; i++) {
nq = &nullb->queues[i];
null_init_queue(nullb, nq);
@@ -686,10 +1726,10 @@ static int null_gendisk_register(struct nullb *nullb)
struct gendisk *disk;
sector_t size;
- disk = nullb->disk = alloc_disk_node(1, home_node);
+ disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node);
if (!disk)
return -ENOMEM;
- size = gb * 1024 * 1024 * 1024ULL;
+ size = (sector_t)nullb->dev->size * 1024 * 1024ULL;
set_capacity(disk, size >> 9);
disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
@@ -704,49 +1744,86 @@ static int null_gendisk_register(struct nullb *nullb)
return 0;
}
-static int null_init_tag_set(struct blk_mq_tag_set *set)
+static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
{
set->ops = &null_mq_ops;
- set->nr_hw_queues = submit_queues;
- set->queue_depth = hw_queue_depth;
- set->numa_node = home_node;
+ set->nr_hw_queues = nullb ? nullb->dev->submit_queues :
+ g_submit_queues;
+ set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
+ g_hw_queue_depth;
+ set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
set->cmd_size = sizeof(struct nullb_cmd);
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->driver_data = NULL;
- if (blocking)
+ if ((nullb && nullb->dev->blocking) || g_blocking)
set->flags |= BLK_MQ_F_BLOCKING;
return blk_mq_alloc_tag_set(set);
}
-static int null_add_dev(void)
+static void null_validate_conf(struct nullb_device *dev)
+{
+ dev->blocksize = round_down(dev->blocksize, 512);
+ dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
+ if (dev->use_lightnvm && dev->blocksize != 4096)
+ dev->blocksize = 4096;
+
+ if (dev->use_lightnvm && dev->queue_mode != NULL_Q_MQ)
+ dev->queue_mode = NULL_Q_MQ;
+
+ if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) {
+ if (dev->submit_queues != nr_online_nodes)
+ dev->submit_queues = nr_online_nodes;
+ } else if (dev->submit_queues > nr_cpu_ids)
+ dev->submit_queues = nr_cpu_ids;
+ else if (dev->submit_queues == 0)
+ dev->submit_queues = 1;
+
+ dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
+ dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
+
+ /* Do memory allocation, so set blocking */
+ if (dev->memory_backed)
+ dev->blocking = true;
+ else /* cache is meaningless */
+ dev->cache_size = 0;
+ dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
+ dev->cache_size);
+ dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
+ /* can not stop a queue */
+ if (dev->queue_mode == NULL_Q_BIO)
+ dev->mbps = 0;
+}
+
+static int null_add_dev(struct nullb_device *dev)
{
struct nullb *nullb;
int rv;
- nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
+ null_validate_conf(dev);
+
+ nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node);
if (!nullb) {
rv = -ENOMEM;
goto out;
}
+ nullb->dev = dev;
+ dev->nullb = nullb;
spin_lock_init(&nullb->lock);
- if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
- submit_queues = nr_online_nodes;
-
rv = setup_queues(nullb);
if (rv)
goto out_free_nullb;
- if (queue_mode == NULL_Q_MQ) {
+ if (dev->queue_mode == NULL_Q_MQ) {
if (shared_tags) {
nullb->tag_set = &tag_set;
rv = 0;
} else {
nullb->tag_set = &nullb->__tag_set;
- rv = null_init_tag_set(nullb->tag_set);
+ rv = null_init_tag_set(nullb, nullb->tag_set);
}
if (rv)
@@ -758,8 +1835,8 @@ static int null_add_dev(void)
goto out_cleanup_tags;
}
null_init_queues(nullb);
- } else if (queue_mode == NULL_Q_BIO) {
- nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
+ } else if (dev->queue_mode == NULL_Q_BIO) {
+ nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node);
if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_queues;
@@ -769,7 +1846,8 @@ static int null_add_dev(void)
if (rv)
goto out_cleanup_blk_queue;
} else {
- nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
+ nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock,
+ dev->home_node);
if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_queues;
@@ -781,20 +1859,34 @@ static int null_add_dev(void)
goto out_cleanup_blk_queue;
}
+ if (dev->mbps) {
+ set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
+ nullb_setup_bwtimer(nullb);
+ }
+
+ if (dev->cache_size > 0) {
+ set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
+ blk_queue_write_cache(nullb->q, true, true);
+ blk_queue_flush_queueable(nullb->q, true);
+ }
+
nullb->q->queuedata = nullb;
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
mutex_lock(&lock);
- nullb->index = nullb_indexes++;
+ nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
+ dev->index = nullb->index;
mutex_unlock(&lock);
- blk_queue_logical_block_size(nullb->q, bs);
- blk_queue_physical_block_size(nullb->q, bs);
+ blk_queue_logical_block_size(nullb->q, dev->blocksize);
+ blk_queue_physical_block_size(nullb->q, dev->blocksize);
+
+ null_config_discard(nullb);
sprintf(nullb->disk_name, "nullb%d", nullb->index);
- if (use_lightnvm)
+ if (dev->use_lightnvm)
rv = null_nvm_register(nullb);
else
rv = null_gendisk_register(nullb);
@@ -810,7 +1902,7 @@ static int null_add_dev(void)
out_cleanup_blk_queue:
blk_cleanup_queue(nullb->q);
out_cleanup_tags:
- if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+ if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
out_cleanup_queues:
cleanup_queues(nullb);
@@ -825,51 +1917,63 @@ static int __init null_init(void)
int ret = 0;
unsigned int i;
struct nullb *nullb;
+ struct nullb_device *dev;
+
+ /* check for nullb_page.bitmap */
+ if (sizeof(unsigned long) * 8 - 2 < (PAGE_SIZE >> SECTOR_SHIFT))
+ return -EINVAL;
- if (bs > PAGE_SIZE) {
+ if (g_bs > PAGE_SIZE) {
pr_warn("null_blk: invalid block size\n");
pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
- bs = PAGE_SIZE;
+ g_bs = PAGE_SIZE;
}
- if (use_lightnvm && bs != 4096) {
+ if (g_use_lightnvm && g_bs != 4096) {
pr_warn("null_blk: LightNVM only supports 4k block size\n");
pr_warn("null_blk: defaults block size to 4k\n");
- bs = 4096;
+ g_bs = 4096;
}
- if (use_lightnvm && queue_mode != NULL_Q_MQ) {
+ if (g_use_lightnvm && g_queue_mode != NULL_Q_MQ) {
pr_warn("null_blk: LightNVM only supported for blk-mq\n");
pr_warn("null_blk: defaults queue mode to blk-mq\n");
- queue_mode = NULL_Q_MQ;
+ g_queue_mode = NULL_Q_MQ;
}
- if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
- if (submit_queues < nr_online_nodes) {
- pr_warn("null_blk: submit_queues param is set to %u.",
+ if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
+ if (g_submit_queues != nr_online_nodes) {
+ pr_warn("null_blk: submit_queues param is set to %u.\n",
nr_online_nodes);
- submit_queues = nr_online_nodes;
+ g_submit_queues = nr_online_nodes;
}
- } else if (submit_queues > nr_cpu_ids)
- submit_queues = nr_cpu_ids;
- else if (!submit_queues)
- submit_queues = 1;
+ } else if (g_submit_queues > nr_cpu_ids)
+ g_submit_queues = nr_cpu_ids;
+ else if (g_submit_queues <= 0)
+ g_submit_queues = 1;
- if (queue_mode == NULL_Q_MQ && shared_tags) {
- ret = null_init_tag_set(&tag_set);
+ if (g_queue_mode == NULL_Q_MQ && shared_tags) {
+ ret = null_init_tag_set(NULL, &tag_set);
if (ret)
return ret;
}
+ config_group_init(&nullb_subsys.su_group);
+ mutex_init(&nullb_subsys.su_mutex);
+
+ ret = configfs_register_subsystem(&nullb_subsys);
+ if (ret)
+ goto err_tagset;
+
mutex_init(&lock);
null_major = register_blkdev(0, "nullb");
if (null_major < 0) {
ret = null_major;
- goto err_tagset;
+ goto err_conf;
}
- if (use_lightnvm) {
+ if (g_use_lightnvm) {
ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
0, 0, NULL);
if (!ppa_cache) {
@@ -880,9 +1984,14 @@ static int __init null_init(void)
}
for (i = 0; i < nr_devices; i++) {
- ret = null_add_dev();
- if (ret)
+ dev = null_alloc_dev();
+ if (!dev)
+ goto err_dev;
+ ret = null_add_dev(dev);
+ if (ret) {
+ null_free_dev(dev);
goto err_dev;
+ }
}
pr_info("null: module loaded\n");
@@ -891,13 +2000,17 @@ static int __init null_init(void)
err_dev:
while (!list_empty(&nullb_list)) {
nullb = list_entry(nullb_list.next, struct nullb, list);
+ dev = nullb->dev;
null_del_dev(nullb);
+ null_free_dev(dev);
}
kmem_cache_destroy(ppa_cache);
err_ppa:
unregister_blkdev(null_major, "nullb");
+err_conf:
+ configfs_unregister_subsystem(&nullb_subsys);
err_tagset:
- if (queue_mode == NULL_Q_MQ && shared_tags)
+ if (g_queue_mode == NULL_Q_MQ && shared_tags)
blk_mq_free_tag_set(&tag_set);
return ret;
}
@@ -906,16 +2019,22 @@ static void __exit null_exit(void)
{
struct nullb *nullb;
+ configfs_unregister_subsystem(&nullb_subsys);
+
unregister_blkdev(null_major, "nullb");
mutex_lock(&lock);
while (!list_empty(&nullb_list)) {
+ struct nullb_device *dev;
+
nullb = list_entry(nullb_list.next, struct nullb, list);
+ dev = nullb->dev;
null_del_dev(nullb);
+ null_free_dev(dev);
}
mutex_unlock(&lock);
- if (queue_mode == NULL_Q_MQ && shared_tags)
+ if (g_queue_mode == NULL_Q_MQ && shared_tags)
blk_mq_free_tag_set(&tag_set);
kmem_cache_destroy(ppa_cache);
@@ -924,5 +2043,5 @@ static void __exit null_exit(void)
module_init(null_init);
module_exit(null_exit);
-MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
+MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
MODULE_LICENSE("GPL");
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 6b8b097abbb9..67974796c350 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1028,7 +1028,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
bio = pkt->r_bios[f];
bio_reset(bio);
bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
- bio->bi_bdev = pd->bdev;
+ bio_set_dev(bio, pd->bdev);
bio->bi_end_io = pkt_end_io_read;
bio->bi_private = pkt;
@@ -1122,7 +1122,7 @@ static int pkt_start_recovery(struct packet_data *pkt)
pkt->sector = new_sector;
bio_reset(pkt->bio);
- pkt->bio->bi_bdev = pd->bdev;
+ bio_set_set(pkt->bio, pd->bdev);
bio_set_op_attrs(pkt->bio, REQ_OP_WRITE, 0);
pkt->bio->bi_iter.bi_sector = new_sector;
pkt->bio->bi_iter.bi_size = pkt->frames * CD_FRAMESIZE;
@@ -1267,7 +1267,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
bio_reset(pkt->w_bio);
pkt->w_bio->bi_iter.bi_sector = pkt->sector;
- pkt->w_bio->bi_bdev = pd->bdev;
+ bio_set_dev(pkt->w_bio, pd->bdev);
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
pkt->w_bio->bi_private = pkt;
@@ -2314,7 +2314,7 @@ static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
psd->pd = pd;
psd->bio = bio;
- cloned_bio->bi_bdev = pd->bdev;
+ bio_set_dev(cloned_bio, pd->bdev);
cloned_bio->bi_private = psd;
cloned_bio->bi_end_io = pkt_end_io_read_cloned;
pd->stats.secs_r += bio_sectors(bio);
@@ -2415,8 +2415,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
pd = q->queuedata;
if (!pd) {
- pr_err("%s incorrect request queue\n",
- bdevname(bio->bi_bdev, b));
+ pr_err("%s incorrect request queue\n", bio_devname(bio, b));
goto end_io;
}
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index e0e81cacd781..6a55959cbf78 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -409,10 +409,8 @@ static int ps3vram_cache_init(struct ps3_system_bus_device *dev)
priv->cache.page_size = CACHE_PAGE_SIZE;
priv->cache.tags = kzalloc(sizeof(struct ps3vram_tag) *
CACHE_PAGE_COUNT, GFP_KERNEL);
- if (priv->cache.tags == NULL) {
- dev_err(&dev->core, "Could not allocate cache tags\n");
+ if (!priv->cache.tags)
return -ENOMEM;
- }
dev_info(&dev->core, "Created ram cache: %d entries, %d KiB each\n",
CACHE_PAGE_COUNT, CACHE_PAGE_SIZE / 1024);
@@ -743,7 +741,11 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
goto out_unmap_reports;
}
- ps3vram_cache_init(dev);
+ error = ps3vram_cache_init(dev);
+ if (error < 0) {
+ goto out_unmap_reports;
+ }
+
ps3vram_proc_init(dev);
queue = blk_alloc_queue(GFP_KERNEL);
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 7f4acebf4657..e397d3ee7308 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -112,7 +112,7 @@ static const struct block_device_operations rsxx_fops = {
static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio)
{
- generic_start_io_acct(bio_data_dir(bio), bio_sectors(bio),
+ generic_start_io_acct(card->queue, bio_data_dir(bio), bio_sectors(bio),
&card->gendisk->part0);
}
@@ -120,8 +120,8 @@ static void disk_stats_complete(struct rsxx_cardinfo *card,
struct bio *bio,
unsigned long start_time)
{
- generic_end_io_acct(bio_data_dir(bio), &card->gendisk->part0,
- start_time);
+ generic_end_io_acct(card->queue, bio_data_dir(bio),
+ &card->gendisk->part0, start_time);
}
static void bio_dma_done_cb(struct rsxx_cardinfo *card,
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index d0368682bd43..7cedb4295e9d 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -1,19 +1,12 @@
-/* Copyright 2012 STEC, Inc.
+/*
+ * Driver for sTec s1120 PCIe SSDs. sTec was acquired in 2013 by HGST and HGST
+ * was acquired by Western Digital in 2012.
+ *
+ * Copyright 2012 sTec, Inc.
+ * Copyright (c) 2017 Western Digital Corporation or its affiliates.
*
- * This file is licensed under the terms of the 3-clause
- * BSD License (http://opensource.org/licenses/BSD-3-Clause)
- * or the GNU GPL-2.0 (http://www.gnu.org/licenses/gpl-2.0.html),
- * at your option. Both licenses are also available in the LICENSE file
- * distributed with this project. This file may not be copied, modified,
- * or distributed except in accordance with those terms.
- * Gordoni Waidhofer <gwaidhofer@stec-inc.com>
- * Initial Driver Design!
- * Thomas Swann <tswann@stec-inc.com>
- * Interrupt handling.
- * Ramprasad Chinthekindi <rchinthekindi@stec-inc.com>
- * biomode implementation.
- * Akhil Bhansali <abhansali@stec-inc.com>
- * Added support for DISCARD / FLUSH and FUA.
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
*/
#include <linux/kernel.h>
@@ -23,11 +16,11 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/compiler.h>
#include <linux/workqueue.h>
-#include <linux/bitops.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/hdreg.h>
@@ -37,9 +30,9 @@
#include <linux/version.h>
#include <linux/err.h>
#include <linux/aer.h>
-#include <linux/ctype.h>
#include <linux/wait.h>
-#include <linux/uio.h>
+#include <linux/stringify.h>
+#include <linux/slab_def.h>
#include <scsi/scsi.h>
#include <scsi/sg.h>
#include <linux/io.h>
@@ -51,19 +44,6 @@
static int skd_dbg_level;
static int skd_isr_comp_limit = 4;
-enum {
- STEC_LINK_2_5GTS = 0,
- STEC_LINK_5GTS = 1,
- STEC_LINK_8GTS = 2,
- STEC_LINK_UNKNOWN = 0xFF
-};
-
-enum {
- SKD_FLUSH_INITIALIZER,
- SKD_FLUSH_ZERO_SIZE_FIRST,
- SKD_FLUSH_DATA_SECOND,
-};
-
#define SKD_ASSERT(expr) \
do { \
if (unlikely(!(expr))) { \
@@ -73,17 +53,11 @@ enum {
} while (0)
#define DRV_NAME "skd"
-#define DRV_VERSION "2.2.1"
-#define DRV_BUILD_ID "0260"
#define PFX DRV_NAME ": "
-#define DRV_BIN_VERSION 0x100
-#define DRV_VER_COMPL "2.2.1." DRV_BUILD_ID
-MODULE_AUTHOR("bug-reports: support@stec-inc.com");
-MODULE_LICENSE("Dual BSD/GPL");
+MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("STEC s1120 PCIe SSD block driver (b" DRV_BUILD_ID ")");
-MODULE_VERSION(DRV_VERSION "-" DRV_BUILD_ID);
+MODULE_DESCRIPTION("STEC s1120 PCIe SSD block driver");
#define PCI_VENDOR_ID_STEC 0x1B39
#define PCI_DEVICE_ID_S1120 0x0001
@@ -96,34 +70,32 @@ MODULE_VERSION(DRV_VERSION "-" DRV_BUILD_ID);
#define SKD_PAUSE_TIMEOUT (5 * 1000)
#define SKD_N_FITMSG_BYTES (512u)
+#define SKD_MAX_REQ_PER_MSG 14
-#define SKD_N_SPECIAL_CONTEXT 32u
#define SKD_N_SPECIAL_FITMSG_BYTES (128u)
/* SG elements are 32 bytes, so we can make this 4096 and still be under the
* 128KB limit. That allows 4096*4K = 16M xfer size
*/
#define SKD_N_SG_PER_REQ_DEFAULT 256u
-#define SKD_N_SG_PER_SPECIAL 256u
#define SKD_N_COMPLETION_ENTRY 256u
#define SKD_N_READ_CAP_BYTES (8u)
#define SKD_N_INTERNAL_BYTES (512u)
+#define SKD_SKCOMP_SIZE \
+ ((sizeof(struct fit_completion_entry_v1) + \
+ sizeof(struct fit_comp_error_info)) * SKD_N_COMPLETION_ENTRY)
+
/* 5 bits of uniqifier, 0xF800 */
-#define SKD_ID_INCR (0x400)
#define SKD_ID_TABLE_MASK (3u << 8u)
#define SKD_ID_RW_REQUEST (0u << 8u)
#define SKD_ID_INTERNAL (1u << 8u)
-#define SKD_ID_SPECIAL_REQUEST (2u << 8u)
#define SKD_ID_FIT_MSG (3u << 8u)
#define SKD_ID_SLOT_MASK 0x00FFu
#define SKD_ID_SLOT_AND_TABLE_MASK 0x03FFu
-#define SKD_N_TIMEOUT_SLOT 4u
-#define SKD_TIMEOUT_SLOT_MASK 3u
-
#define SKD_N_MAX_SECTORS 2048u
#define SKD_MAX_RETRIES 2u
@@ -141,7 +113,6 @@ enum skd_drvr_state {
SKD_DRVR_STATE_ONLINE,
SKD_DRVR_STATE_PAUSING,
SKD_DRVR_STATE_PAUSED,
- SKD_DRVR_STATE_DRAINING_TIMEOUT,
SKD_DRVR_STATE_RESTARTING,
SKD_DRVR_STATE_RESUMING,
SKD_DRVR_STATE_STOPPING,
@@ -158,7 +129,6 @@ enum skd_drvr_state {
#define SKD_WAIT_BOOT_TIMO SKD_TIMER_SECONDS(90u)
#define SKD_STARTING_TIMO SKD_TIMER_SECONDS(8u)
#define SKD_RESTARTING_TIMO SKD_TIMER_MINUTES(4u)
-#define SKD_DRAINING_TIMO SKD_TIMER_SECONDS(6u)
#define SKD_BUSY_TIMO SKD_TIMER_MINUTES(20u)
#define SKD_STARTED_BUSY_TIMO SKD_TIMER_SECONDS(60u)
#define SKD_START_WAIT_SECONDS 90u
@@ -169,12 +139,6 @@ enum skd_req_state {
SKD_REQ_STATE_BUSY,
SKD_REQ_STATE_COMPLETED,
SKD_REQ_STATE_TIMEOUT,
- SKD_REQ_STATE_ABORTED,
-};
-
-enum skd_fit_msg_state {
- SKD_MSG_STATE_IDLE,
- SKD_MSG_STATE_BUSY,
};
enum skd_check_status_action {
@@ -185,34 +149,29 @@ enum skd_check_status_action {
SKD_CHECK_STATUS_BUSY_IMMINENT,
};
-struct skd_fitmsg_context {
- enum skd_fit_msg_state state;
-
- struct skd_fitmsg_context *next;
+struct skd_msg_buf {
+ struct fit_msg_hdr fmh;
+ struct skd_scsi_request scsi[SKD_MAX_REQ_PER_MSG];
+};
+struct skd_fitmsg_context {
u32 id;
- u16 outstanding;
u32 length;
- u32 offset;
- u8 *msg_buf;
+ struct skd_msg_buf *msg_buf;
dma_addr_t mb_dma_address;
};
struct skd_request_context {
enum skd_req_state state;
- struct skd_request_context *next;
-
u16 id;
u32 fitmsg_id;
- struct request *req;
u8 flush_cmd;
- u32 timeout_stamp;
- u8 sg_data_dir;
+ enum dma_data_direction data_dir;
struct scatterlist *sg;
u32 n_sg;
u32 sg_byte_count;
@@ -224,38 +183,19 @@ struct skd_request_context {
struct fit_comp_error_info err_info;
+ blk_status_t status;
};
-#define SKD_DATA_DIR_HOST_TO_CARD 1
-#define SKD_DATA_DIR_CARD_TO_HOST 2
struct skd_special_context {
struct skd_request_context req;
- u8 orphaned;
-
void *data_buf;
dma_addr_t db_dma_address;
- u8 *msg_buf;
+ struct skd_msg_buf *msg_buf;
dma_addr_t mb_dma_address;
};
-struct skd_sg_io {
- fmode_t mode;
- void __user *argp;
-
- struct sg_io_hdr sg;
-
- u8 cdb[16];
-
- u32 dxfer_len;
- u32 iovcnt;
- struct sg_iovec *iov;
- struct sg_iovec no_iov_iov;
-
- struct skd_special_context *skspcl;
-};
-
typedef enum skd_irq_type {
SKD_IRQ_LEGACY,
SKD_IRQ_MSI,
@@ -265,7 +205,7 @@ typedef enum skd_irq_type {
#define SKD_MAX_BARS 2
struct skd_device {
- volatile void __iomem *mem_map[SKD_MAX_BARS];
+ void __iomem *mem_map[SKD_MAX_BARS];
resource_size_t mem_phys[SKD_MAX_BARS];
u32 mem_size[SKD_MAX_BARS];
@@ -276,21 +216,20 @@ struct skd_device {
spinlock_t lock;
struct gendisk *disk;
+ struct blk_mq_tag_set tag_set;
struct request_queue *queue;
+ struct skd_fitmsg_context *skmsg;
struct device *class_dev;
int gendisk_on;
int sync_done;
- atomic_t device_count;
u32 devno;
u32 major;
- char name[32];
char isr_name[30];
enum skd_drvr_state state;
u32 drive_state;
- u32 in_flight;
u32 cur_max_queue_depth;
u32 queue_low_water_mark;
u32 dev_max_queue_depth;
@@ -298,27 +237,20 @@ struct skd_device {
u32 num_fitmsg_context;
u32 num_req_context;
- u32 timeout_slot[SKD_N_TIMEOUT_SLOT];
- u32 timeout_stamp;
- struct skd_fitmsg_context *skmsg_free_list;
struct skd_fitmsg_context *skmsg_table;
- struct skd_request_context *skreq_free_list;
- struct skd_request_context *skreq_table;
-
- struct skd_special_context *skspcl_free_list;
- struct skd_special_context *skspcl_table;
-
struct skd_special_context internal_skspcl;
u32 read_cap_blocksize;
u32 read_cap_last_lba;
int read_cap_is_valid;
int inquiry_is_valid;
u8 inq_serial_num[13]; /*12 chars plus null term */
- u8 id_str[80]; /* holds a composite name (pci + sernum) */
u8 skcomp_cycle;
u32 skcomp_ix;
+ struct kmem_cache *msgbuf_cache;
+ struct kmem_cache *sglist_cache;
+ struct kmem_cache *databuf_cache;
struct fit_completion_entry_v1 *skcomp_table;
struct fit_comp_error_info *skerr_table;
dma_addr_t cq_dma_address;
@@ -329,7 +261,6 @@ struct skd_device {
u32 timer_countdown;
u32 timer_substate;
- int n_special;
int sgs_per_request;
u32 last_mtd;
@@ -343,7 +274,7 @@ struct skd_device {
u32 timo_slot;
-
+ struct work_struct start_queue;
struct work_struct completion_worker;
};
@@ -353,53 +284,32 @@ struct skd_device {
static inline u32 skd_reg_read32(struct skd_device *skdev, u32 offset)
{
- u32 val;
-
- if (likely(skdev->dbg_level < 2))
- return readl(skdev->mem_map[1] + offset);
- else {
- barrier();
- val = readl(skdev->mem_map[1] + offset);
- barrier();
- pr_debug("%s:%s:%d offset %x = %x\n",
- skdev->name, __func__, __LINE__, offset, val);
- return val;
- }
+ u32 val = readl(skdev->mem_map[1] + offset);
+ if (unlikely(skdev->dbg_level >= 2))
+ dev_dbg(&skdev->pdev->dev, "offset %x = %x\n", offset, val);
+ return val;
}
static inline void skd_reg_write32(struct skd_device *skdev, u32 val,
u32 offset)
{
- if (likely(skdev->dbg_level < 2)) {
- writel(val, skdev->mem_map[1] + offset);
- barrier();
- } else {
- barrier();
- writel(val, skdev->mem_map[1] + offset);
- barrier();
- pr_debug("%s:%s:%d offset %x = %x\n",
- skdev->name, __func__, __LINE__, offset, val);
- }
+ writel(val, skdev->mem_map[1] + offset);
+ if (unlikely(skdev->dbg_level >= 2))
+ dev_dbg(&skdev->pdev->dev, "offset %x = %x\n", offset, val);
}
static inline void skd_reg_write64(struct skd_device *skdev, u64 val,
u32 offset)
{
- if (likely(skdev->dbg_level < 2)) {
- writeq(val, skdev->mem_map[1] + offset);
- barrier();
- } else {
- barrier();
- writeq(val, skdev->mem_map[1] + offset);
- barrier();
- pr_debug("%s:%s:%d offset %x = %016llx\n",
- skdev->name, __func__, __LINE__, offset, val);
- }
+ writeq(val, skdev->mem_map[1] + offset);
+ if (unlikely(skdev->dbg_level >= 2))
+ dev_dbg(&skdev->pdev->dev, "offset %x = %016llx\n", offset,
+ val);
}
-#define SKD_IRQ_DEFAULT SKD_IRQ_MSI
+#define SKD_IRQ_DEFAULT SKD_IRQ_MSIX
static int skd_isr_type = SKD_IRQ_DEFAULT;
module_param(skd_isr_type, int, 0444);
@@ -412,7 +322,7 @@ static int skd_max_req_per_msg = SKD_MAX_REQ_PER_MSG_DEFAULT;
module_param(skd_max_req_per_msg, int, 0444);
MODULE_PARM_DESC(skd_max_req_per_msg,
"Maximum SCSI requests packed in a single message."
- " (1-14, default==1)");
+ " (1-" __stringify(SKD_MAX_REQ_PER_MSG) ", default==1)");
#define SKD_MAX_QUEUE_DEPTH_DEFAULT 64
#define SKD_MAX_QUEUE_DEPTH_DEFAULT_STR "64"
@@ -429,10 +339,10 @@ MODULE_PARM_DESC(skd_sgs_per_request,
"Maximum SG elements per block request."
" (1-4096, default==256)");
-static int skd_max_pass_thru = SKD_N_SPECIAL_CONTEXT;
+static int skd_max_pass_thru = 1;
module_param(skd_max_pass_thru, int, 0444);
MODULE_PARM_DESC(skd_max_pass_thru,
- "Maximum SCSI pass-thru at a time." " (1-50, default==32)");
+ "Maximum SCSI pass-thru at a time. IGNORED");
module_param(skd_dbg_level, int, 0444);
MODULE_PARM_DESC(skd_dbg_level, "s1120 debug level (0,1,2)");
@@ -449,9 +359,6 @@ static void skd_send_fitmsg(struct skd_device *skdev,
struct skd_fitmsg_context *skmsg);
static void skd_send_special_fitmsg(struct skd_device *skdev,
struct skd_special_context *skspcl);
-static void skd_request_fn(struct request_queue *rq);
-static void skd_end_request(struct skd_device *skdev,
- struct skd_request_context *skreq, blk_status_t status);
static bool skd_preop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq);
static void skd_postop_sg_list(struct skd_device *skdev,
@@ -460,19 +367,14 @@ static void skd_postop_sg_list(struct skd_device *skdev,
static void skd_restart_device(struct skd_device *skdev);
static int skd_quiesce_dev(struct skd_device *skdev);
static int skd_unquiesce_dev(struct skd_device *skdev);
-static void skd_release_special(struct skd_device *skdev,
- struct skd_special_context *skspcl);
static void skd_disable_interrupts(struct skd_device *skdev);
static void skd_isr_fwstate(struct skd_device *skdev);
-static void skd_recover_requests(struct skd_device *skdev, int requeue);
+static void skd_recover_requests(struct skd_device *skdev);
static void skd_soft_reset(struct skd_device *skdev);
-static const char *skd_name(struct skd_device *skdev);
const char *skd_drive_state_to_str(int state);
const char *skd_skdev_state_to_str(enum skd_drvr_state state);
static void skd_log_skdev(struct skd_device *skdev, const char *event);
-static void skd_log_skmsg(struct skd_device *skdev,
- struct skd_fitmsg_context *skmsg, const char *event);
static void skd_log_skreq(struct skd_device *skdev,
struct skd_request_context *skreq, const char *event);
@@ -481,18 +383,20 @@ static void skd_log_skreq(struct skd_device *skdev,
* READ/WRITE REQUESTS
*****************************************************************************
*/
-static void skd_fail_all_pending(struct skd_device *skdev)
+static void skd_inc_in_flight(struct request *rq, void *data, bool reserved)
{
- struct request_queue *q = skdev->queue;
- struct request *req;
+ int *count = data;
- for (;; ) {
- req = blk_peek_request(q);
- if (req == NULL)
- break;
- blk_start_request(req);
- __blk_end_request_all(req, BLK_STS_IOERR);
- }
+ count++;
+}
+
+static int skd_in_flight(struct skd_device *skdev)
+{
+ int count = 0;
+
+ blk_mq_tagset_busy_iter(&skdev->tag_set, skd_inc_in_flight, &count);
+
+ return count;
}
static void
@@ -501,9 +405,9 @@ skd_prep_rw_cdb(struct skd_scsi_request *scsi_req,
unsigned count)
{
if (data_dir == READ)
- scsi_req->cdb[0] = 0x28;
+ scsi_req->cdb[0] = READ_10;
else
- scsi_req->cdb[0] = 0x2a;
+ scsi_req->cdb[0] = WRITE_10;
scsi_req->cdb[1] = 0;
scsi_req->cdb[2] = (lba & 0xff000000) >> 24;
@@ -522,7 +426,7 @@ skd_prep_zerosize_flush_cdb(struct skd_scsi_request *scsi_req,
{
skreq->flush_cmd = 1;
- scsi_req->cdb[0] = 0x35;
+ scsi_req->cdb[0] = SYNCHRONIZE_CACHE;
scsi_req->cdb[1] = 0;
scsi_req->cdb[2] = 0;
scsi_req->cdb[3] = 0;
@@ -534,307 +438,194 @@ skd_prep_zerosize_flush_cdb(struct skd_scsi_request *scsi_req,
scsi_req->cdb[9] = 0;
}
-static void skd_request_fn_not_online(struct request_queue *q);
-
-static void skd_request_fn(struct request_queue *q)
+/*
+ * Return true if and only if all pending requests should be failed.
+ */
+static bool skd_fail_all(struct request_queue *q)
{
struct skd_device *skdev = q->queuedata;
- struct skd_fitmsg_context *skmsg = NULL;
- struct fit_msg_hdr *fmh = NULL;
- struct skd_request_context *skreq;
- struct request *req = NULL;
- struct skd_scsi_request *scsi_req;
- unsigned long io_flags;
- u32 lba;
- u32 count;
- int data_dir;
- u32 be_lba;
- u32 be_count;
- u64 be_dmaa;
- u64 cmdctxt;
- u32 timo_slot;
- void *cmd_ptr;
- int flush, fua;
-
- if (skdev->state != SKD_DRVR_STATE_ONLINE) {
- skd_request_fn_not_online(q);
- return;
- }
- if (blk_queue_stopped(skdev->queue)) {
- if (skdev->skmsg_free_list == NULL ||
- skdev->skreq_free_list == NULL ||
- skdev->in_flight >= skdev->queue_low_water_mark)
- /* There is still some kind of shortage */
- return;
-
- queue_flag_clear(QUEUE_FLAG_STOPPED, skdev->queue);
- }
+ SKD_ASSERT(skdev->state != SKD_DRVR_STATE_ONLINE);
- /*
- * Stop conditions:
- * - There are no more native requests
- * - There are already the maximum number of requests in progress
- * - There are no more skd_request_context entries
- * - There are no more FIT msg buffers
+ skd_log_skdev(skdev, "req_not_online");
+ switch (skdev->state) {
+ case SKD_DRVR_STATE_PAUSING:
+ case SKD_DRVR_STATE_PAUSED:
+ case SKD_DRVR_STATE_STARTING:
+ case SKD_DRVR_STATE_RESTARTING:
+ case SKD_DRVR_STATE_WAIT_BOOT:
+ /* In case of starting, we haven't started the queue,
+ * so we can't get here... but requests are
+ * possibly hanging out waiting for us because we
+ * reported the dev/skd0 already. They'll wait
+ * forever if connect doesn't complete.
+ * What to do??? delay dev/skd0 ??
*/
- for (;; ) {
-
- flush = fua = 0;
-
- req = blk_peek_request(q);
-
- /* Are there any native requests to start? */
- if (req == NULL)
- break;
-
- lba = (u32)blk_rq_pos(req);
- count = blk_rq_sectors(req);
- data_dir = rq_data_dir(req);
- io_flags = req->cmd_flags;
-
- if (req_op(req) == REQ_OP_FLUSH)
- flush++;
-
- if (io_flags & REQ_FUA)
- fua++;
-
- pr_debug("%s:%s:%d new req=%p lba=%u(0x%x) "
- "count=%u(0x%x) dir=%d\n",
- skdev->name, __func__, __LINE__,
- req, lba, lba, count, count, data_dir);
-
- /* At this point we know there is a request */
+ case SKD_DRVR_STATE_BUSY:
+ case SKD_DRVR_STATE_BUSY_IMMINENT:
+ case SKD_DRVR_STATE_BUSY_ERASE:
+ return false;
- /* Are too many requets already in progress? */
- if (skdev->in_flight >= skdev->cur_max_queue_depth) {
- pr_debug("%s:%s:%d qdepth %d, limit %d\n",
- skdev->name, __func__, __LINE__,
- skdev->in_flight, skdev->cur_max_queue_depth);
- break;
- }
+ case SKD_DRVR_STATE_BUSY_SANITIZE:
+ case SKD_DRVR_STATE_STOPPING:
+ case SKD_DRVR_STATE_SYNCING:
+ case SKD_DRVR_STATE_FAULT:
+ case SKD_DRVR_STATE_DISAPPEARED:
+ default:
+ return true;
+ }
+}
- /* Is a skd_request_context available? */
- skreq = skdev->skreq_free_list;
- if (skreq == NULL) {
- pr_debug("%s:%s:%d Out of req=%p\n",
- skdev->name, __func__, __LINE__, q);
- break;
- }
- SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
- SKD_ASSERT((skreq->id & SKD_ID_INCR) == 0);
-
- /* Now we check to see if we can get a fit msg */
- if (skmsg == NULL) {
- if (skdev->skmsg_free_list == NULL) {
- pr_debug("%s:%s:%d Out of msg\n",
- skdev->name, __func__, __LINE__);
- break;
- }
- }
+static blk_status_t skd_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *mqd)
+{
+ struct request *const req = mqd->rq;
+ struct request_queue *const q = req->q;
+ struct skd_device *skdev = q->queuedata;
+ struct skd_fitmsg_context *skmsg;
+ struct fit_msg_hdr *fmh;
+ const u32 tag = blk_mq_unique_tag(req);
+ struct skd_request_context *const skreq = blk_mq_rq_to_pdu(req);
+ struct skd_scsi_request *scsi_req;
+ unsigned long flags = 0;
+ const u32 lba = blk_rq_pos(req);
+ const u32 count = blk_rq_sectors(req);
+ const int data_dir = rq_data_dir(req);
- skreq->flush_cmd = 0;
- skreq->n_sg = 0;
- skreq->sg_byte_count = 0;
+ if (unlikely(skdev->state != SKD_DRVR_STATE_ONLINE))
+ return skd_fail_all(q) ? BLK_STS_IOERR : BLK_STS_RESOURCE;
- /*
- * OK to now dequeue request from q.
- *
- * At this point we are comitted to either start or reject
- * the native request. Note that skd_request_context is
- * available but is still at the head of the free list.
- */
- blk_start_request(req);
- skreq->req = req;
- skreq->fitmsg_id = 0;
-
- /* Either a FIT msg is in progress or we have to start one. */
- if (skmsg == NULL) {
- /* Are there any FIT msg buffers available? */
- skmsg = skdev->skmsg_free_list;
- if (skmsg == NULL) {
- pr_debug("%s:%s:%d Out of msg skdev=%p\n",
- skdev->name, __func__, __LINE__,
- skdev);
- break;
- }
- SKD_ASSERT(skmsg->state == SKD_MSG_STATE_IDLE);
- SKD_ASSERT((skmsg->id & SKD_ID_INCR) == 0);
+ blk_mq_start_request(req);
- skdev->skmsg_free_list = skmsg->next;
+ WARN_ONCE(tag >= skd_max_queue_depth, "%#x > %#x (nr_requests = %lu)\n",
+ tag, skd_max_queue_depth, q->nr_requests);
- skmsg->state = SKD_MSG_STATE_BUSY;
- skmsg->id += SKD_ID_INCR;
+ SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
- /* Initialize the FIT msg header */
- fmh = (struct fit_msg_hdr *)skmsg->msg_buf;
- memset(fmh, 0, sizeof(*fmh));
- fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
- skmsg->length = sizeof(*fmh);
- }
+ dev_dbg(&skdev->pdev->dev,
+ "new req=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n", req, lba,
+ lba, count, count, data_dir);
- skreq->fitmsg_id = skmsg->id;
+ skreq->id = tag + SKD_ID_RW_REQUEST;
+ skreq->flush_cmd = 0;
+ skreq->n_sg = 0;
+ skreq->sg_byte_count = 0;
- /*
- * Note that a FIT msg may have just been started
- * but contains no SoFIT requests yet.
- */
+ skreq->fitmsg_id = 0;
- /*
- * Transcode the request, checking as we go. The outcome of
- * the transcoding is represented by the error variable.
- */
- cmd_ptr = &skmsg->msg_buf[skmsg->length];
- memset(cmd_ptr, 0, 32);
+ skreq->data_dir = data_dir == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
- be_lba = cpu_to_be32(lba);
- be_count = cpu_to_be32(count);
- be_dmaa = cpu_to_be64((u64)skreq->sksg_dma_address);
- cmdctxt = skreq->id + SKD_ID_INCR;
+ if (req->bio && !skd_preop_sg_list(skdev, skreq)) {
+ dev_dbg(&skdev->pdev->dev, "error Out\n");
+ skreq->status = BLK_STS_RESOURCE;
+ blk_mq_complete_request(req);
+ return BLK_STS_OK;
+ }
- scsi_req = cmd_ptr;
- scsi_req->hdr.tag = cmdctxt;
- scsi_req->hdr.sg_list_dma_address = be_dmaa;
+ dma_sync_single_for_device(&skdev->pdev->dev, skreq->sksg_dma_address,
+ skreq->n_sg *
+ sizeof(struct fit_sg_descriptor),
+ DMA_TO_DEVICE);
- if (data_dir == READ)
- skreq->sg_data_dir = SKD_DATA_DIR_CARD_TO_HOST;
- else
- skreq->sg_data_dir = SKD_DATA_DIR_HOST_TO_CARD;
+ /* Either a FIT msg is in progress or we have to start one. */
+ if (skd_max_req_per_msg == 1) {
+ skmsg = NULL;
+ } else {
+ spin_lock_irqsave(&skdev->lock, flags);
+ skmsg = skdev->skmsg;
+ }
+ if (!skmsg) {
+ skmsg = &skdev->skmsg_table[tag];
+ skdev->skmsg = skmsg;
- if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) {
- skd_prep_zerosize_flush_cdb(scsi_req, skreq);
- SKD_ASSERT(skreq->flush_cmd == 1);
+ /* Initialize the FIT msg header */
+ fmh = &skmsg->msg_buf->fmh;
+ memset(fmh, 0, sizeof(*fmh));
+ fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
+ skmsg->length = sizeof(*fmh);
+ } else {
+ fmh = &skmsg->msg_buf->fmh;
+ }
- } else {
- skd_prep_rw_cdb(scsi_req, data_dir, lba, count);
- }
+ skreq->fitmsg_id = skmsg->id;
- if (fua)
- scsi_req->cdb[1] |= SKD_FUA_NV;
+ scsi_req = &skmsg->msg_buf->scsi[fmh->num_protocol_cmds_coalesced];
+ memset(scsi_req, 0, sizeof(*scsi_req));
- if (!req->bio)
- goto skip_sg;
+ scsi_req->hdr.tag = skreq->id;
+ scsi_req->hdr.sg_list_dma_address =
+ cpu_to_be64(skreq->sksg_dma_address);
- if (!skd_preop_sg_list(skdev, skreq)) {
- /*
- * Complete the native request with error.
- * Note that the request context is still at the
- * head of the free list, and that the SoFIT request
- * was encoded into the FIT msg buffer but the FIT
- * msg length has not been updated. In short, the
- * only resource that has been allocated but might
- * not be used is that the FIT msg could be empty.
- */
- pr_debug("%s:%s:%d error Out\n",
- skdev->name, __func__, __LINE__);
- skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
- continue;
- }
+ if (req_op(req) == REQ_OP_FLUSH) {
+ skd_prep_zerosize_flush_cdb(scsi_req, skreq);
+ SKD_ASSERT(skreq->flush_cmd == 1);
+ } else {
+ skd_prep_rw_cdb(scsi_req, data_dir, lba, count);
+ }
-skip_sg:
- scsi_req->hdr.sg_list_len_bytes =
- cpu_to_be32(skreq->sg_byte_count);
+ if (req->cmd_flags & REQ_FUA)
+ scsi_req->cdb[1] |= SKD_FUA_NV;
- /* Complete resource allocations. */
- skdev->skreq_free_list = skreq->next;
- skreq->state = SKD_REQ_STATE_BUSY;
- skreq->id += SKD_ID_INCR;
+ scsi_req->hdr.sg_list_len_bytes = cpu_to_be32(skreq->sg_byte_count);
- skmsg->length += sizeof(struct skd_scsi_request);
- fmh->num_protocol_cmds_coalesced++;
+ /* Complete resource allocations. */
+ skreq->state = SKD_REQ_STATE_BUSY;
- /*
- * Update the active request counts.
- * Capture the timeout timestamp.
- */
- skreq->timeout_stamp = skdev->timeout_stamp;
- timo_slot = skreq->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
- skdev->timeout_slot[timo_slot]++;
- skdev->in_flight++;
- pr_debug("%s:%s:%d req=0x%x busy=%d\n",
- skdev->name, __func__, __LINE__,
- skreq->id, skdev->in_flight);
+ skmsg->length += sizeof(struct skd_scsi_request);
+ fmh->num_protocol_cmds_coalesced++;
- /*
- * If the FIT msg buffer is full send it.
- */
- if (skmsg->length >= SKD_N_FITMSG_BYTES ||
- fmh->num_protocol_cmds_coalesced >= skd_max_req_per_msg) {
- skd_send_fitmsg(skdev, skmsg);
- skmsg = NULL;
- fmh = NULL;
- }
- }
+ dev_dbg(&skdev->pdev->dev, "req=0x%x busy=%d\n", skreq->id,
+ skd_in_flight(skdev));
/*
- * Is a FIT msg in progress? If it is empty put the buffer back
- * on the free list. If it is non-empty send what we got.
- * This minimizes latency when there are fewer requests than
- * what fits in a FIT msg.
+ * If the FIT msg buffer is full send it.
*/
- if (skmsg != NULL) {
- /* Bigger than just a FIT msg header? */
- if (skmsg->length > sizeof(struct fit_msg_hdr)) {
- pr_debug("%s:%s:%d sending msg=%p, len %d\n",
- skdev->name, __func__, __LINE__,
- skmsg, skmsg->length);
+ if (skd_max_req_per_msg == 1) {
+ skd_send_fitmsg(skdev, skmsg);
+ } else {
+ if (mqd->last ||
+ fmh->num_protocol_cmds_coalesced >= skd_max_req_per_msg) {
skd_send_fitmsg(skdev, skmsg);
- } else {
- /*
- * The FIT msg is empty. It means we got started
- * on the msg, but the requests were rejected.
- */
- skmsg->state = SKD_MSG_STATE_IDLE;
- skmsg->id += SKD_ID_INCR;
- skmsg->next = skdev->skmsg_free_list;
- skdev->skmsg_free_list = skmsg;
+ skdev->skmsg = NULL;
}
- skmsg = NULL;
- fmh = NULL;
+ spin_unlock_irqrestore(&skdev->lock, flags);
}
- /*
- * If req is non-NULL it means there is something to do but
- * we are out of a resource.
- */
- if (req)
- blk_stop_queue(skdev->queue);
+ return BLK_STS_OK;
}
-static void skd_end_request(struct skd_device *skdev,
- struct skd_request_context *skreq, blk_status_t error)
+static enum blk_eh_timer_return skd_timed_out(struct request *req,
+ bool reserved)
{
- if (unlikely(error)) {
- struct request *req = skreq->req;
- char *cmd = (rq_data_dir(req) == READ) ? "read" : "write";
- u32 lba = (u32)blk_rq_pos(req);
- u32 count = blk_rq_sectors(req);
-
- pr_err("(%s): Error cmd=%s sect=%u count=%u id=0x%x\n",
- skd_name(skdev), cmd, lba, count, skreq->id);
- } else
- pr_debug("%s:%s:%d id=0x%x error=%d\n",
- skdev->name, __func__, __LINE__, skreq->id, error);
+ struct skd_device *skdev = req->q->queuedata;
+
+ dev_err(&skdev->pdev->dev, "request with tag %#x timed out\n",
+ blk_mq_unique_tag(req));
- __blk_end_request_all(skreq->req, error);
+ return BLK_EH_RESET_TIMER;
+}
+
+static void skd_complete_rq(struct request *req)
+{
+ struct skd_request_context *skreq = blk_mq_rq_to_pdu(req);
+
+ blk_mq_end_request(req, skreq->status);
}
static bool skd_preop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq)
{
- struct request *req = skreq->req;
- int writing = skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD;
- int pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;
- struct scatterlist *sg = &skreq->sg[0];
+ struct request *req = blk_mq_rq_from_pdu(skreq);
+ struct scatterlist *sgl = &skreq->sg[0], *sg;
int n_sg;
int i;
skreq->sg_byte_count = 0;
- /* SKD_ASSERT(skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD ||
- skreq->sg_data_dir == SKD_DATA_DIR_CARD_TO_HOST); */
+ WARN_ON_ONCE(skreq->data_dir != DMA_TO_DEVICE &&
+ skreq->data_dir != DMA_FROM_DEVICE);
- n_sg = blk_rq_map_sg(skdev->queue, req, sg);
+ n_sg = blk_rq_map_sg(skdev->queue, req, sgl);
if (n_sg <= 0)
return false;
@@ -842,7 +633,7 @@ static bool skd_preop_sg_list(struct skd_device *skdev,
* Map scatterlist to PCI bus addresses.
* Note PCI might change the number of entries.
*/
- n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
+ n_sg = pci_map_sg(skdev->pdev, sgl, n_sg, skreq->data_dir);
if (n_sg <= 0)
return false;
@@ -850,10 +641,10 @@ static bool skd_preop_sg_list(struct skd_device *skdev,
skreq->n_sg = n_sg;
- for (i = 0; i < n_sg; i++) {
+ for_each_sg(sgl, sg, n_sg, i) {
struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
- u32 cnt = sg_dma_len(&sg[i]);
- uint64_t dma_addr = sg_dma_address(&sg[i]);
+ u32 cnt = sg_dma_len(sg);
+ uint64_t dma_addr = sg_dma_address(sg);
sgd->control = FIT_SGD_CONTROL_NOT_LAST;
sgd->byte_count = cnt;
@@ -866,16 +657,16 @@ static bool skd_preop_sg_list(struct skd_device *skdev,
skreq->sksg_list[n_sg - 1].control = FIT_SGD_CONTROL_LAST;
if (unlikely(skdev->dbg_level > 1)) {
- pr_debug("%s:%s:%d skreq=%x sksg_list=%p sksg_dma=%llx\n",
- skdev->name, __func__, __LINE__,
- skreq->id, skreq->sksg_list, skreq->sksg_dma_address);
+ dev_dbg(&skdev->pdev->dev,
+ "skreq=%x sksg_list=%p sksg_dma=%llx\n",
+ skreq->id, skreq->sksg_list, skreq->sksg_dma_address);
for (i = 0; i < n_sg; i++) {
struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
- pr_debug("%s:%s:%d sg[%d] count=%u ctrl=0x%x "
- "addr=0x%llx next=0x%llx\n",
- skdev->name, __func__, __LINE__,
- i, sgd->byte_count, sgd->control,
- sgd->host_side_addr, sgd->next_desc_ptr);
+
+ dev_dbg(&skdev->pdev->dev,
+ " sg[%d] count=%u ctrl=0x%x addr=0x%llx next=0x%llx\n",
+ i, sgd->byte_count, sgd->control,
+ sgd->host_side_addr, sgd->next_desc_ptr);
}
}
@@ -885,9 +676,6 @@ static bool skd_preop_sg_list(struct skd_device *skdev,
static void skd_postop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq)
{
- int writing = skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD;
- int pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;
-
/*
* restore the next ptr for next IO request so we
* don't have to set it every time.
@@ -895,51 +683,7 @@ static void skd_postop_sg_list(struct skd_device *skdev,
skreq->sksg_list[skreq->n_sg - 1].next_desc_ptr =
skreq->sksg_dma_address +
((skreq->n_sg) * sizeof(struct fit_sg_descriptor));
- pci_unmap_sg(skdev->pdev, &skreq->sg[0], skreq->n_sg, pci_dir);
-}
-
-static void skd_request_fn_not_online(struct request_queue *q)
-{
- struct skd_device *skdev = q->queuedata;
- int error;
-
- SKD_ASSERT(skdev->state != SKD_DRVR_STATE_ONLINE);
-
- skd_log_skdev(skdev, "req_not_online");
- switch (skdev->state) {
- case SKD_DRVR_STATE_PAUSING:
- case SKD_DRVR_STATE_PAUSED:
- case SKD_DRVR_STATE_STARTING:
- case SKD_DRVR_STATE_RESTARTING:
- case SKD_DRVR_STATE_WAIT_BOOT:
- /* In case of starting, we haven't started the queue,
- * so we can't get here... but requests are
- * possibly hanging out waiting for us because we
- * reported the dev/skd0 already. They'll wait
- * forever if connect doesn't complete.
- * What to do??? delay dev/skd0 ??
- */
- case SKD_DRVR_STATE_BUSY:
- case SKD_DRVR_STATE_BUSY_IMMINENT:
- case SKD_DRVR_STATE_BUSY_ERASE:
- case SKD_DRVR_STATE_DRAINING_TIMEOUT:
- return;
-
- case SKD_DRVR_STATE_BUSY_SANITIZE:
- case SKD_DRVR_STATE_STOPPING:
- case SKD_DRVR_STATE_SYNCING:
- case SKD_DRVR_STATE_FAULT:
- case SKD_DRVR_STATE_DISAPPEARED:
- default:
- error = -EIO;
- break;
- }
-
- /* If we get here, terminate all pending block requeusts
- * with EIO and any scsi pass thru with appropriate sense
- */
-
- skd_fail_all_pending(skdev);
+ pci_unmap_sg(skdev->pdev, &skreq->sg[0], skreq->n_sg, skreq->data_dir);
}
/*
@@ -950,12 +694,22 @@ static void skd_request_fn_not_online(struct request_queue *q)
static void skd_timer_tick_not_online(struct skd_device *skdev);
+static void skd_start_queue(struct work_struct *work)
+{
+ struct skd_device *skdev = container_of(work, typeof(*skdev),
+ start_queue);
+
+ /*
+ * Although it is safe to call blk_start_queue() from interrupt
+ * context, blk_mq_start_hw_queues() must not be called from
+ * interrupt context.
+ */
+ blk_mq_start_hw_queues(skdev->queue);
+}
+
static void skd_timer_tick(ulong arg)
{
struct skd_device *skdev = (struct skd_device *)arg;
-
- u32 timo_slot;
- u32 overdue_timestamp;
unsigned long reqflags;
u32 state;
@@ -972,37 +726,9 @@ static void skd_timer_tick(ulong arg)
if (state != skdev->drive_state)
skd_isr_fwstate(skdev);
- if (skdev->state != SKD_DRVR_STATE_ONLINE) {
+ if (skdev->state != SKD_DRVR_STATE_ONLINE)
skd_timer_tick_not_online(skdev);
- goto timer_func_out;
- }
- skdev->timeout_stamp++;
- timo_slot = skdev->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
-
- /*
- * All requests that happened during the previous use of
- * this slot should be done by now. The previous use was
- * over 7 seconds ago.
- */
- if (skdev->timeout_slot[timo_slot] == 0)
- goto timer_func_out;
-
- /* Something is overdue */
- overdue_timestamp = skdev->timeout_stamp - SKD_N_TIMEOUT_SLOT;
-
- pr_debug("%s:%s:%d found %d timeouts, draining busy=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->timeout_slot[timo_slot], skdev->in_flight);
- pr_err("(%s): Overdue IOs (%d), busy %d\n",
- skd_name(skdev), skdev->timeout_slot[timo_slot],
- skdev->in_flight);
- skdev->timer_countdown = SKD_DRAINING_TIMO;
- skdev->state = SKD_DRVR_STATE_DRAINING_TIMEOUT;
- skdev->timo_slot = timo_slot;
- blk_stop_queue(skdev->queue);
-
-timer_func_out:
mod_timer(&skdev->timer, (jiffies + HZ));
spin_unlock_irqrestore(&skdev->lock, reqflags);
@@ -1015,9 +741,9 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
case SKD_DRVR_STATE_LOAD:
break;
case SKD_DRVR_STATE_BUSY_SANITIZE:
- pr_debug("%s:%s:%d drive busy sanitize[%x], driver[%x]\n",
- skdev->name, __func__, __LINE__,
- skdev->drive_state, skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "drive busy sanitize[%x], driver[%x]\n",
+ skdev->drive_state, skdev->state);
/* If we've been in sanitize for 3 seconds, we figure we're not
* going to get anymore completions, so recover requests now
*/
@@ -1025,22 +751,21 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
skdev->timer_countdown--;
return;
}
- skd_recover_requests(skdev, 0);
+ skd_recover_requests(skdev);
break;
case SKD_DRVR_STATE_BUSY:
case SKD_DRVR_STATE_BUSY_IMMINENT:
case SKD_DRVR_STATE_BUSY_ERASE:
- pr_debug("%s:%s:%d busy[%x], countdown=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->state, skdev->timer_countdown);
+ dev_dbg(&skdev->pdev->dev, "busy[%x], countdown=%d\n",
+ skdev->state, skdev->timer_countdown);
if (skdev->timer_countdown > 0) {
skdev->timer_countdown--;
return;
}
- pr_debug("%s:%s:%d busy[%x], timedout=%d, restarting device.",
- skdev->name, __func__, __LINE__,
- skdev->state, skdev->timer_countdown);
+ dev_dbg(&skdev->pdev->dev,
+ "busy[%x], timedout=%d, restarting device.",
+ skdev->state, skdev->timer_countdown);
skd_restart_device(skdev);
break;
@@ -1054,12 +779,12 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
* revcover at some point. */
skdev->state = SKD_DRVR_STATE_FAULT;
- pr_err("(%s): DriveFault Connect Timeout (%x)\n",
- skd_name(skdev), skdev->drive_state);
+ dev_err(&skdev->pdev->dev, "DriveFault Connect Timeout (%x)\n",
+ skdev->drive_state);
/*start the queue so we can respond with error to requests */
/* wakeup anyone waiting for startup complete */
- blk_start_queue(skdev->queue);
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = -1;
wake_up_interruptible(&skdev->waitq);
break;
@@ -1072,29 +797,6 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
case SKD_DRVR_STATE_PAUSED:
break;
- case SKD_DRVR_STATE_DRAINING_TIMEOUT:
- pr_debug("%s:%s:%d "
- "draining busy [%d] tick[%d] qdb[%d] tmls[%d]\n",
- skdev->name, __func__, __LINE__,
- skdev->timo_slot,
- skdev->timer_countdown,
- skdev->in_flight,
- skdev->timeout_slot[skdev->timo_slot]);
- /* if the slot has cleared we can let the I/O continue */
- if (skdev->timeout_slot[skdev->timo_slot] == 0) {
- pr_debug("%s:%s:%d Slot drained, starting queue.\n",
- skdev->name, __func__, __LINE__);
- skdev->state = SKD_DRVR_STATE_ONLINE;
- blk_start_queue(skdev->queue);
- return;
- }
- if (skdev->timer_countdown > 0) {
- skdev->timer_countdown--;
- return;
- }
- skd_restart_device(skdev);
- break;
-
case SKD_DRVR_STATE_RESTARTING:
if (skdev->timer_countdown > 0) {
skdev->timer_countdown--;
@@ -1103,8 +805,9 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
/* For now, we fault the drive. Could attempt resets to
* revcover at some point. */
skdev->state = SKD_DRVR_STATE_FAULT;
- pr_err("(%s): DriveFault Reconnect Timeout (%x)\n",
- skd_name(skdev), skdev->drive_state);
+ dev_err(&skdev->pdev->dev,
+ "DriveFault Reconnect Timeout (%x)\n",
+ skdev->drive_state);
/*
* Recovering does two things:
@@ -1124,18 +827,18 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
/* It never came out of soft reset. Try to
* recover the requests and then let them
* fail. This is to mitigate hung processes. */
- skd_recover_requests(skdev, 0);
+ skd_recover_requests(skdev);
else {
- pr_err("(%s): Disable BusMaster (%x)\n",
- skd_name(skdev), skdev->drive_state);
+ dev_err(&skdev->pdev->dev, "Disable BusMaster (%x)\n",
+ skdev->drive_state);
pci_disable_device(skdev->pdev);
skd_disable_interrupts(skdev);
- skd_recover_requests(skdev, 0);
+ skd_recover_requests(skdev);
}
/*start the queue so we can respond with error to requests */
/* wakeup anyone waiting for startup complete */
- blk_start_queue(skdev->queue);
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = -1;
wake_up_interruptible(&skdev->waitq);
break;
@@ -1154,13 +857,11 @@ static int skd_start_timer(struct skd_device *skdev)
{
int rc;
- init_timer(&skdev->timer);
setup_timer(&skdev->timer, skd_timer_tick, (ulong)skdev);
rc = mod_timer(&skdev->timer, (jiffies + HZ));
if (rc)
- pr_err("%s: failed to start timer %d\n",
- __func__, rc);
+ dev_err(&skdev->pdev->dev, "failed to start timer %d\n", rc);
return rc;
}
@@ -1171,634 +872,6 @@ static void skd_kill_timer(struct skd_device *skdev)
/*
*****************************************************************************
- * IOCTL
- *****************************************************************************
- */
-static int skd_ioctl_sg_io(struct skd_device *skdev,
- fmode_t mode, void __user *argp);
-static int skd_sg_io_get_and_check_args(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_obtain_skspcl(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_prep_buffering(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_copy_buffer(struct skd_device *skdev,
- struct skd_sg_io *sksgio, int dxfer_dir);
-static int skd_sg_io_send_fitmsg(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_await(struct skd_device *skdev, struct skd_sg_io *sksgio);
-static int skd_sg_io_release_skspcl(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_put_status(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-
-static void skd_complete_special(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1
- *skcomp,
- volatile struct fit_comp_error_info *skerr,
- struct skd_special_context *skspcl);
-
-static int skd_bdev_ioctl(struct block_device *bdev, fmode_t mode,
- uint cmd_in, ulong arg)
-{
- static const int sg_version_num = 30527;
- int rc = 0, timeout;
- struct gendisk *disk = bdev->bd_disk;
- struct skd_device *skdev = disk->private_data;
- int __user *p = (int __user *)arg;
-
- pr_debug("%s:%s:%d %s: CMD[%s] ioctl mode 0x%x, cmd 0x%x arg %0lx\n",
- skdev->name, __func__, __LINE__,
- disk->disk_name, current->comm, mode, cmd_in, arg);
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- switch (cmd_in) {
- case SG_SET_TIMEOUT:
- rc = get_user(timeout, p);
- if (!rc)
- disk->queue->sg_timeout = clock_t_to_jiffies(timeout);
- break;
- case SG_GET_TIMEOUT:
- rc = jiffies_to_clock_t(disk->queue->sg_timeout);
- break;
- case SG_GET_VERSION_NUM:
- rc = put_user(sg_version_num, p);
- break;
- case SG_IO:
- rc = skd_ioctl_sg_io(skdev, mode, (void __user *)arg);
- break;
-
- default:
- rc = -ENOTTY;
- break;
- }
-
- pr_debug("%s:%s:%d %s: completion rc %d\n",
- skdev->name, __func__, __LINE__, disk->disk_name, rc);
- return rc;
-}
-
-static int skd_ioctl_sg_io(struct skd_device *skdev, fmode_t mode,
- void __user *argp)
-{
- int rc;
- struct skd_sg_io sksgio;
-
- memset(&sksgio, 0, sizeof(sksgio));
- sksgio.mode = mode;
- sksgio.argp = argp;
- sksgio.iov = &sksgio.no_iov_iov;
-
- switch (skdev->state) {
- case SKD_DRVR_STATE_ONLINE:
- case SKD_DRVR_STATE_BUSY_IMMINENT:
- break;
-
- default:
- pr_debug("%s:%s:%d drive not online\n",
- skdev->name, __func__, __LINE__);
- rc = -ENXIO;
- goto out;
- }
-
- rc = skd_sg_io_get_and_check_args(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_obtain_skspcl(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_prep_buffering(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_copy_buffer(skdev, &sksgio, SG_DXFER_TO_DEV);
- if (rc)
- goto out;
-
- rc = skd_sg_io_send_fitmsg(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_await(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_copy_buffer(skdev, &sksgio, SG_DXFER_FROM_DEV);
- if (rc)
- goto out;
-
- rc = skd_sg_io_put_status(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = 0;
-
-out:
- skd_sg_io_release_skspcl(skdev, &sksgio);
-
- if (sksgio.iov != NULL && sksgio.iov != &sksgio.no_iov_iov)
- kfree(sksgio.iov);
- return rc;
-}
-
-static int skd_sg_io_get_and_check_args(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct sg_io_hdr *sgp = &sksgio->sg;
- int i, acc;
-
- if (!access_ok(VERIFY_WRITE, sksgio->argp, sizeof(sg_io_hdr_t))) {
- pr_debug("%s:%s:%d access sg failed %p\n",
- skdev->name, __func__, __LINE__, sksgio->argp);
- return -EFAULT;
- }
-
- if (__copy_from_user(sgp, sksgio->argp, sizeof(sg_io_hdr_t))) {
- pr_debug("%s:%s:%d copy_from_user sg failed %p\n",
- skdev->name, __func__, __LINE__, sksgio->argp);
- return -EFAULT;
- }
-
- if (sgp->interface_id != SG_INTERFACE_ID_ORIG) {
- pr_debug("%s:%s:%d interface_id invalid 0x%x\n",
- skdev->name, __func__, __LINE__, sgp->interface_id);
- return -EINVAL;
- }
-
- if (sgp->cmd_len > sizeof(sksgio->cdb)) {
- pr_debug("%s:%s:%d cmd_len invalid %d\n",
- skdev->name, __func__, __LINE__, sgp->cmd_len);
- return -EINVAL;
- }
-
- if (sgp->iovec_count > 256) {
- pr_debug("%s:%s:%d iovec_count invalid %d\n",
- skdev->name, __func__, __LINE__, sgp->iovec_count);
- return -EINVAL;
- }
-
- if (sgp->dxfer_len > (PAGE_SIZE * SKD_N_SG_PER_SPECIAL)) {
- pr_debug("%s:%s:%d dxfer_len invalid %d\n",
- skdev->name, __func__, __LINE__, sgp->dxfer_len);
- return -EINVAL;
- }
-
- switch (sgp->dxfer_direction) {
- case SG_DXFER_NONE:
- acc = -1;
- break;
-
- case SG_DXFER_TO_DEV:
- acc = VERIFY_READ;
- break;
-
- case SG_DXFER_FROM_DEV:
- case SG_DXFER_TO_FROM_DEV:
- acc = VERIFY_WRITE;
- break;
-
- default:
- pr_debug("%s:%s:%d dxfer_dir invalid %d\n",
- skdev->name, __func__, __LINE__, sgp->dxfer_direction);
- return -EINVAL;
- }
-
- if (copy_from_user(sksgio->cdb, sgp->cmdp, sgp->cmd_len)) {
- pr_debug("%s:%s:%d copy_from_user cmdp failed %p\n",
- skdev->name, __func__, __LINE__, sgp->cmdp);
- return -EFAULT;
- }
-
- if (sgp->mx_sb_len != 0) {
- if (!access_ok(VERIFY_WRITE, sgp->sbp, sgp->mx_sb_len)) {
- pr_debug("%s:%s:%d access sbp failed %p\n",
- skdev->name, __func__, __LINE__, sgp->sbp);
- return -EFAULT;
- }
- }
-
- if (sgp->iovec_count == 0) {
- sksgio->iov[0].iov_base = sgp->dxferp;
- sksgio->iov[0].iov_len = sgp->dxfer_len;
- sksgio->iovcnt = 1;
- sksgio->dxfer_len = sgp->dxfer_len;
- } else {
- struct sg_iovec *iov;
- uint nbytes = sizeof(*iov) * sgp->iovec_count;
- size_t iov_data_len;
-
- iov = kmalloc(nbytes, GFP_KERNEL);
- if (iov == NULL) {
- pr_debug("%s:%s:%d alloc iovec failed %d\n",
- skdev->name, __func__, __LINE__,
- sgp->iovec_count);
- return -ENOMEM;
- }
- sksgio->iov = iov;
- sksgio->iovcnt = sgp->iovec_count;
-
- if (copy_from_user(iov, sgp->dxferp, nbytes)) {
- pr_debug("%s:%s:%d copy_from_user iovec failed %p\n",
- skdev->name, __func__, __LINE__, sgp->dxferp);
- return -EFAULT;
- }
-
- /*
- * Sum up the vecs, making sure they don't overflow
- */
- iov_data_len = 0;
- for (i = 0; i < sgp->iovec_count; i++) {
- if (iov_data_len + iov[i].iov_len < iov_data_len)
- return -EINVAL;
- iov_data_len += iov[i].iov_len;
- }
-
- /* SG_IO howto says that the shorter of the two wins */
- if (sgp->dxfer_len < iov_data_len) {
- sksgio->iovcnt = iov_shorten((struct iovec *)iov,
- sgp->iovec_count,
- sgp->dxfer_len);
- sksgio->dxfer_len = sgp->dxfer_len;
- } else
- sksgio->dxfer_len = iov_data_len;
- }
-
- if (sgp->dxfer_direction != SG_DXFER_NONE) {
- struct sg_iovec *iov = sksgio->iov;
- for (i = 0; i < sksgio->iovcnt; i++, iov++) {
- if (!access_ok(acc, iov->iov_base, iov->iov_len)) {
- pr_debug("%s:%s:%d access data failed %p/%d\n",
- skdev->name, __func__, __LINE__,
- iov->iov_base, (int)iov->iov_len);
- return -EFAULT;
- }
- }
- }
-
- return 0;
-}
-
-static int skd_sg_io_obtain_skspcl(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct skd_special_context *skspcl = NULL;
- int rc;
-
- for (;;) {
- ulong flags;
-
- spin_lock_irqsave(&skdev->lock, flags);
- skspcl = skdev->skspcl_free_list;
- if (skspcl != NULL) {
- skdev->skspcl_free_list =
- (struct skd_special_context *)skspcl->req.next;
- skspcl->req.id += SKD_ID_INCR;
- skspcl->req.state = SKD_REQ_STATE_SETUP;
- skspcl->orphaned = 0;
- skspcl->req.n_sg = 0;
- }
- spin_unlock_irqrestore(&skdev->lock, flags);
-
- if (skspcl != NULL) {
- rc = 0;
- break;
- }
-
- pr_debug("%s:%s:%d blocking\n",
- skdev->name, __func__, __LINE__);
-
- rc = wait_event_interruptible_timeout(
- skdev->waitq,
- (skdev->skspcl_free_list != NULL),
- msecs_to_jiffies(sksgio->sg.timeout));
-
- pr_debug("%s:%s:%d unblocking, rc=%d\n",
- skdev->name, __func__, __LINE__, rc);
-
- if (rc <= 0) {
- if (rc == 0)
- rc = -ETIMEDOUT;
- else
- rc = -EINTR;
- break;
- }
- /*
- * If we get here rc > 0 meaning the timeout to
- * wait_event_interruptible_timeout() had time left, hence the
- * sought event -- non-empty free list -- happened.
- * Retry the allocation.
- */
- }
- sksgio->skspcl = skspcl;
-
- return rc;
-}
-
-static int skd_skreq_prep_buffering(struct skd_device *skdev,
- struct skd_request_context *skreq,
- u32 dxfer_len)
-{
- u32 resid = dxfer_len;
-
- /*
- * The DMA engine must have aligned addresses and byte counts.
- */
- resid += (-resid) & 3;
- skreq->sg_byte_count = resid;
-
- skreq->n_sg = 0;
-
- while (resid > 0) {
- u32 nbytes = PAGE_SIZE;
- u32 ix = skreq->n_sg;
- struct scatterlist *sg = &skreq->sg[ix];
- struct fit_sg_descriptor *sksg = &skreq->sksg_list[ix];
- struct page *page;
-
- if (nbytes > resid)
- nbytes = resid;
-
- page = alloc_page(GFP_KERNEL);
- if (page == NULL)
- return -ENOMEM;
-
- sg_set_page(sg, page, nbytes, 0);
-
- /* TODO: This should be going through a pci_???()
- * routine to do proper mapping. */
- sksg->control = FIT_SGD_CONTROL_NOT_LAST;
- sksg->byte_count = nbytes;
-
- sksg->host_side_addr = sg_phys(sg);
-
- sksg->dev_side_addr = 0;
- sksg->next_desc_ptr = skreq->sksg_dma_address +
- (ix + 1) * sizeof(*sksg);
-
- skreq->n_sg++;
- resid -= nbytes;
- }
-
- if (skreq->n_sg > 0) {
- u32 ix = skreq->n_sg - 1;
- struct fit_sg_descriptor *sksg = &skreq->sksg_list[ix];
-
- sksg->control = FIT_SGD_CONTROL_LAST;
- sksg->next_desc_ptr = 0;
- }
-
- if (unlikely(skdev->dbg_level > 1)) {
- u32 i;
-
- pr_debug("%s:%s:%d skreq=%x sksg_list=%p sksg_dma=%llx\n",
- skdev->name, __func__, __LINE__,
- skreq->id, skreq->sksg_list, skreq->sksg_dma_address);
- for (i = 0; i < skreq->n_sg; i++) {
- struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
-
- pr_debug("%s:%s:%d sg[%d] count=%u ctrl=0x%x "
- "addr=0x%llx next=0x%llx\n",
- skdev->name, __func__, __LINE__,
- i, sgd->byte_count, sgd->control,
- sgd->host_side_addr, sgd->next_desc_ptr);
- }
- }
-
- return 0;
-}
-
-static int skd_sg_io_prep_buffering(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct skd_special_context *skspcl = sksgio->skspcl;
- struct skd_request_context *skreq = &skspcl->req;
- u32 dxfer_len = sksgio->dxfer_len;
- int rc;
-
- rc = skd_skreq_prep_buffering(skdev, skreq, dxfer_len);
- /*
- * Eventually, errors or not, skd_release_special() is called
- * to recover allocations including partial allocations.
- */
- return rc;
-}
-
-static int skd_sg_io_copy_buffer(struct skd_device *skdev,
- struct skd_sg_io *sksgio, int dxfer_dir)
-{
- struct skd_special_context *skspcl = sksgio->skspcl;
- u32 iov_ix = 0;
- struct sg_iovec curiov;
- u32 sksg_ix = 0;
- u8 *bufp = NULL;
- u32 buf_len = 0;
- u32 resid = sksgio->dxfer_len;
- int rc;
-
- curiov.iov_len = 0;
- curiov.iov_base = NULL;
-
- if (dxfer_dir != sksgio->sg.dxfer_direction) {
- if (dxfer_dir != SG_DXFER_TO_DEV ||
- sksgio->sg.dxfer_direction != SG_DXFER_TO_FROM_DEV)
- return 0;
- }
-
- while (resid > 0) {
- u32 nbytes = PAGE_SIZE;
-
- if (curiov.iov_len == 0) {
- curiov = sksgio->iov[iov_ix++];
- continue;
- }
-
- if (buf_len == 0) {
- struct page *page;
- page = sg_page(&skspcl->req.sg[sksg_ix++]);
- bufp = page_address(page);
- buf_len = PAGE_SIZE;
- }
-
- nbytes = min_t(u32, nbytes, resid);
- nbytes = min_t(u32, nbytes, curiov.iov_len);
- nbytes = min_t(u32, nbytes, buf_len);
-
- if (dxfer_dir == SG_DXFER_TO_DEV)
- rc = __copy_from_user(bufp, curiov.iov_base, nbytes);
- else
- rc = __copy_to_user(curiov.iov_base, bufp, nbytes);
-
- if (rc)
- return -EFAULT;
-
- resid -= nbytes;
- curiov.iov_len -= nbytes;
- curiov.iov_base += nbytes;
- buf_len -= nbytes;
- }
-
- return 0;
-}
-
-static int skd_sg_io_send_fitmsg(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct skd_special_context *skspcl = sksgio->skspcl;
- struct fit_msg_hdr *fmh = (struct fit_msg_hdr *)skspcl->msg_buf;
- struct skd_scsi_request *scsi_req = (struct skd_scsi_request *)&fmh[1];
-
- memset(skspcl->msg_buf, 0, SKD_N_SPECIAL_FITMSG_BYTES);
-
- /* Initialize the FIT msg header */
- fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
- fmh->num_protocol_cmds_coalesced = 1;
-
- /* Initialize the SCSI request */
- if (sksgio->sg.dxfer_direction != SG_DXFER_NONE)
- scsi_req->hdr.sg_list_dma_address =
- cpu_to_be64(skspcl->req.sksg_dma_address);
- scsi_req->hdr.tag = skspcl->req.id;
- scsi_req->hdr.sg_list_len_bytes =
- cpu_to_be32(skspcl->req.sg_byte_count);
- memcpy(scsi_req->cdb, sksgio->cdb, sizeof(scsi_req->cdb));
-
- skspcl->req.state = SKD_REQ_STATE_BUSY;
- skd_send_special_fitmsg(skdev, skspcl);
-
- return 0;
-}
-
-static int skd_sg_io_await(struct skd_device *skdev, struct skd_sg_io *sksgio)
-{
- unsigned long flags;
- int rc;
-
- rc = wait_event_interruptible_timeout(skdev->waitq,
- (sksgio->skspcl->req.state !=
- SKD_REQ_STATE_BUSY),
- msecs_to_jiffies(sksgio->sg.
- timeout));
-
- spin_lock_irqsave(&skdev->lock, flags);
-
- if (sksgio->skspcl->req.state == SKD_REQ_STATE_ABORTED) {
- pr_debug("%s:%s:%d skspcl %p aborted\n",
- skdev->name, __func__, __LINE__, sksgio->skspcl);
-
- /* Build check cond, sense and let command finish. */
- /* For a timeout, we must fabricate completion and sense
- * data to complete the command */
- sksgio->skspcl->req.completion.status =
- SAM_STAT_CHECK_CONDITION;
-
- memset(&sksgio->skspcl->req.err_info, 0,
- sizeof(sksgio->skspcl->req.err_info));
- sksgio->skspcl->req.err_info.type = 0x70;
- sksgio->skspcl->req.err_info.key = ABORTED_COMMAND;
- sksgio->skspcl->req.err_info.code = 0x44;
- sksgio->skspcl->req.err_info.qual = 0;
- rc = 0;
- } else if (sksgio->skspcl->req.state != SKD_REQ_STATE_BUSY)
- /* No longer on the adapter. We finish. */
- rc = 0;
- else {
- /* Something's gone wrong. Still busy. Timeout or
- * user interrupted (control-C). Mark as an orphan
- * so it will be disposed when completed. */
- sksgio->skspcl->orphaned = 1;
- sksgio->skspcl = NULL;
- if (rc == 0) {
- pr_debug("%s:%s:%d timed out %p (%u ms)\n",
- skdev->name, __func__, __LINE__,
- sksgio, sksgio->sg.timeout);
- rc = -ETIMEDOUT;
- } else {
- pr_debug("%s:%s:%d cntlc %p\n",
- skdev->name, __func__, __LINE__, sksgio);
- rc = -EINTR;
- }
- }
-
- spin_unlock_irqrestore(&skdev->lock, flags);
-
- return rc;
-}
-
-static int skd_sg_io_put_status(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct sg_io_hdr *sgp = &sksgio->sg;
- struct skd_special_context *skspcl = sksgio->skspcl;
- int resid = 0;
-
- u32 nb = be32_to_cpu(skspcl->req.completion.num_returned_bytes);
-
- sgp->status = skspcl->req.completion.status;
- resid = sksgio->dxfer_len - nb;
-
- sgp->masked_status = sgp->status & STATUS_MASK;
- sgp->msg_status = 0;
- sgp->host_status = 0;
- sgp->driver_status = 0;
- sgp->resid = resid;
- if (sgp->masked_status || sgp->host_status || sgp->driver_status)
- sgp->info |= SG_INFO_CHECK;
-
- pr_debug("%s:%s:%d status %x masked %x resid 0x%x\n",
- skdev->name, __func__, __LINE__,
- sgp->status, sgp->masked_status, sgp->resid);
-
- if (sgp->masked_status == SAM_STAT_CHECK_CONDITION) {
- if (sgp->mx_sb_len > 0) {
- struct fit_comp_error_info *ei = &skspcl->req.err_info;
- u32 nbytes = sizeof(*ei);
-
- nbytes = min_t(u32, nbytes, sgp->mx_sb_len);
-
- sgp->sb_len_wr = nbytes;
-
- if (__copy_to_user(sgp->sbp, ei, nbytes)) {
- pr_debug("%s:%s:%d copy_to_user sense failed %p\n",
- skdev->name, __func__, __LINE__,
- sgp->sbp);
- return -EFAULT;
- }
- }
- }
-
- if (__copy_to_user(sksgio->argp, sgp, sizeof(sg_io_hdr_t))) {
- pr_debug("%s:%s:%d copy_to_user sg failed %p\n",
- skdev->name, __func__, __LINE__, sksgio->argp);
- return -EFAULT;
- }
-
- return 0;
-}
-
-static int skd_sg_io_release_skspcl(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct skd_special_context *skspcl = sksgio->skspcl;
-
- if (skspcl != NULL) {
- ulong flags;
-
- sksgio->skspcl = NULL;
-
- spin_lock_irqsave(&skdev->lock, flags);
- skd_release_special(skdev, skspcl);
- spin_unlock_irqrestore(&skdev->lock, flags);
- }
-
- return 0;
-}
-
-/*
- *****************************************************************************
* INTERNAL REQUESTS -- generated by driver itself
*****************************************************************************
*/
@@ -1811,14 +884,15 @@ static int skd_format_internal_skspcl(struct skd_device *skdev)
uint64_t dma_address;
struct skd_scsi_request *scsi;
- fmh = (struct fit_msg_hdr *)&skspcl->msg_buf[0];
+ fmh = &skspcl->msg_buf->fmh;
fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
fmh->num_protocol_cmds_coalesced = 1;
- scsi = (struct skd_scsi_request *)&skspcl->msg_buf[64];
+ scsi = &skspcl->msg_buf->scsi[0];
memset(scsi, 0, sizeof(*scsi));
dma_address = skspcl->req.sksg_dma_address;
scsi->hdr.sg_list_dma_address = cpu_to_be64(dma_address);
+ skspcl->req.n_sg = 1;
sgd->control = FIT_SGD_CONTROL_LAST;
sgd->byte_count = 0;
sgd->host_side_addr = skspcl->db_dma_address;
@@ -1846,11 +920,9 @@ static void skd_send_internal_skspcl(struct skd_device *skdev,
*/
return;
- SKD_ASSERT((skspcl->req.id & SKD_ID_INCR) == 0);
skspcl->req.state = SKD_REQ_STATE_BUSY;
- skspcl->req.id += SKD_ID_INCR;
- scsi = (struct skd_scsi_request *)&skspcl->msg_buf[64];
+ scsi = &skspcl->msg_buf->scsi[0];
scsi->hdr.tag = skspcl->req.id;
memset(scsi->cdb, 0, sizeof(scsi->cdb));
@@ -1940,32 +1012,35 @@ static void skd_log_check_status(struct skd_device *skdev, u8 status, u8 key,
/* If the check condition is of special interest, log a message */
if ((status == SAM_STAT_CHECK_CONDITION) && (key == 0x02)
&& (code == 0x04) && (qual == 0x06)) {
- pr_err("(%s): *** LOST_WRITE_DATA ERROR *** key/asc/"
- "ascq/fruc %02x/%02x/%02x/%02x\n",
- skd_name(skdev), key, code, qual, fruc);
+ dev_err(&skdev->pdev->dev,
+ "*** LOST_WRITE_DATA ERROR *** key/asc/ascq/fruc %02x/%02x/%02x/%02x\n",
+ key, code, qual, fruc);
}
}
static void skd_complete_internal(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1
- *skcomp,
- volatile struct fit_comp_error_info *skerr,
+ struct fit_completion_entry_v1 *skcomp,
+ struct fit_comp_error_info *skerr,
struct skd_special_context *skspcl)
{
u8 *buf = skspcl->data_buf;
u8 status;
int i;
- struct skd_scsi_request *scsi =
- (struct skd_scsi_request *)&skspcl->msg_buf[64];
+ struct skd_scsi_request *scsi = &skspcl->msg_buf->scsi[0];
+
+ lockdep_assert_held(&skdev->lock);
SKD_ASSERT(skspcl == &skdev->internal_skspcl);
- pr_debug("%s:%s:%d complete internal %x\n",
- skdev->name, __func__, __LINE__, scsi->cdb[0]);
+ dev_dbg(&skdev->pdev->dev, "complete internal %x\n", scsi->cdb[0]);
+
+ dma_sync_single_for_cpu(&skdev->pdev->dev,
+ skspcl->db_dma_address,
+ skspcl->req.sksg_list[0].byte_count,
+ DMA_BIDIRECTIONAL);
skspcl->req.completion = *skcomp;
skspcl->req.state = SKD_REQ_STATE_IDLE;
- skspcl->req.id += SKD_ID_INCR;
status = skspcl->req.completion.status;
@@ -1981,14 +1056,15 @@ static void skd_complete_internal(struct skd_device *skdev,
skd_send_internal_skspcl(skdev, skspcl, WRITE_BUFFER);
else {
if (skdev->state == SKD_DRVR_STATE_STOPPING) {
- pr_debug("%s:%s:%d TUR failed, don't send anymore state 0x%x\n",
- skdev->name, __func__, __LINE__,
- skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "TUR failed, don't send anymore state 0x%x\n",
+ skdev->state);
return;
}
- pr_debug("%s:%s:%d **** TUR failed, retry skerr\n",
- skdev->name, __func__, __LINE__);
- skd_send_internal_skspcl(skdev, skspcl, 0x00);
+ dev_dbg(&skdev->pdev->dev,
+ "**** TUR failed, retry skerr\n");
+ skd_send_internal_skspcl(skdev, skspcl,
+ TEST_UNIT_READY);
}
break;
@@ -1997,14 +1073,15 @@ static void skd_complete_internal(struct skd_device *skdev,
skd_send_internal_skspcl(skdev, skspcl, READ_BUFFER);
else {
if (skdev->state == SKD_DRVR_STATE_STOPPING) {
- pr_debug("%s:%s:%d write buffer failed, don't send anymore state 0x%x\n",
- skdev->name, __func__, __LINE__,
- skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "write buffer failed, don't send anymore state 0x%x\n",
+ skdev->state);
return;
}
- pr_debug("%s:%s:%d **** write buffer failed, retry skerr\n",
- skdev->name, __func__, __LINE__);
- skd_send_internal_skspcl(skdev, skspcl, 0x00);
+ dev_dbg(&skdev->pdev->dev,
+ "**** write buffer failed, retry skerr\n");
+ skd_send_internal_skspcl(skdev, skspcl,
+ TEST_UNIT_READY);
}
break;
@@ -2014,33 +1091,31 @@ static void skd_complete_internal(struct skd_device *skdev,
skd_send_internal_skspcl(skdev, skspcl,
READ_CAPACITY);
else {
- pr_err(
- "(%s):*** W/R Buffer mismatch %d ***\n",
- skd_name(skdev), skdev->connect_retries);
+ dev_err(&skdev->pdev->dev,
+ "*** W/R Buffer mismatch %d ***\n",
+ skdev->connect_retries);
if (skdev->connect_retries <
SKD_MAX_CONNECT_RETRIES) {
skdev->connect_retries++;
skd_soft_reset(skdev);
} else {
- pr_err(
- "(%s): W/R Buffer Connect Error\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev,
+ "W/R Buffer Connect Error\n");
return;
}
}
} else {
if (skdev->state == SKD_DRVR_STATE_STOPPING) {
- pr_debug("%s:%s:%d "
- "read buffer failed, don't send anymore state 0x%x\n",
- skdev->name, __func__, __LINE__,
- skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "read buffer failed, don't send anymore state 0x%x\n",
+ skdev->state);
return;
}
- pr_debug("%s:%s:%d "
- "**** read buffer failed, retry skerr\n",
- skdev->name, __func__, __LINE__);
- skd_send_internal_skspcl(skdev, skspcl, 0x00);
+ dev_dbg(&skdev->pdev->dev,
+ "**** read buffer failed, retry skerr\n");
+ skd_send_internal_skspcl(skdev, skspcl,
+ TEST_UNIT_READY);
}
break;
@@ -2054,10 +1129,9 @@ static void skd_complete_internal(struct skd_device *skdev,
(buf[4] << 24) | (buf[5] << 16) |
(buf[6] << 8) | buf[7];
- pr_debug("%s:%s:%d last lba %d, bs %d\n",
- skdev->name, __func__, __LINE__,
- skdev->read_cap_last_lba,
- skdev->read_cap_blocksize);
+ dev_dbg(&skdev->pdev->dev, "last lba %d, bs %d\n",
+ skdev->read_cap_last_lba,
+ skdev->read_cap_blocksize);
set_capacity(skdev->disk, skdev->read_cap_last_lba + 1);
@@ -2068,13 +1142,10 @@ static void skd_complete_internal(struct skd_device *skdev,
(skerr->key == MEDIUM_ERROR)) {
skdev->read_cap_last_lba = ~0;
set_capacity(skdev->disk, skdev->read_cap_last_lba + 1);
- pr_debug("%s:%s:%d "
- "**** MEDIUM ERROR caused READCAP to fail, ignore failure and continue to inquiry\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "**** MEDIUM ERROR caused READCAP to fail, ignore failure and continue to inquiry\n");
skd_send_internal_skspcl(skdev, skspcl, INQUIRY);
} else {
- pr_debug("%s:%s:%d **** READCAP failed, retry TUR\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "**** READCAP failed, retry TUR\n");
skd_send_internal_skspcl(skdev, skspcl,
TEST_UNIT_READY);
}
@@ -2091,8 +1162,7 @@ static void skd_complete_internal(struct skd_device *skdev,
}
if (skd_unquiesce_dev(skdev) < 0)
- pr_debug("%s:%s:%d **** failed, to ONLINE device\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "**** failed, to ONLINE device\n");
/* connection is complete */
skdev->connect_retries = 0;
break;
@@ -2120,27 +1190,20 @@ static void skd_send_fitmsg(struct skd_device *skdev,
struct skd_fitmsg_context *skmsg)
{
u64 qcmd;
- struct fit_msg_hdr *fmh;
- pr_debug("%s:%s:%d dma address 0x%llx, busy=%d\n",
- skdev->name, __func__, __LINE__,
- skmsg->mb_dma_address, skdev->in_flight);
- pr_debug("%s:%s:%d msg_buf 0x%p, offset %x\n",
- skdev->name, __func__, __LINE__,
- skmsg->msg_buf, skmsg->offset);
+ dev_dbg(&skdev->pdev->dev, "dma address 0x%llx, busy=%d\n",
+ skmsg->mb_dma_address, skd_in_flight(skdev));
+ dev_dbg(&skdev->pdev->dev, "msg_buf %p\n", skmsg->msg_buf);
qcmd = skmsg->mb_dma_address;
qcmd |= FIT_QCMD_QID_NORMAL;
- fmh = (struct fit_msg_hdr *)skmsg->msg_buf;
- skmsg->outstanding = fmh->num_protocol_cmds_coalesced;
-
if (unlikely(skdev->dbg_level > 1)) {
u8 *bp = (u8 *)skmsg->msg_buf;
int i;
for (i = 0; i < skmsg->length; i += 8) {
- pr_debug("%s:%s:%d msg[%2d] %8ph\n",
- skdev->name, __func__, __LINE__, i, &bp[i]);
+ dev_dbg(&skdev->pdev->dev, "msg[%2d] %8ph\n", i,
+ &bp[i]);
if (i == 0)
i = 64 - 8;
}
@@ -2160,6 +1223,12 @@ static void skd_send_fitmsg(struct skd_device *skdev,
*/
qcmd |= FIT_QCMD_MSGSIZE_64;
+ dma_sync_single_for_device(&skdev->pdev->dev, skmsg->mb_dma_address,
+ skmsg->length, DMA_TO_DEVICE);
+
+ /* Make sure skd_msg_buf is written before the doorbell is triggered. */
+ smp_wmb();
+
SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND);
}
@@ -2168,30 +1237,31 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
{
u64 qcmd;
+ WARN_ON_ONCE(skspcl->req.n_sg != 1);
+
if (unlikely(skdev->dbg_level > 1)) {
u8 *bp = (u8 *)skspcl->msg_buf;
int i;
for (i = 0; i < SKD_N_SPECIAL_FITMSG_BYTES; i += 8) {
- pr_debug("%s:%s:%d spcl[%2d] %8ph\n",
- skdev->name, __func__, __LINE__, i, &bp[i]);
+ dev_dbg(&skdev->pdev->dev, " spcl[%2d] %8ph\n", i,
+ &bp[i]);
if (i == 0)
i = 64 - 8;
}
- pr_debug("%s:%s:%d skspcl=%p id=%04x sksg_list=%p sksg_dma=%llx\n",
- skdev->name, __func__, __LINE__,
- skspcl, skspcl->req.id, skspcl->req.sksg_list,
- skspcl->req.sksg_dma_address);
+ dev_dbg(&skdev->pdev->dev,
+ "skspcl=%p id=%04x sksg_list=%p sksg_dma=%llx\n",
+ skspcl, skspcl->req.id, skspcl->req.sksg_list,
+ skspcl->req.sksg_dma_address);
for (i = 0; i < skspcl->req.n_sg; i++) {
struct fit_sg_descriptor *sgd =
&skspcl->req.sksg_list[i];
- pr_debug("%s:%s:%d sg[%d] count=%u ctrl=0x%x "
- "addr=0x%llx next=0x%llx\n",
- skdev->name, __func__, __LINE__,
- i, sgd->byte_count, sgd->control,
- sgd->host_side_addr, sgd->next_desc_ptr);
+ dev_dbg(&skdev->pdev->dev,
+ " sg[%d] count=%u ctrl=0x%x addr=0x%llx next=0x%llx\n",
+ i, sgd->byte_count, sgd->control,
+ sgd->host_side_addr, sgd->next_desc_ptr);
}
}
@@ -2202,6 +1272,20 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
qcmd = skspcl->mb_dma_address;
qcmd |= FIT_QCMD_QID_NORMAL + FIT_QCMD_MSGSIZE_128;
+ dma_sync_single_for_device(&skdev->pdev->dev, skspcl->mb_dma_address,
+ SKD_N_SPECIAL_FITMSG_BYTES, DMA_TO_DEVICE);
+ dma_sync_single_for_device(&skdev->pdev->dev,
+ skspcl->req.sksg_dma_address,
+ 1 * sizeof(struct fit_sg_descriptor),
+ DMA_TO_DEVICE);
+ dma_sync_single_for_device(&skdev->pdev->dev,
+ skspcl->db_dma_address,
+ skspcl->req.sksg_list[0].byte_count,
+ DMA_BIDIRECTIONAL);
+
+ /* Make sure skd_msg_buf is written before the doorbell is triggered. */
+ smp_wmb();
+
SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND);
}
@@ -2212,8 +1296,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
*/
static void skd_complete_other(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr);
+ struct fit_completion_entry_v1 *skcomp,
+ struct fit_comp_error_info *skerr);
struct sns_info {
u8 type;
@@ -2262,21 +1346,20 @@ static struct sns_info skd_chkstat_table[] = {
static enum skd_check_status_action
skd_check_status(struct skd_device *skdev,
- u8 cmp_status, volatile struct fit_comp_error_info *skerr)
+ u8 cmp_status, struct fit_comp_error_info *skerr)
{
- int i, n;
+ int i;
- pr_err("(%s): key/asc/ascq/fruc %02x/%02x/%02x/%02x\n",
- skd_name(skdev), skerr->key, skerr->code, skerr->qual,
- skerr->fruc);
+ dev_err(&skdev->pdev->dev, "key/asc/ascq/fruc %02x/%02x/%02x/%02x\n",
+ skerr->key, skerr->code, skerr->qual, skerr->fruc);
- pr_debug("%s:%s:%d stat: t=%02x stat=%02x k=%02x c=%02x q=%02x fruc=%02x\n",
- skdev->name, __func__, __LINE__, skerr->type, cmp_status,
- skerr->key, skerr->code, skerr->qual, skerr->fruc);
+ dev_dbg(&skdev->pdev->dev,
+ "stat: t=%02x stat=%02x k=%02x c=%02x q=%02x fruc=%02x\n",
+ skerr->type, cmp_status, skerr->key, skerr->code, skerr->qual,
+ skerr->fruc);
/* Does the info match an entry in the good category? */
- n = sizeof(skd_chkstat_table) / sizeof(skd_chkstat_table[0]);
- for (i = 0; i < n; i++) {
+ for (i = 0; i < ARRAY_SIZE(skd_chkstat_table); i++) {
struct sns_info *sns = &skd_chkstat_table[i];
if (sns->mask & 0x10)
@@ -2300,10 +1383,9 @@ skd_check_status(struct skd_device *skdev,
continue;
if (sns->action == SKD_CHECK_STATUS_REPORT_SMART_ALERT) {
- pr_err("(%s): SMART Alert: sense key/asc/ascq "
- "%02x/%02x/%02x\n",
- skd_name(skdev), skerr->key,
- skerr->code, skerr->qual);
+ dev_err(&skdev->pdev->dev,
+ "SMART Alert: sense key/asc/ascq %02x/%02x/%02x\n",
+ skerr->key, skerr->code, skerr->qual);
}
return sns->action;
}
@@ -2312,335 +1394,80 @@ skd_check_status(struct skd_device *skdev,
* zero status means good
*/
if (cmp_status) {
- pr_debug("%s:%s:%d status check: error\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "status check: error\n");
return SKD_CHECK_STATUS_REPORT_ERROR;
}
- pr_debug("%s:%s:%d status check good default\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "status check good default\n");
return SKD_CHECK_STATUS_REPORT_GOOD;
}
static void skd_resolve_req_exception(struct skd_device *skdev,
- struct skd_request_context *skreq)
+ struct skd_request_context *skreq,
+ struct request *req)
{
u8 cmp_status = skreq->completion.status;
switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) {
case SKD_CHECK_STATUS_REPORT_GOOD:
case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
- skd_end_request(skdev, skreq, BLK_STS_OK);
+ skreq->status = BLK_STS_OK;
+ blk_mq_complete_request(req);
break;
case SKD_CHECK_STATUS_BUSY_IMMINENT:
skd_log_skreq(skdev, skreq, "retry(busy)");
- blk_requeue_request(skdev->queue, skreq->req);
- pr_info("(%s) drive BUSY imminent\n", skd_name(skdev));
+ blk_requeue_request(skdev->queue, req);
+ dev_info(&skdev->pdev->dev, "drive BUSY imminent\n");
skdev->state = SKD_DRVR_STATE_BUSY_IMMINENT;
skdev->timer_countdown = SKD_TIMER_MINUTES(20);
skd_quiesce_dev(skdev);
break;
case SKD_CHECK_STATUS_REQUEUE_REQUEST:
- if ((unsigned long) ++skreq->req->special < SKD_MAX_RETRIES) {
+ if ((unsigned long) ++req->special < SKD_MAX_RETRIES) {
skd_log_skreq(skdev, skreq, "retry");
- blk_requeue_request(skdev->queue, skreq->req);
+ blk_requeue_request(skdev->queue, req);
break;
}
- /* fall through to report error */
+ /* fall through */
case SKD_CHECK_STATUS_REPORT_ERROR:
default:
- skd_end_request(skdev, skreq, BLK_STS_IOERR);
+ skreq->status = BLK_STS_IOERR;
+ blk_mq_complete_request(req);
break;
}
}
-/* assume spinlock is already held */
static void skd_release_skreq(struct skd_device *skdev,
struct skd_request_context *skreq)
{
- u32 msg_slot;
- struct skd_fitmsg_context *skmsg;
-
- u32 timo_slot;
-
- /*
- * Reclaim the FIT msg buffer if this is
- * the first of the requests it carried to
- * be completed. The FIT msg buffer used to
- * send this request cannot be reused until
- * we are sure the s1120 card has copied
- * it to its memory. The FIT msg might have
- * contained several requests. As soon as
- * any of them are completed we know that
- * the entire FIT msg was transferred.
- * Only the first completed request will
- * match the FIT msg buffer id. The FIT
- * msg buffer id is immediately updated.
- * When subsequent requests complete the FIT
- * msg buffer id won't match, so we know
- * quite cheaply that it is already done.
- */
- msg_slot = skreq->fitmsg_id & SKD_ID_SLOT_MASK;
- SKD_ASSERT(msg_slot < skdev->num_fitmsg_context);
-
- skmsg = &skdev->skmsg_table[msg_slot];
- if (skmsg->id == skreq->fitmsg_id) {
- SKD_ASSERT(skmsg->state == SKD_MSG_STATE_BUSY);
- SKD_ASSERT(skmsg->outstanding > 0);
- skmsg->outstanding--;
- if (skmsg->outstanding == 0) {
- skmsg->state = SKD_MSG_STATE_IDLE;
- skmsg->id += SKD_ID_INCR;
- skmsg->next = skdev->skmsg_free_list;
- skdev->skmsg_free_list = skmsg;
- }
- }
-
- /*
- * Decrease the number of active requests.
- * Also decrements the count in the timeout slot.
- */
- SKD_ASSERT(skdev->in_flight > 0);
- skdev->in_flight -= 1;
-
- timo_slot = skreq->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
- SKD_ASSERT(skdev->timeout_slot[timo_slot] > 0);
- skdev->timeout_slot[timo_slot] -= 1;
-
- /*
- * Reset backpointer
- */
- skreq->req = NULL;
-
/*
* Reclaim the skd_request_context
*/
skreq->state = SKD_REQ_STATE_IDLE;
- skreq->id += SKD_ID_INCR;
- skreq->next = skdev->skreq_free_list;
- skdev->skreq_free_list = skreq;
}
-#define DRIVER_INQ_EVPD_PAGE_CODE 0xDA
-
-static void skd_do_inq_page_00(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr,
- uint8_t *cdb, uint8_t *buf)
-{
- uint16_t insert_pt, max_bytes, drive_pages, drive_bytes, new_size;
-
- /* Caller requested "supported pages". The driver needs to insert
- * its page.
- */
- pr_debug("%s:%s:%d skd_do_driver_inquiry: modify supported pages.\n",
- skdev->name, __func__, __LINE__);
-
- /* If the device rejected the request because the CDB was
- * improperly formed, then just leave.
- */
- if (skcomp->status == SAM_STAT_CHECK_CONDITION &&
- skerr->key == ILLEGAL_REQUEST && skerr->code == 0x24)
- return;
-
- /* Get the amount of space the caller allocated */
- max_bytes = (cdb[3] << 8) | cdb[4];
-
- /* Get the number of pages actually returned by the device */
- drive_pages = (buf[2] << 8) | buf[3];
- drive_bytes = drive_pages + 4;
- new_size = drive_pages + 1;
-
- /* Supported pages must be in numerical order, so find where
- * the driver page needs to be inserted into the list of
- * pages returned by the device.
- */
- for (insert_pt = 4; insert_pt < drive_bytes; insert_pt++) {
- if (buf[insert_pt] == DRIVER_INQ_EVPD_PAGE_CODE)
- return; /* Device using this page code. abort */
- else if (buf[insert_pt] > DRIVER_INQ_EVPD_PAGE_CODE)
- break;
- }
-
- if (insert_pt < max_bytes) {
- uint16_t u;
-
- /* Shift everything up one byte to make room. */
- for (u = new_size + 3; u > insert_pt; u--)
- buf[u] = buf[u - 1];
- buf[insert_pt] = DRIVER_INQ_EVPD_PAGE_CODE;
-
- /* SCSI byte order increment of num_returned_bytes by 1 */
- skcomp->num_returned_bytes =
- be32_to_cpu(skcomp->num_returned_bytes) + 1;
- skcomp->num_returned_bytes =
- be32_to_cpu(skcomp->num_returned_bytes);
- }
-
- /* update page length field to reflect the driver's page too */
- buf[2] = (uint8_t)((new_size >> 8) & 0xFF);
- buf[3] = (uint8_t)((new_size >> 0) & 0xFF);
-}
-
-static void skd_get_link_info(struct pci_dev *pdev, u8 *speed, u8 *width)
-{
- int pcie_reg;
- u16 pci_bus_speed;
- u8 pci_lanes;
-
- pcie_reg = pci_find_capability(pdev, PCI_CAP_ID_EXP);
- if (pcie_reg) {
- u16 linksta;
- pci_read_config_word(pdev, pcie_reg + PCI_EXP_LNKSTA, &linksta);
-
- pci_bus_speed = linksta & 0xF;
- pci_lanes = (linksta & 0x3F0) >> 4;
- } else {
- *speed = STEC_LINK_UNKNOWN;
- *width = 0xFF;
- return;
- }
-
- switch (pci_bus_speed) {
- case 1:
- *speed = STEC_LINK_2_5GTS;
- break;
- case 2:
- *speed = STEC_LINK_5GTS;
- break;
- case 3:
- *speed = STEC_LINK_8GTS;
- break;
- default:
- *speed = STEC_LINK_UNKNOWN;
- break;
- }
-
- if (pci_lanes <= 0x20)
- *width = pci_lanes;
- else
- *width = 0xFF;
-}
-
-static void skd_do_inq_page_da(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr,
- uint8_t *cdb, uint8_t *buf)
-{
- struct pci_dev *pdev = skdev->pdev;
- unsigned max_bytes;
- struct driver_inquiry_data inq;
- u16 val;
-
- pr_debug("%s:%s:%d skd_do_driver_inquiry: return driver page\n",
- skdev->name, __func__, __LINE__);
-
- memset(&inq, 0, sizeof(inq));
-
- inq.page_code = DRIVER_INQ_EVPD_PAGE_CODE;
-
- skd_get_link_info(pdev, &inq.pcie_link_speed, &inq.pcie_link_lanes);
- inq.pcie_bus_number = cpu_to_be16(pdev->bus->number);
- inq.pcie_device_number = PCI_SLOT(pdev->devfn);
- inq.pcie_function_number = PCI_FUNC(pdev->devfn);
-
- pci_read_config_word(pdev, PCI_VENDOR_ID, &val);
- inq.pcie_vendor_id = cpu_to_be16(val);
-
- pci_read_config_word(pdev, PCI_DEVICE_ID, &val);
- inq.pcie_device_id = cpu_to_be16(val);
-
- pci_read_config_word(pdev, PCI_SUBSYSTEM_VENDOR_ID, &val);
- inq.pcie_subsystem_vendor_id = cpu_to_be16(val);
-
- pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &val);
- inq.pcie_subsystem_device_id = cpu_to_be16(val);
-
- /* Driver version, fixed lenth, padded with spaces on the right */
- inq.driver_version_length = sizeof(inq.driver_version);
- memset(&inq.driver_version, ' ', sizeof(inq.driver_version));
- memcpy(inq.driver_version, DRV_VER_COMPL,
- min(sizeof(inq.driver_version), strlen(DRV_VER_COMPL)));
-
- inq.page_length = cpu_to_be16((sizeof(inq) - 4));
-
- /* Clear the error set by the device */
- skcomp->status = SAM_STAT_GOOD;
- memset((void *)skerr, 0, sizeof(*skerr));
-
- /* copy response into output buffer */
- max_bytes = (cdb[3] << 8) | cdb[4];
- memcpy(buf, &inq, min_t(unsigned, max_bytes, sizeof(inq)));
-
- skcomp->num_returned_bytes =
- be32_to_cpu(min_t(uint16_t, max_bytes, sizeof(inq)));
-}
-
-static void skd_do_driver_inq(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr,
- uint8_t *cdb, uint8_t *buf)
-{
- if (!buf)
- return;
- else if (cdb[0] != INQUIRY)
- return; /* Not an INQUIRY */
- else if ((cdb[1] & 1) == 0)
- return; /* EVPD not set */
- else if (cdb[2] == 0)
- /* Need to add driver's page to supported pages list */
- skd_do_inq_page_00(skdev, skcomp, skerr, cdb, buf);
- else if (cdb[2] == DRIVER_INQ_EVPD_PAGE_CODE)
- /* Caller requested driver's page */
- skd_do_inq_page_da(skdev, skcomp, skerr, cdb, buf);
-}
-
-static unsigned char *skd_sg_1st_page_ptr(struct scatterlist *sg)
-{
- if (!sg)
- return NULL;
- if (!sg_page(sg))
- return NULL;
- return sg_virt(sg);
-}
-
-static void skd_process_scsi_inq(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1
- *skcomp,
- volatile struct fit_comp_error_info *skerr,
- struct skd_special_context *skspcl)
-{
- uint8_t *buf;
- struct fit_msg_hdr *fmh = (struct fit_msg_hdr *)skspcl->msg_buf;
- struct skd_scsi_request *scsi_req = (struct skd_scsi_request *)&fmh[1];
-
- dma_sync_sg_for_cpu(skdev->class_dev, skspcl->req.sg, skspcl->req.n_sg,
- skspcl->req.sg_data_dir);
- buf = skd_sg_1st_page_ptr(skspcl->req.sg);
-
- if (buf)
- skd_do_driver_inq(skdev, skcomp, skerr, scsi_req->cdb, buf);
-}
-
-
static int skd_isr_completion_posted(struct skd_device *skdev,
int limit, int *enqueued)
{
- volatile struct fit_completion_entry_v1 *skcmp = NULL;
- volatile struct fit_comp_error_info *skerr;
+ struct fit_completion_entry_v1 *skcmp;
+ struct fit_comp_error_info *skerr;
u16 req_id;
- u32 req_slot;
+ u32 tag;
+ u16 hwq = 0;
+ struct request *rq;
struct skd_request_context *skreq;
- u16 cmp_cntxt = 0;
- u8 cmp_status = 0;
- u8 cmp_cycle = 0;
- u32 cmp_bytes = 0;
+ u16 cmp_cntxt;
+ u8 cmp_status;
+ u8 cmp_cycle;
+ u32 cmp_bytes;
int rc = 0;
int processed = 0;
+ lockdep_assert_held(&skdev->lock);
+
for (;; ) {
SKD_ASSERT(skdev->skcomp_ix < SKD_N_COMPLETION_ENTRY);
@@ -2652,16 +1479,14 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
skerr = &skdev->skerr_table[skdev->skcomp_ix];
- pr_debug("%s:%s:%d "
- "cycle=%d ix=%d got cycle=%d cmdctxt=0x%x stat=%d "
- "busy=%d rbytes=0x%x proto=%d\n",
- skdev->name, __func__, __LINE__, skdev->skcomp_cycle,
- skdev->skcomp_ix, cmp_cycle, cmp_cntxt, cmp_status,
- skdev->in_flight, cmp_bytes, skdev->proto_ver);
+ dev_dbg(&skdev->pdev->dev,
+ "cycle=%d ix=%d got cycle=%d cmdctxt=0x%x stat=%d busy=%d rbytes=0x%x proto=%d\n",
+ skdev->skcomp_cycle, skdev->skcomp_ix, cmp_cycle,
+ cmp_cntxt, cmp_status, skd_in_flight(skdev),
+ cmp_bytes, skdev->proto_ver);
if (cmp_cycle != skdev->skcomp_cycle) {
- pr_debug("%s:%s:%d end of completions\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "end of completions\n");
break;
}
/*
@@ -2680,49 +1505,38 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
* r/w request (see skd_start() above) or a special request.
*/
req_id = cmp_cntxt;
- req_slot = req_id & SKD_ID_SLOT_AND_TABLE_MASK;
+ tag = req_id & SKD_ID_SLOT_AND_TABLE_MASK;
/* Is this other than a r/w request? */
- if (req_slot >= skdev->num_req_context) {
+ if (tag >= skdev->num_req_context) {
/*
* This is not a completion for a r/w request.
*/
+ WARN_ON_ONCE(blk_mq_tag_to_rq(skdev->tag_set.tags[hwq],
+ tag));
skd_complete_other(skdev, skcmp, skerr);
continue;
}
- skreq = &skdev->skreq_table[req_slot];
+ rq = blk_mq_tag_to_rq(skdev->tag_set.tags[hwq], tag);
+ if (WARN(!rq, "No request for tag %#x -> %#x\n", cmp_cntxt,
+ tag))
+ continue;
+ skreq = blk_mq_rq_to_pdu(rq);
/*
* Make sure the request ID for the slot matches.
*/
if (skreq->id != req_id) {
- pr_debug("%s:%s:%d mismatch comp_id=0x%x req_id=0x%x\n",
- skdev->name, __func__, __LINE__,
- req_id, skreq->id);
- {
- u16 new_id = cmp_cntxt;
- pr_err("(%s): Completion mismatch "
- "comp_id=0x%04x skreq=0x%04x new=0x%04x\n",
- skd_name(skdev), req_id,
- skreq->id, new_id);
+ dev_err(&skdev->pdev->dev,
+ "Completion mismatch comp_id=0x%04x skreq=0x%04x new=0x%04x\n",
+ req_id, skreq->id, cmp_cntxt);
- continue;
- }
+ continue;
}
SKD_ASSERT(skreq->state == SKD_REQ_STATE_BUSY);
- if (skreq->state == SKD_REQ_STATE_ABORTED) {
- pr_debug("%s:%s:%d reclaim req %p id=%04x\n",
- skdev->name, __func__, __LINE__,
- skreq, skreq->id);
- /* a previously timed out command can
- * now be cleaned up */
- skd_release_skreq(skdev, skreq);
- continue;
- }
-
skreq->completion = *skcmp;
if (unlikely(cmp_status == SAM_STAT_CHECK_CONDITION)) {
skreq->err_info = *skerr;
@@ -2734,27 +1548,17 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
if (skreq->n_sg > 0)
skd_postop_sg_list(skdev, skreq);
- if (!skreq->req) {
- pr_debug("%s:%s:%d NULL backptr skdreq %p, "
- "req=0x%x req_id=0x%x\n",
- skdev->name, __func__, __LINE__,
- skreq, skreq->id, req_id);
- } else {
- /*
- * Capture the outcome and post it back to the
- * native request.
- */
- if (likely(cmp_status == SAM_STAT_GOOD))
- skd_end_request(skdev, skreq, BLK_STS_OK);
- else
- skd_resolve_req_exception(skdev, skreq);
- }
+ skd_release_skreq(skdev, skreq);
/*
- * Release the skreq, its FIT msg (if one), timeout slot,
- * and queue depth.
+ * Capture the outcome and post it back to the native request.
*/
- skd_release_skreq(skdev, skreq);
+ if (likely(cmp_status == SAM_STAT_GOOD)) {
+ skreq->status = BLK_STS_OK;
+ blk_mq_complete_request(rq);
+ } else {
+ skd_resolve_req_exception(skdev, skreq, rq);
+ }
/* skd_isr_comp_limit equal zero means no limit */
if (limit) {
@@ -2765,8 +1569,8 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
}
}
- if ((skdev->state == SKD_DRVR_STATE_PAUSING)
- && (skdev->in_flight) == 0) {
+ if (skdev->state == SKD_DRVR_STATE_PAUSING &&
+ skd_in_flight(skdev) == 0) {
skdev->state = SKD_DRVR_STATE_PAUSED;
wake_up_interruptible(&skdev->waitq);
}
@@ -2775,21 +1579,22 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
}
static void skd_complete_other(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr)
+ struct fit_completion_entry_v1 *skcomp,
+ struct fit_comp_error_info *skerr)
{
u32 req_id = 0;
u32 req_table;
u32 req_slot;
struct skd_special_context *skspcl;
+ lockdep_assert_held(&skdev->lock);
+
req_id = skcomp->tag;
req_table = req_id & SKD_ID_TABLE_MASK;
req_slot = req_id & SKD_ID_SLOT_MASK;
- pr_debug("%s:%s:%d table=0x%x id=0x%x slot=%d\n",
- skdev->name, __func__, __LINE__,
- req_table, req_id, req_slot);
+ dev_dbg(&skdev->pdev->dev, "table=0x%x id=0x%x slot=%d\n", req_table,
+ req_id, req_slot);
/*
* Based on the request id, determine how to dispatch this completion.
@@ -2799,28 +1604,12 @@ static void skd_complete_other(struct skd_device *skdev,
switch (req_table) {
case SKD_ID_RW_REQUEST:
/*
- * The caller, skd_completion_posted_isr() above,
+ * The caller, skd_isr_completion_posted() above,
* handles r/w requests. The only way we get here
* is if the req_slot is out of bounds.
*/
break;
- case SKD_ID_SPECIAL_REQUEST:
- /*
- * Make sure the req_slot is in bounds and that the id
- * matches.
- */
- if (req_slot < skdev->n_special) {
- skspcl = &skdev->skspcl_table[req_slot];
- if (skspcl->req.id == req_id &&
- skspcl->req.state == SKD_REQ_STATE_BUSY) {
- skd_complete_special(skdev,
- skcomp, skerr, skspcl);
- return;
- }
- }
- break;
-
case SKD_ID_INTERNAL:
if (req_slot == 0) {
skspcl = &skdev->internal_skspcl;
@@ -2851,72 +1640,9 @@ static void skd_complete_other(struct skd_device *skdev,
*/
}
-static void skd_complete_special(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1
- *skcomp,
- volatile struct fit_comp_error_info *skerr,
- struct skd_special_context *skspcl)
-{
- pr_debug("%s:%s:%d completing special request %p\n",
- skdev->name, __func__, __LINE__, skspcl);
- if (skspcl->orphaned) {
- /* Discard orphaned request */
- /* ?: Can this release directly or does it need
- * to use a worker? */
- pr_debug("%s:%s:%d release orphaned %p\n",
- skdev->name, __func__, __LINE__, skspcl);
- skd_release_special(skdev, skspcl);
- return;
- }
-
- skd_process_scsi_inq(skdev, skcomp, skerr, skspcl);
-
- skspcl->req.state = SKD_REQ_STATE_COMPLETED;
- skspcl->req.completion = *skcomp;
- skspcl->req.err_info = *skerr;
-
- skd_log_check_status(skdev, skspcl->req.completion.status, skerr->key,
- skerr->code, skerr->qual, skerr->fruc);
-
- wake_up_interruptible(&skdev->waitq);
-}
-
-/* assume spinlock is already held */
-static void skd_release_special(struct skd_device *skdev,
- struct skd_special_context *skspcl)
-{
- int i, was_depleted;
-
- for (i = 0; i < skspcl->req.n_sg; i++) {
- struct page *page = sg_page(&skspcl->req.sg[i]);
- __free_page(page);
- }
-
- was_depleted = (skdev->skspcl_free_list == NULL);
-
- skspcl->req.state = SKD_REQ_STATE_IDLE;
- skspcl->req.id += SKD_ID_INCR;
- skspcl->req.next =
- (struct skd_request_context *)skdev->skspcl_free_list;
- skdev->skspcl_free_list = (struct skd_special_context *)skspcl;
-
- if (was_depleted) {
- pr_debug("%s:%s:%d skspcl was depleted\n",
- skdev->name, __func__, __LINE__);
- /* Free list was depleted. Their might be waiters. */
- wake_up_interruptible(&skdev->waitq);
- }
-}
-
static void skd_reset_skcomp(struct skd_device *skdev)
{
- u32 nbytes;
- struct fit_completion_entry_v1 *skcomp;
-
- nbytes = sizeof(*skcomp) * SKD_N_COMPLETION_ENTRY;
- nbytes += sizeof(struct fit_comp_error_info) * SKD_N_COMPLETION_ENTRY;
-
- memset(skdev->skcomp_table, 0, nbytes);
+ memset(skdev->skcomp_table, 0, SKD_SKCOMP_SIZE);
skdev->skcomp_ix = 0;
skdev->skcomp_cycle = 1;
@@ -2941,7 +1667,7 @@ static void skd_completion_worker(struct work_struct *work)
* process everything in compq
*/
skd_isr_completion_posted(skdev, 0, &flush_enqueued);
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
spin_unlock_irqrestore(&skdev->lock, flags);
}
@@ -2951,14 +1677,13 @@ static void skd_isr_msg_from_dev(struct skd_device *skdev);
static irqreturn_t
skd_isr(int irq, void *ptr)
{
- struct skd_device *skdev;
+ struct skd_device *skdev = ptr;
u32 intstat;
u32 ack;
int rc = 0;
int deferred = 0;
int flush_enqueued = 0;
- skdev = (struct skd_device *)ptr;
spin_lock(&skdev->lock);
for (;; ) {
@@ -2967,8 +1692,8 @@ skd_isr(int irq, void *ptr)
ack = FIT_INT_DEF_MASK;
ack &= intstat;
- pr_debug("%s:%s:%d intstat=0x%x ack=0x%x\n",
- skdev->name, __func__, __LINE__, intstat, ack);
+ dev_dbg(&skdev->pdev->dev, "intstat=0x%x ack=0x%x\n", intstat,
+ ack);
/* As long as there is an int pending on device, keep
* running loop. When none, get out, but if we've never
@@ -3018,12 +1743,12 @@ skd_isr(int irq, void *ptr)
}
if (unlikely(flush_enqueued))
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
if (deferred)
schedule_work(&skdev->completion_worker);
else if (!flush_enqueued)
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
spin_unlock(&skdev->lock);
@@ -3033,13 +1758,13 @@ skd_isr(int irq, void *ptr)
static void skd_drive_fault(struct skd_device *skdev)
{
skdev->state = SKD_DRVR_STATE_FAULT;
- pr_err("(%s): Drive FAULT\n", skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "Drive FAULT\n");
}
static void skd_drive_disappeared(struct skd_device *skdev)
{
skdev->state = SKD_DRVR_STATE_DISAPPEARED;
- pr_err("(%s): Drive DISAPPEARED\n", skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "Drive DISAPPEARED\n");
}
static void skd_isr_fwstate(struct skd_device *skdev)
@@ -3052,10 +1777,9 @@ static void skd_isr_fwstate(struct skd_device *skdev)
sense = SKD_READL(skdev, FIT_STATUS);
state = sense & FIT_SR_DRIVE_STATE_MASK;
- pr_err("(%s): s1120 state %s(%d)=>%s(%d)\n",
- skd_name(skdev),
- skd_drive_state_to_str(skdev->drive_state), skdev->drive_state,
- skd_drive_state_to_str(state), state);
+ dev_err(&skdev->pdev->dev, "s1120 state %s(%d)=>%s(%d)\n",
+ skd_drive_state_to_str(skdev->drive_state), skdev->drive_state,
+ skd_drive_state_to_str(state), state);
skdev->drive_state = state;
@@ -3066,7 +1790,7 @@ static void skd_isr_fwstate(struct skd_device *skdev)
break;
}
if (skdev->state == SKD_DRVR_STATE_RESTARTING)
- skd_recover_requests(skdev, 0);
+ skd_recover_requests(skdev);
if (skdev->state == SKD_DRVR_STATE_WAIT_BOOT) {
skdev->timer_countdown = SKD_STARTING_TIMO;
skdev->state = SKD_DRVR_STATE_STARTING;
@@ -3087,11 +1811,11 @@ static void skd_isr_fwstate(struct skd_device *skdev)
skdev->cur_max_queue_depth * 2 / 3 + 1;
if (skdev->queue_low_water_mark < 1)
skdev->queue_low_water_mark = 1;
- pr_info(
- "(%s): Queue depth limit=%d dev=%d lowat=%d\n",
- skd_name(skdev),
- skdev->cur_max_queue_depth,
- skdev->dev_max_queue_depth, skdev->queue_low_water_mark);
+ dev_info(&skdev->pdev->dev,
+ "Queue depth limit=%d dev=%d lowat=%d\n",
+ skdev->cur_max_queue_depth,
+ skdev->dev_max_queue_depth,
+ skdev->queue_low_water_mark);
skd_refresh_device_data(skdev);
break;
@@ -3107,7 +1831,7 @@ static void skd_isr_fwstate(struct skd_device *skdev)
*/
skdev->state = SKD_DRVR_STATE_BUSY_SANITIZE;
skdev->timer_countdown = SKD_TIMER_SECONDS(3);
- blk_start_queue(skdev->queue);
+ schedule_work(&skdev->start_queue);
break;
case FIT_SR_DRIVE_BUSY_ERASE:
skdev->state = SKD_DRVR_STATE_BUSY_ERASE;
@@ -3128,8 +1852,7 @@ static void skd_isr_fwstate(struct skd_device *skdev)
}
break;
case FIT_SR_DRIVE_FW_BOOTING:
- pr_debug("%s:%s:%d ISR FIT_SR_DRIVE_FW_BOOTING %s\n",
- skdev->name, __func__, __LINE__, skdev->name);
+ dev_dbg(&skdev->pdev->dev, "ISR FIT_SR_DRIVE_FW_BOOTING\n");
skdev->state = SKD_DRVR_STATE_WAIT_BOOT;
skdev->timer_countdown = SKD_WAIT_BOOT_TIMO;
break;
@@ -3141,17 +1864,17 @@ static void skd_isr_fwstate(struct skd_device *skdev)
case FIT_SR_DRIVE_FAULT:
skd_drive_fault(skdev);
- skd_recover_requests(skdev, 0);
- blk_start_queue(skdev->queue);
+ skd_recover_requests(skdev);
+ schedule_work(&skdev->start_queue);
break;
/* PCIe bus returned all Fs? */
case 0xFF:
- pr_info("(%s): state=0x%x sense=0x%x\n",
- skd_name(skdev), state, sense);
+ dev_info(&skdev->pdev->dev, "state=0x%x sense=0x%x\n", state,
+ sense);
skd_drive_disappeared(skdev);
- skd_recover_requests(skdev, 0);
- blk_start_queue(skdev->queue);
+ skd_recover_requests(skdev);
+ schedule_work(&skdev->start_queue);
break;
default:
/*
@@ -3159,92 +1882,33 @@ static void skd_isr_fwstate(struct skd_device *skdev)
*/
break;
}
- pr_err("(%s): Driver state %s(%d)=>%s(%d)\n",
- skd_name(skdev),
- skd_skdev_state_to_str(prev_driver_state), prev_driver_state,
- skd_skdev_state_to_str(skdev->state), skdev->state);
+ dev_err(&skdev->pdev->dev, "Driver state %s(%d)=>%s(%d)\n",
+ skd_skdev_state_to_str(prev_driver_state), prev_driver_state,
+ skd_skdev_state_to_str(skdev->state), skdev->state);
}
-static void skd_recover_requests(struct skd_device *skdev, int requeue)
+static void skd_recover_request(struct request *req, void *data, bool reserved)
{
- int i;
-
- for (i = 0; i < skdev->num_req_context; i++) {
- struct skd_request_context *skreq = &skdev->skreq_table[i];
-
- if (skreq->state == SKD_REQ_STATE_BUSY) {
- skd_log_skreq(skdev, skreq, "recover");
-
- SKD_ASSERT((skreq->id & SKD_ID_INCR) != 0);
- SKD_ASSERT(skreq->req != NULL);
-
- /* Release DMA resources for the request. */
- if (skreq->n_sg > 0)
- skd_postop_sg_list(skdev, skreq);
-
- if (requeue &&
- (unsigned long) ++skreq->req->special <
- SKD_MAX_RETRIES)
- blk_requeue_request(skdev->queue, skreq->req);
- else
- skd_end_request(skdev, skreq, BLK_STS_IOERR);
-
- skreq->req = NULL;
-
- skreq->state = SKD_REQ_STATE_IDLE;
- skreq->id += SKD_ID_INCR;
- }
- if (i > 0)
- skreq[-1].next = skreq;
- skreq->next = NULL;
- }
- skdev->skreq_free_list = skdev->skreq_table;
+ struct skd_device *const skdev = data;
+ struct skd_request_context *skreq = blk_mq_rq_to_pdu(req);
- for (i = 0; i < skdev->num_fitmsg_context; i++) {
- struct skd_fitmsg_context *skmsg = &skdev->skmsg_table[i];
+ if (skreq->state != SKD_REQ_STATE_BUSY)
+ return;
- if (skmsg->state == SKD_MSG_STATE_BUSY) {
- skd_log_skmsg(skdev, skmsg, "salvaged");
- SKD_ASSERT((skmsg->id & SKD_ID_INCR) != 0);
- skmsg->state = SKD_MSG_STATE_IDLE;
- skmsg->id += SKD_ID_INCR;
- }
- if (i > 0)
- skmsg[-1].next = skmsg;
- skmsg->next = NULL;
- }
- skdev->skmsg_free_list = skdev->skmsg_table;
+ skd_log_skreq(skdev, skreq, "recover");
- for (i = 0; i < skdev->n_special; i++) {
- struct skd_special_context *skspcl = &skdev->skspcl_table[i];
+ /* Release DMA resources for the request. */
+ if (skreq->n_sg > 0)
+ skd_postop_sg_list(skdev, skreq);
- /* If orphaned, reclaim it because it has already been reported
- * to the process as an error (it was just waiting for
- * a completion that didn't come, and now it will never come)
- * If busy, change to a state that will cause it to error
- * out in the wait routine and let it do the normal
- * reporting and reclaiming
- */
- if (skspcl->req.state == SKD_REQ_STATE_BUSY) {
- if (skspcl->orphaned) {
- pr_debug("%s:%s:%d orphaned %p\n",
- skdev->name, __func__, __LINE__,
- skspcl);
- skd_release_special(skdev, skspcl);
- } else {
- pr_debug("%s:%s:%d not orphaned %p\n",
- skdev->name, __func__, __LINE__,
- skspcl);
- skspcl->req.state = SKD_REQ_STATE_ABORTED;
- }
- }
- }
- skdev->skspcl_free_list = skdev->skspcl_table;
-
- for (i = 0; i < SKD_N_TIMEOUT_SLOT; i++)
- skdev->timeout_slot[i] = 0;
+ skreq->state = SKD_REQ_STATE_IDLE;
+ skreq->status = BLK_STS_IOERR;
+ blk_mq_complete_request(req);
+}
- skdev->in_flight = 0;
+static void skd_recover_requests(struct skd_device *skdev)
+{
+ blk_mq_tagset_busy_iter(&skdev->tag_set, skd_recover_request, skdev);
}
static void skd_isr_msg_from_dev(struct skd_device *skdev)
@@ -3255,8 +1919,8 @@ static void skd_isr_msg_from_dev(struct skd_device *skdev)
mfd = SKD_READL(skdev, FIT_MSG_FROM_DEVICE);
- pr_debug("%s:%s:%d mfd=0x%x last_mtd=0x%x\n",
- skdev->name, __func__, __LINE__, mfd, skdev->last_mtd);
+ dev_dbg(&skdev->pdev->dev, "mfd=0x%x last_mtd=0x%x\n", mfd,
+ skdev->last_mtd);
/* ignore any mtd that is an ack for something we didn't send */
if (FIT_MXD_TYPE(mfd) != FIT_MXD_TYPE(skdev->last_mtd))
@@ -3267,13 +1931,10 @@ static void skd_isr_msg_from_dev(struct skd_device *skdev)
skdev->proto_ver = FIT_PROTOCOL_MAJOR_VER(mfd);
if (skdev->proto_ver != FIT_PROTOCOL_VERSION_1) {
- pr_err("(%s): protocol mismatch\n",
- skdev->name);
- pr_err("(%s): got=%d support=%d\n",
- skdev->name, skdev->proto_ver,
- FIT_PROTOCOL_VERSION_1);
- pr_err("(%s): please upgrade driver\n",
- skdev->name);
+ dev_err(&skdev->pdev->dev, "protocol mismatch\n");
+ dev_err(&skdev->pdev->dev, " got=%d support=%d\n",
+ skdev->proto_ver, FIT_PROTOCOL_VERSION_1);
+ dev_err(&skdev->pdev->dev, " please upgrade driver\n");
skdev->state = SKD_DRVR_STATE_PROTOCOL_MISMATCH;
skd_soft_reset(skdev);
break;
@@ -3327,9 +1988,8 @@ static void skd_isr_msg_from_dev(struct skd_device *skdev)
SKD_WRITEL(skdev, mtd, FIT_MSG_TO_DEVICE);
skdev->last_mtd = mtd;
- pr_err("(%s): Time sync driver=0x%x device=0x%x\n",
- skd_name(skdev),
- skdev->connect_time_stamp, skdev->drive_jiffies);
+ dev_err(&skdev->pdev->dev, "Time sync driver=0x%x device=0x%x\n",
+ skdev->connect_time_stamp, skdev->drive_jiffies);
break;
case FIT_MTD_ARM_QUEUE:
@@ -3351,8 +2011,7 @@ static void skd_disable_interrupts(struct skd_device *skdev)
sense = SKD_READL(skdev, FIT_CONTROL);
sense &= ~FIT_CR_ENABLE_INTERRUPTS;
SKD_WRITEL(skdev, sense, FIT_CONTROL);
- pr_debug("%s:%s:%d sense 0x%x\n",
- skdev->name, __func__, __LINE__, sense);
+ dev_dbg(&skdev->pdev->dev, "sense 0x%x\n", sense);
/* Note that the 1s is written. A 1-bit means
* disable, a 0 means enable.
@@ -3371,13 +2030,11 @@ static void skd_enable_interrupts(struct skd_device *skdev)
/* Note that the compliment of mask is written. A 1-bit means
* disable, a 0 means enable. */
SKD_WRITEL(skdev, ~val, FIT_INT_MASK_HOST);
- pr_debug("%s:%s:%d interrupt mask=0x%x\n",
- skdev->name, __func__, __LINE__, ~val);
+ dev_dbg(&skdev->pdev->dev, "interrupt mask=0x%x\n", ~val);
val = SKD_READL(skdev, FIT_CONTROL);
val |= FIT_CR_ENABLE_INTERRUPTS;
- pr_debug("%s:%s:%d control=0x%x\n",
- skdev->name, __func__, __LINE__, val);
+ dev_dbg(&skdev->pdev->dev, "control=0x%x\n", val);
SKD_WRITEL(skdev, val, FIT_CONTROL);
}
@@ -3393,8 +2050,7 @@ static void skd_soft_reset(struct skd_device *skdev)
val = SKD_READL(skdev, FIT_CONTROL);
val |= (FIT_CR_SOFT_RESET);
- pr_debug("%s:%s:%d control=0x%x\n",
- skdev->name, __func__, __LINE__, val);
+ dev_dbg(&skdev->pdev->dev, "control=0x%x\n", val);
SKD_WRITEL(skdev, val, FIT_CONTROL);
}
@@ -3411,8 +2067,7 @@ static void skd_start_device(struct skd_device *skdev)
sense = SKD_READL(skdev, FIT_STATUS);
- pr_debug("%s:%s:%d initial status=0x%x\n",
- skdev->name, __func__, __LINE__, sense);
+ dev_dbg(&skdev->pdev->dev, "initial status=0x%x\n", sense);
state = sense & FIT_SR_DRIVE_STATE_MASK;
skdev->drive_state = state;
@@ -3425,25 +2080,23 @@ static void skd_start_device(struct skd_device *skdev)
switch (skdev->drive_state) {
case FIT_SR_DRIVE_OFFLINE:
- pr_err("(%s): Drive offline...\n", skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "Drive offline...\n");
break;
case FIT_SR_DRIVE_FW_BOOTING:
- pr_debug("%s:%s:%d FIT_SR_DRIVE_FW_BOOTING %s\n",
- skdev->name, __func__, __LINE__, skdev->name);
+ dev_dbg(&skdev->pdev->dev, "FIT_SR_DRIVE_FW_BOOTING\n");
skdev->state = SKD_DRVR_STATE_WAIT_BOOT;
skdev->timer_countdown = SKD_WAIT_BOOT_TIMO;
break;
case FIT_SR_DRIVE_BUSY_SANITIZE:
- pr_info("(%s): Start: BUSY_SANITIZE\n",
- skd_name(skdev));
+ dev_info(&skdev->pdev->dev, "Start: BUSY_SANITIZE\n");
skdev->state = SKD_DRVR_STATE_BUSY_SANITIZE;
skdev->timer_countdown = SKD_STARTED_BUSY_TIMO;
break;
case FIT_SR_DRIVE_BUSY_ERASE:
- pr_info("(%s): Start: BUSY_ERASE\n", skd_name(skdev));
+ dev_info(&skdev->pdev->dev, "Start: BUSY_ERASE\n");
skdev->state = SKD_DRVR_STATE_BUSY_ERASE;
skdev->timer_countdown = SKD_STARTED_BUSY_TIMO;
break;
@@ -3454,14 +2107,13 @@ static void skd_start_device(struct skd_device *skdev)
break;
case FIT_SR_DRIVE_BUSY:
- pr_err("(%s): Drive Busy...\n", skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "Drive Busy...\n");
skdev->state = SKD_DRVR_STATE_BUSY;
skdev->timer_countdown = SKD_STARTED_BUSY_TIMO;
break;
case FIT_SR_DRIVE_SOFT_RESET:
- pr_err("(%s) drive soft reset in prog\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "drive soft reset in prog\n");
break;
case FIT_SR_DRIVE_FAULT:
@@ -3471,9 +2123,8 @@ static void skd_start_device(struct skd_device *skdev)
*/
skd_drive_fault(skdev);
/*start the queue so we can respond with error to requests */
- pr_debug("%s:%s:%d starting %s queue\n",
- skdev->name, __func__, __LINE__, skdev->name);
- blk_start_queue(skdev->queue);
+ dev_dbg(&skdev->pdev->dev, "starting queue\n");
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = -1;
wake_up_interruptible(&skdev->waitq);
break;
@@ -3483,38 +2134,33 @@ static void skd_start_device(struct skd_device *skdev)
* to the BAR1 addresses. */
skd_drive_disappeared(skdev);
/*start the queue so we can respond with error to requests */
- pr_debug("%s:%s:%d starting %s queue to error-out reqs\n",
- skdev->name, __func__, __LINE__, skdev->name);
- blk_start_queue(skdev->queue);
+ dev_dbg(&skdev->pdev->dev,
+ "starting queue to error-out reqs\n");
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = -1;
wake_up_interruptible(&skdev->waitq);
break;
default:
- pr_err("(%s) Start: unknown state %x\n",
- skd_name(skdev), skdev->drive_state);
+ dev_err(&skdev->pdev->dev, "Start: unknown state %x\n",
+ skdev->drive_state);
break;
}
state = SKD_READL(skdev, FIT_CONTROL);
- pr_debug("%s:%s:%d FIT Control Status=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "FIT Control Status=0x%x\n", state);
state = SKD_READL(skdev, FIT_INT_STATUS_HOST);
- pr_debug("%s:%s:%d Intr Status=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "Intr Status=0x%x\n", state);
state = SKD_READL(skdev, FIT_INT_MASK_HOST);
- pr_debug("%s:%s:%d Intr Mask=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "Intr Mask=0x%x\n", state);
state = SKD_READL(skdev, FIT_MSG_FROM_DEVICE);
- pr_debug("%s:%s:%d Msg from Dev=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "Msg from Dev=0x%x\n", state);
state = SKD_READL(skdev, FIT_HW_VERSION);
- pr_debug("%s:%s:%d HW version=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "HW version=0x%x\n", state);
spin_unlock_irqrestore(&skdev->lock, flags);
}
@@ -3529,14 +2175,12 @@ static void skd_stop_device(struct skd_device *skdev)
spin_lock_irqsave(&skdev->lock, flags);
if (skdev->state != SKD_DRVR_STATE_ONLINE) {
- pr_err("(%s): skd_stop_device not online no sync\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s not online no sync\n", __func__);
goto stop_out;
}
if (skspcl->req.state != SKD_REQ_STATE_IDLE) {
- pr_err("(%s): skd_stop_device no special\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s no special\n", __func__);
goto stop_out;
}
@@ -3554,16 +2198,13 @@ static void skd_stop_device(struct skd_device *skdev)
switch (skdev->sync_done) {
case 0:
- pr_err("(%s): skd_stop_device no sync\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s no sync\n", __func__);
break;
case 1:
- pr_err("(%s): skd_stop_device sync done\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s sync done\n", __func__);
break;
default:
- pr_err("(%s): skd_stop_device sync error\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s sync error\n", __func__);
}
stop_out:
@@ -3593,8 +2234,8 @@ stop_out:
}
if (dev_state != FIT_SR_DRIVE_INIT)
- pr_err("(%s): skd_stop_device state error 0x%02x\n",
- skd_name(skdev), dev_state);
+ dev_err(&skdev->pdev->dev, "%s state error 0x%02x\n", __func__,
+ dev_state);
}
/* assume spinlock is held */
@@ -3607,8 +2248,7 @@ static void skd_restart_device(struct skd_device *skdev)
state = SKD_READL(skdev, FIT_STATUS);
- pr_debug("%s:%s:%d drive status=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "drive status=0x%x\n", state);
state &= FIT_SR_DRIVE_STATE_MASK;
skdev->drive_state = state;
@@ -3628,9 +2268,8 @@ static int skd_quiesce_dev(struct skd_device *skdev)
switch (skdev->state) {
case SKD_DRVR_STATE_BUSY:
case SKD_DRVR_STATE_BUSY_IMMINENT:
- pr_debug("%s:%s:%d stopping %s queue\n",
- skdev->name, __func__, __LINE__, skdev->name);
- blk_stop_queue(skdev->queue);
+ dev_dbg(&skdev->pdev->dev, "stopping queue\n");
+ blk_mq_stop_hw_queues(skdev->queue);
break;
case SKD_DRVR_STATE_ONLINE:
case SKD_DRVR_STATE_STOPPING:
@@ -3642,8 +2281,8 @@ static int skd_quiesce_dev(struct skd_device *skdev)
case SKD_DRVR_STATE_RESUMING:
default:
rc = -EINVAL;
- pr_debug("%s:%s:%d state [%d] not implemented\n",
- skdev->name, __func__, __LINE__, skdev->state);
+ dev_dbg(&skdev->pdev->dev, "state [%d] not implemented\n",
+ skdev->state);
}
return rc;
}
@@ -3655,8 +2294,7 @@ static int skd_unquiesce_dev(struct skd_device *skdev)
skd_log_skdev(skdev, "unquiesce");
if (skdev->state == SKD_DRVR_STATE_ONLINE) {
- pr_debug("%s:%s:%d **** device already ONLINE\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "**** device already ONLINE\n");
return 0;
}
if (skdev->drive_state != FIT_SR_DRIVE_ONLINE) {
@@ -3669,8 +2307,7 @@ static int skd_unquiesce_dev(struct skd_device *skdev)
* to become available.
*/
skdev->state = SKD_DRVR_STATE_BUSY;
- pr_debug("%s:%s:%d drive BUSY state\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "drive BUSY state\n");
return 0;
}
@@ -3689,26 +2326,24 @@ static int skd_unquiesce_dev(struct skd_device *skdev)
case SKD_DRVR_STATE_IDLE:
case SKD_DRVR_STATE_LOAD:
skdev->state = SKD_DRVR_STATE_ONLINE;
- pr_err("(%s): Driver state %s(%d)=>%s(%d)\n",
- skd_name(skdev),
- skd_skdev_state_to_str(prev_driver_state),
- prev_driver_state, skd_skdev_state_to_str(skdev->state),
- skdev->state);
- pr_debug("%s:%s:%d **** device ONLINE...starting block queue\n",
- skdev->name, __func__, __LINE__);
- pr_debug("%s:%s:%d starting %s queue\n",
- skdev->name, __func__, __LINE__, skdev->name);
- pr_info("(%s): STEC s1120 ONLINE\n", skd_name(skdev));
- blk_start_queue(skdev->queue);
+ dev_err(&skdev->pdev->dev, "Driver state %s(%d)=>%s(%d)\n",
+ skd_skdev_state_to_str(prev_driver_state),
+ prev_driver_state, skd_skdev_state_to_str(skdev->state),
+ skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "**** device ONLINE...starting block queue\n");
+ dev_dbg(&skdev->pdev->dev, "starting queue\n");
+ dev_info(&skdev->pdev->dev, "STEC s1120 ONLINE\n");
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = 1;
wake_up_interruptible(&skdev->waitq);
break;
case SKD_DRVR_STATE_DISAPPEARED:
default:
- pr_debug("%s:%s:%d **** driver state %d, not implemented \n",
- skdev->name, __func__, __LINE__,
- skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "**** driver state %d, not implemented\n",
+ skdev->state);
return -EBUSY;
}
return 0;
@@ -3726,11 +2361,10 @@ static irqreturn_t skd_reserved_isr(int irq, void *skd_host_data)
unsigned long flags;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
- pr_err("(%s): MSIX reserved irq %d = 0x%x\n", skd_name(skdev),
- irq, SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_err(&skdev->pdev->dev, "MSIX reserved irq %d = 0x%x\n", irq,
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_INT_RESERVED_MASK, FIT_INT_STATUS_HOST);
spin_unlock_irqrestore(&skdev->lock, flags);
return IRQ_HANDLED;
@@ -3742,9 +2376,8 @@ static irqreturn_t skd_statec_isr(int irq, void *skd_host_data)
unsigned long flags;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_ISH_FW_STATE_CHANGE, FIT_INT_STATUS_HOST);
skd_isr_fwstate(skdev);
spin_unlock_irqrestore(&skdev->lock, flags);
@@ -3759,19 +2392,18 @@ static irqreturn_t skd_comp_q(int irq, void *skd_host_data)
int deferred;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_ISH_COMPLETION_POSTED, FIT_INT_STATUS_HOST);
deferred = skd_isr_completion_posted(skdev, skd_isr_comp_limit,
&flush_enqueued);
if (flush_enqueued)
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
if (deferred)
schedule_work(&skdev->completion_worker);
else if (!flush_enqueued)
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
spin_unlock_irqrestore(&skdev->lock, flags);
@@ -3784,9 +2416,8 @@ static irqreturn_t skd_msg_isr(int irq, void *skd_host_data)
unsigned long flags;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_ISH_MSG_FROM_DEV, FIT_INT_STATUS_HOST);
skd_isr_msg_from_dev(skdev);
spin_unlock_irqrestore(&skdev->lock, flags);
@@ -3799,9 +2430,8 @@ static irqreturn_t skd_qfull_isr(int irq, void *skd_host_data)
unsigned long flags;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_INT_QUEUE_FULL, FIT_INT_STATUS_HOST);
spin_unlock_irqrestore(&skdev->lock, flags);
return IRQ_HANDLED;
@@ -3850,8 +2480,7 @@ static int skd_acquire_msix(struct skd_device *skdev)
rc = pci_alloc_irq_vectors(pdev, SKD_MAX_MSIX_COUNT, SKD_MAX_MSIX_COUNT,
PCI_IRQ_MSIX);
if (rc < 0) {
- pr_err("(%s): failed to enable MSI-X %d\n",
- skd_name(skdev), rc);
+ dev_err(&skdev->pdev->dev, "failed to enable MSI-X %d\n", rc);
goto out;
}
@@ -3859,8 +2488,7 @@ static int skd_acquire_msix(struct skd_device *skdev)
sizeof(struct skd_msix_entry), GFP_KERNEL);
if (!skdev->msix_entries) {
rc = -ENOMEM;
- pr_err("(%s): msix table allocation error\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "msix table allocation error\n");
goto out;
}
@@ -3877,16 +2505,15 @@ static int skd_acquire_msix(struct skd_device *skdev)
msix_entries[i].handler, 0,
qentry->isr_name, skdev);
if (rc) {
- pr_err("(%s): Unable to register(%d) MSI-X "
- "handler %d: %s\n",
- skd_name(skdev), rc, i, qentry->isr_name);
+ dev_err(&skdev->pdev->dev,
+ "Unable to register(%d) MSI-X handler %d: %s\n",
+ rc, i, qentry->isr_name);
goto msix_out;
}
}
- pr_debug("%s:%s:%d %s: <%s> msix %d irq(s) enabled\n",
- skdev->name, __func__, __LINE__,
- pci_name(pdev), skdev->name, SKD_MAX_MSIX_COUNT);
+ dev_dbg(&skdev->pdev->dev, "%d msix irq(s) enabled\n",
+ SKD_MAX_MSIX_COUNT);
return 0;
msix_out:
@@ -3909,8 +2536,8 @@ static int skd_acquire_irq(struct skd_device *skdev)
if (!rc)
return 0;
- pr_err("(%s): failed to enable MSI-X, re-trying with MSI %d\n",
- skd_name(skdev), rc);
+ dev_err(&skdev->pdev->dev,
+ "failed to enable MSI-X, re-trying with MSI %d\n", rc);
}
snprintf(skdev->isr_name, sizeof(skdev->isr_name), "%s%d", DRV_NAME,
@@ -3920,8 +2547,8 @@ static int skd_acquire_irq(struct skd_device *skdev)
irq_flag |= PCI_IRQ_MSI;
rc = pci_alloc_irq_vectors(pdev, 1, 1, irq_flag);
if (rc < 0) {
- pr_err("(%s): failed to allocate the MSI interrupt %d\n",
- skd_name(skdev), rc);
+ dev_err(&skdev->pdev->dev,
+ "failed to allocate the MSI interrupt %d\n", rc);
return rc;
}
@@ -3930,8 +2557,8 @@ static int skd_acquire_irq(struct skd_device *skdev)
skdev->isr_name, skdev);
if (rc) {
pci_free_irq_vectors(pdev);
- pr_err("(%s): failed to allocate interrupt %d\n",
- skd_name(skdev), rc);
+ dev_err(&skdev->pdev->dev, "failed to allocate interrupt %d\n",
+ rc);
return rc;
}
@@ -3965,20 +2592,45 @@ static void skd_release_irq(struct skd_device *skdev)
*****************************************************************************
*/
+static void *skd_alloc_dma(struct skd_device *skdev, struct kmem_cache *s,
+ dma_addr_t *dma_handle, gfp_t gfp,
+ enum dma_data_direction dir)
+{
+ struct device *dev = &skdev->pdev->dev;
+ void *buf;
+
+ buf = kmem_cache_alloc(s, gfp);
+ if (!buf)
+ return NULL;
+ *dma_handle = dma_map_single(dev, buf, s->size, dir);
+ if (dma_mapping_error(dev, *dma_handle)) {
+ kfree(buf);
+ buf = NULL;
+ }
+ return buf;
+}
+
+static void skd_free_dma(struct skd_device *skdev, struct kmem_cache *s,
+ void *vaddr, dma_addr_t dma_handle,
+ enum dma_data_direction dir)
+{
+ if (!vaddr)
+ return;
+
+ dma_unmap_single(&skdev->pdev->dev, dma_handle, s->size, dir);
+ kmem_cache_free(s, vaddr);
+}
+
static int skd_cons_skcomp(struct skd_device *skdev)
{
int rc = 0;
struct fit_completion_entry_v1 *skcomp;
- u32 nbytes;
- nbytes = sizeof(*skcomp) * SKD_N_COMPLETION_ENTRY;
- nbytes += sizeof(struct fit_comp_error_info) * SKD_N_COMPLETION_ENTRY;
+ dev_dbg(&skdev->pdev->dev,
+ "comp pci_alloc, total bytes %zd entries %d\n",
+ SKD_SKCOMP_SIZE, SKD_N_COMPLETION_ENTRY);
- pr_debug("%s:%s:%d comp pci_alloc, total bytes %d entries %d\n",
- skdev->name, __func__, __LINE__,
- nbytes, SKD_N_COMPLETION_ENTRY);
-
- skcomp = pci_zalloc_consistent(skdev->pdev, nbytes,
+ skcomp = pci_zalloc_consistent(skdev->pdev, SKD_SKCOMP_SIZE,
&skdev->cq_dma_address);
if (skcomp == NULL) {
@@ -4000,14 +2652,14 @@ static int skd_cons_skmsg(struct skd_device *skdev)
int rc = 0;
u32 i;
- pr_debug("%s:%s:%d skmsg_table kzalloc, struct %lu, count %u total %lu\n",
- skdev->name, __func__, __LINE__,
- sizeof(struct skd_fitmsg_context),
- skdev->num_fitmsg_context,
- sizeof(struct skd_fitmsg_context) * skdev->num_fitmsg_context);
+ dev_dbg(&skdev->pdev->dev,
+ "skmsg_table kcalloc, struct %lu, count %u total %lu\n",
+ sizeof(struct skd_fitmsg_context), skdev->num_fitmsg_context,
+ sizeof(struct skd_fitmsg_context) * skdev->num_fitmsg_context);
- skdev->skmsg_table = kzalloc(sizeof(struct skd_fitmsg_context)
- *skdev->num_fitmsg_context, GFP_KERNEL);
+ skdev->skmsg_table = kcalloc(skdev->num_fitmsg_context,
+ sizeof(struct skd_fitmsg_context),
+ GFP_KERNEL);
if (skdev->skmsg_table == NULL) {
rc = -ENOMEM;
goto err_out;
@@ -4020,9 +2672,8 @@ static int skd_cons_skmsg(struct skd_device *skdev)
skmsg->id = i + SKD_ID_FIT_MSG;
- skmsg->state = SKD_MSG_STATE_IDLE;
skmsg->msg_buf = pci_alloc_consistent(skdev->pdev,
- SKD_N_FITMSG_BYTES + 64,
+ SKD_N_FITMSG_BYTES,
&skmsg->mb_dma_address);
if (skmsg->msg_buf == NULL) {
@@ -4030,22 +2681,13 @@ static int skd_cons_skmsg(struct skd_device *skdev)
goto err_out;
}
- skmsg->offset = (u32)((u64)skmsg->msg_buf &
- (~FIT_QCMD_BASE_ADDRESS_MASK));
- skmsg->msg_buf += ~FIT_QCMD_BASE_ADDRESS_MASK;
- skmsg->msg_buf = (u8 *)((u64)skmsg->msg_buf &
- FIT_QCMD_BASE_ADDRESS_MASK);
- skmsg->mb_dma_address += ~FIT_QCMD_BASE_ADDRESS_MASK;
- skmsg->mb_dma_address &= FIT_QCMD_BASE_ADDRESS_MASK;
+ WARN(((uintptr_t)skmsg->msg_buf | skmsg->mb_dma_address) &
+ (FIT_QCMD_ALIGN - 1),
+ "not aligned: msg_buf %p mb_dma_address %#llx\n",
+ skmsg->msg_buf, skmsg->mb_dma_address);
memset(skmsg->msg_buf, 0, SKD_N_FITMSG_BYTES);
-
- skmsg->next = &skmsg[1];
}
- /* Free list is in order starting with the 0th entry. */
- skdev->skmsg_table[i - 1].next = NULL;
- skdev->skmsg_free_list = skdev->skmsg_table;
-
err_out:
return rc;
}
@@ -4055,18 +2697,14 @@ static struct fit_sg_descriptor *skd_cons_sg_list(struct skd_device *skdev,
dma_addr_t *ret_dma_addr)
{
struct fit_sg_descriptor *sg_list;
- u32 nbytes;
-
- nbytes = sizeof(*sg_list) * n_sg;
- sg_list = pci_alloc_consistent(skdev->pdev, nbytes, ret_dma_addr);
+ sg_list = skd_alloc_dma(skdev, skdev->sglist_cache, ret_dma_addr,
+ GFP_DMA | __GFP_ZERO, DMA_TO_DEVICE);
if (sg_list != NULL) {
uint64_t dma_address = *ret_dma_addr;
u32 i;
- memset(sg_list, 0, nbytes);
-
for (i = 0; i < n_sg - 1; i++) {
uint64_t ndp_off;
ndp_off = (i + 1) * sizeof(struct fit_sg_descriptor);
@@ -4079,153 +2717,63 @@ static struct fit_sg_descriptor *skd_cons_sg_list(struct skd_device *skdev,
return sg_list;
}
-static int skd_cons_skreq(struct skd_device *skdev)
+static void skd_free_sg_list(struct skd_device *skdev,
+ struct fit_sg_descriptor *sg_list,
+ dma_addr_t dma_addr)
{
- int rc = 0;
- u32 i;
-
- pr_debug("%s:%s:%d skreq_table kzalloc, struct %lu, count %u total %lu\n",
- skdev->name, __func__, __LINE__,
- sizeof(struct skd_request_context),
- skdev->num_req_context,
- sizeof(struct skd_request_context) * skdev->num_req_context);
-
- skdev->skreq_table = kzalloc(sizeof(struct skd_request_context)
- * skdev->num_req_context, GFP_KERNEL);
- if (skdev->skreq_table == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
-
- pr_debug("%s:%s:%d alloc sg_table sg_per_req %u scatlist %lu total %lu\n",
- skdev->name, __func__, __LINE__,
- skdev->sgs_per_request, sizeof(struct scatterlist),
- skdev->sgs_per_request * sizeof(struct scatterlist));
-
- for (i = 0; i < skdev->num_req_context; i++) {
- struct skd_request_context *skreq;
-
- skreq = &skdev->skreq_table[i];
-
- skreq->id = i + SKD_ID_RW_REQUEST;
- skreq->state = SKD_REQ_STATE_IDLE;
-
- skreq->sg = kzalloc(sizeof(struct scatterlist) *
- skdev->sgs_per_request, GFP_KERNEL);
- if (skreq->sg == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
- sg_init_table(skreq->sg, skdev->sgs_per_request);
-
- skreq->sksg_list = skd_cons_sg_list(skdev,
- skdev->sgs_per_request,
- &skreq->sksg_dma_address);
-
- if (skreq->sksg_list == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
-
- skreq->next = &skreq[1];
- }
-
- /* Free list is in order starting with the 0th entry. */
- skdev->skreq_table[i - 1].next = NULL;
- skdev->skreq_free_list = skdev->skreq_table;
+ if (WARN_ON_ONCE(!sg_list))
+ return;
-err_out:
- return rc;
+ skd_free_dma(skdev, skdev->sglist_cache, sg_list, dma_addr,
+ DMA_TO_DEVICE);
}
-static int skd_cons_skspcl(struct skd_device *skdev)
+static int skd_init_request(struct blk_mq_tag_set *set, struct request *rq,
+ unsigned int hctx_idx, unsigned int numa_node)
{
- int rc = 0;
- u32 i, nbytes;
-
- pr_debug("%s:%s:%d skspcl_table kzalloc, struct %lu, count %u total %lu\n",
- skdev->name, __func__, __LINE__,
- sizeof(struct skd_special_context),
- skdev->n_special,
- sizeof(struct skd_special_context) * skdev->n_special);
-
- skdev->skspcl_table = kzalloc(sizeof(struct skd_special_context)
- * skdev->n_special, GFP_KERNEL);
- if (skdev->skspcl_table == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
+ struct skd_device *skdev = set->driver_data;
+ struct skd_request_context *skreq = blk_mq_rq_to_pdu(rq);
- for (i = 0; i < skdev->n_special; i++) {
- struct skd_special_context *skspcl;
-
- skspcl = &skdev->skspcl_table[i];
-
- skspcl->req.id = i + SKD_ID_SPECIAL_REQUEST;
- skspcl->req.state = SKD_REQ_STATE_IDLE;
-
- skspcl->req.next = &skspcl[1].req;
-
- nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
-
- skspcl->msg_buf =
- pci_zalloc_consistent(skdev->pdev, nbytes,
- &skspcl->mb_dma_address);
- if (skspcl->msg_buf == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
-
- skspcl->req.sg = kzalloc(sizeof(struct scatterlist) *
- SKD_N_SG_PER_SPECIAL, GFP_KERNEL);
- if (skspcl->req.sg == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
-
- skspcl->req.sksg_list = skd_cons_sg_list(skdev,
- SKD_N_SG_PER_SPECIAL,
- &skspcl->req.
- sksg_dma_address);
- if (skspcl->req.sksg_list == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
- }
+ skreq->state = SKD_REQ_STATE_IDLE;
+ skreq->sg = (void *)(skreq + 1);
+ sg_init_table(skreq->sg, skd_sgs_per_request);
+ skreq->sksg_list = skd_cons_sg_list(skdev, skd_sgs_per_request,
+ &skreq->sksg_dma_address);
- /* Free list is in order starting with the 0th entry. */
- skdev->skspcl_table[i - 1].req.next = NULL;
- skdev->skspcl_free_list = skdev->skspcl_table;
+ return skreq->sksg_list ? 0 : -ENOMEM;
+}
- return rc;
+static void skd_exit_request(struct blk_mq_tag_set *set, struct request *rq,
+ unsigned int hctx_idx)
+{
+ struct skd_device *skdev = set->driver_data;
+ struct skd_request_context *skreq = blk_mq_rq_to_pdu(rq);
-err_out:
- return rc;
+ skd_free_sg_list(skdev, skreq->sksg_list, skreq->sksg_dma_address);
}
static int skd_cons_sksb(struct skd_device *skdev)
{
int rc = 0;
struct skd_special_context *skspcl;
- u32 nbytes;
skspcl = &skdev->internal_skspcl;
skspcl->req.id = 0 + SKD_ID_INTERNAL;
skspcl->req.state = SKD_REQ_STATE_IDLE;
- nbytes = SKD_N_INTERNAL_BYTES;
-
- skspcl->data_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
- &skspcl->db_dma_address);
+ skspcl->data_buf = skd_alloc_dma(skdev, skdev->databuf_cache,
+ &skspcl->db_dma_address,
+ GFP_DMA | __GFP_ZERO,
+ DMA_BIDIRECTIONAL);
if (skspcl->data_buf == NULL) {
rc = -ENOMEM;
goto err_out;
}
- nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
- skspcl->msg_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
- &skspcl->mb_dma_address);
+ skspcl->msg_buf = skd_alloc_dma(skdev, skdev->msgbuf_cache,
+ &skspcl->mb_dma_address,
+ GFP_DMA | __GFP_ZERO, DMA_TO_DEVICE);
if (skspcl->msg_buf == NULL) {
rc = -ENOMEM;
goto err_out;
@@ -4247,6 +2795,14 @@ err_out:
return rc;
}
+static const struct blk_mq_ops skd_mq_ops = {
+ .queue_rq = skd_mq_queue_rq,
+ .complete = skd_complete_rq,
+ .timeout = skd_timed_out,
+ .init_request = skd_init_request,
+ .exit_request = skd_exit_request,
+};
+
static int skd_cons_disk(struct skd_device *skdev)
{
int rc = 0;
@@ -4268,31 +2824,46 @@ static int skd_cons_disk(struct skd_device *skdev)
disk->fops = &skd_blockdev_ops;
disk->private_data = skdev;
- q = blk_init_queue(skd_request_fn, &skdev->lock);
- if (!q) {
- rc = -ENOMEM;
+ memset(&skdev->tag_set, 0, sizeof(skdev->tag_set));
+ skdev->tag_set.ops = &skd_mq_ops;
+ skdev->tag_set.nr_hw_queues = 1;
+ skdev->tag_set.queue_depth = skd_max_queue_depth;
+ skdev->tag_set.cmd_size = sizeof(struct skd_request_context) +
+ skdev->sgs_per_request * sizeof(struct scatterlist);
+ skdev->tag_set.numa_node = NUMA_NO_NODE;
+ skdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
+ BLK_MQ_F_SG_MERGE |
+ BLK_ALLOC_POLICY_TO_MQ_FLAG(BLK_TAG_ALLOC_FIFO);
+ skdev->tag_set.driver_data = skdev;
+ rc = blk_mq_alloc_tag_set(&skdev->tag_set);
+ if (rc)
+ goto err_out;
+ q = blk_mq_init_queue(&skdev->tag_set);
+ if (IS_ERR(q)) {
+ blk_mq_free_tag_set(&skdev->tag_set);
+ rc = PTR_ERR(q);
goto err_out;
}
- blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+ q->queuedata = skdev;
skdev->queue = q;
disk->queue = q;
- q->queuedata = skdev;
blk_queue_write_cache(q, true, true);
blk_queue_max_segments(q, skdev->sgs_per_request);
blk_queue_max_hw_sectors(q, SKD_N_MAX_SECTORS);
- /* set sysfs ptimal_io_size to 8K */
+ /* set optimal I/O size to 8KB */
blk_queue_io_opt(q, 8192);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
+ blk_queue_rq_timeout(q, 8 * HZ);
+
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d stopping %s queue\n",
- skdev->name, __func__, __LINE__, skdev->name);
- blk_stop_queue(skdev->queue);
+ dev_dbg(&skdev->pdev->dev, "stopping queue\n");
+ blk_mq_stop_hw_queues(skdev->queue);
spin_unlock_irqrestore(&skdev->lock, flags);
err_out:
@@ -4306,13 +2877,13 @@ static struct skd_device *skd_construct(struct pci_dev *pdev)
{
struct skd_device *skdev;
int blk_major = skd_major;
+ size_t size;
int rc;
skdev = kzalloc(sizeof(*skdev), GFP_KERNEL);
if (!skdev) {
- pr_err(PFX "(%s): memory alloc failure\n",
- pci_name(pdev));
+ dev_err(&pdev->dev, "memory alloc failure\n");
return NULL;
}
@@ -4320,60 +2891,71 @@ static struct skd_device *skd_construct(struct pci_dev *pdev)
skdev->pdev = pdev;
skdev->devno = skd_next_devno++;
skdev->major = blk_major;
- sprintf(skdev->name, DRV_NAME "%d", skdev->devno);
skdev->dev_max_queue_depth = 0;
skdev->num_req_context = skd_max_queue_depth;
skdev->num_fitmsg_context = skd_max_queue_depth;
- skdev->n_special = skd_max_pass_thru;
skdev->cur_max_queue_depth = 1;
skdev->queue_low_water_mark = 1;
skdev->proto_ver = 99;
skdev->sgs_per_request = skd_sgs_per_request;
skdev->dbg_level = skd_dbg_level;
- atomic_set(&skdev->device_count, 0);
-
spin_lock_init(&skdev->lock);
+ INIT_WORK(&skdev->start_queue, skd_start_queue);
INIT_WORK(&skdev->completion_worker, skd_completion_worker);
- pr_debug("%s:%s:%d skcomp\n", skdev->name, __func__, __LINE__);
- rc = skd_cons_skcomp(skdev);
- if (rc < 0)
+ size = max(SKD_N_FITMSG_BYTES, SKD_N_SPECIAL_FITMSG_BYTES);
+ skdev->msgbuf_cache = kmem_cache_create("skd-msgbuf", size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!skdev->msgbuf_cache)
goto err_out;
-
- pr_debug("%s:%s:%d skmsg\n", skdev->name, __func__, __LINE__);
- rc = skd_cons_skmsg(skdev);
- if (rc < 0)
+ WARN_ONCE(kmem_cache_size(skdev->msgbuf_cache) < size,
+ "skd-msgbuf: %d < %zd\n",
+ kmem_cache_size(skdev->msgbuf_cache), size);
+ size = skd_sgs_per_request * sizeof(struct fit_sg_descriptor);
+ skdev->sglist_cache = kmem_cache_create("skd-sglist", size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!skdev->sglist_cache)
+ goto err_out;
+ WARN_ONCE(kmem_cache_size(skdev->sglist_cache) < size,
+ "skd-sglist: %d < %zd\n",
+ kmem_cache_size(skdev->sglist_cache), size);
+ size = SKD_N_INTERNAL_BYTES;
+ skdev->databuf_cache = kmem_cache_create("skd-databuf", size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!skdev->databuf_cache)
goto err_out;
+ WARN_ONCE(kmem_cache_size(skdev->databuf_cache) < size,
+ "skd-databuf: %d < %zd\n",
+ kmem_cache_size(skdev->databuf_cache), size);
- pr_debug("%s:%s:%d skreq\n", skdev->name, __func__, __LINE__);
- rc = skd_cons_skreq(skdev);
+ dev_dbg(&skdev->pdev->dev, "skcomp\n");
+ rc = skd_cons_skcomp(skdev);
if (rc < 0)
goto err_out;
- pr_debug("%s:%s:%d skspcl\n", skdev->name, __func__, __LINE__);
- rc = skd_cons_skspcl(skdev);
+ dev_dbg(&skdev->pdev->dev, "skmsg\n");
+ rc = skd_cons_skmsg(skdev);
if (rc < 0)
goto err_out;
- pr_debug("%s:%s:%d sksb\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "sksb\n");
rc = skd_cons_sksb(skdev);
if (rc < 0)
goto err_out;
- pr_debug("%s:%s:%d disk\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "disk\n");
rc = skd_cons_disk(skdev);
if (rc < 0)
goto err_out;
- pr_debug("%s:%s:%d VICTORY\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "VICTORY\n");
return skdev;
err_out:
- pr_debug("%s:%s:%d construct failed\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "construct failed\n");
skd_destruct(skdev);
return NULL;
}
@@ -4386,14 +2968,9 @@ err_out:
static void skd_free_skcomp(struct skd_device *skdev)
{
- if (skdev->skcomp_table != NULL) {
- u32 nbytes;
-
- nbytes = sizeof(skdev->skcomp_table[0]) *
- SKD_N_COMPLETION_ENTRY;
- pci_free_consistent(skdev->pdev, nbytes,
+ if (skdev->skcomp_table)
+ pci_free_consistent(skdev->pdev, SKD_SKCOMP_SIZE,
skdev->skcomp_table, skdev->cq_dma_address);
- }
skdev->skcomp_table = NULL;
skdev->cq_dma_address = 0;
@@ -4412,8 +2989,6 @@ static void skd_free_skmsg(struct skd_device *skdev)
skmsg = &skdev->skmsg_table[i];
if (skmsg->msg_buf != NULL) {
- skmsg->msg_buf += skmsg->offset;
- skmsg->mb_dma_address += skmsg->offset;
pci_free_consistent(skdev->pdev, SKD_N_FITMSG_BYTES,
skmsg->msg_buf,
skmsg->mb_dma_address);
@@ -4426,109 +3001,23 @@ static void skd_free_skmsg(struct skd_device *skdev)
skdev->skmsg_table = NULL;
}
-static void skd_free_sg_list(struct skd_device *skdev,
- struct fit_sg_descriptor *sg_list,
- u32 n_sg, dma_addr_t dma_addr)
-{
- if (sg_list != NULL) {
- u32 nbytes;
-
- nbytes = sizeof(*sg_list) * n_sg;
-
- pci_free_consistent(skdev->pdev, nbytes, sg_list, dma_addr);
- }
-}
-
-static void skd_free_skreq(struct skd_device *skdev)
-{
- u32 i;
-
- if (skdev->skreq_table == NULL)
- return;
-
- for (i = 0; i < skdev->num_req_context; i++) {
- struct skd_request_context *skreq;
-
- skreq = &skdev->skreq_table[i];
-
- skd_free_sg_list(skdev, skreq->sksg_list,
- skdev->sgs_per_request,
- skreq->sksg_dma_address);
-
- skreq->sksg_list = NULL;
- skreq->sksg_dma_address = 0;
-
- kfree(skreq->sg);
- }
-
- kfree(skdev->skreq_table);
- skdev->skreq_table = NULL;
-}
-
-static void skd_free_skspcl(struct skd_device *skdev)
-{
- u32 i;
- u32 nbytes;
-
- if (skdev->skspcl_table == NULL)
- return;
-
- for (i = 0; i < skdev->n_special; i++) {
- struct skd_special_context *skspcl;
-
- skspcl = &skdev->skspcl_table[i];
-
- if (skspcl->msg_buf != NULL) {
- nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
- pci_free_consistent(skdev->pdev, nbytes,
- skspcl->msg_buf,
- skspcl->mb_dma_address);
- }
-
- skspcl->msg_buf = NULL;
- skspcl->mb_dma_address = 0;
-
- skd_free_sg_list(skdev, skspcl->req.sksg_list,
- SKD_N_SG_PER_SPECIAL,
- skspcl->req.sksg_dma_address);
-
- skspcl->req.sksg_list = NULL;
- skspcl->req.sksg_dma_address = 0;
-
- kfree(skspcl->req.sg);
- }
-
- kfree(skdev->skspcl_table);
- skdev->skspcl_table = NULL;
-}
-
static void skd_free_sksb(struct skd_device *skdev)
{
- struct skd_special_context *skspcl;
- u32 nbytes;
-
- skspcl = &skdev->internal_skspcl;
-
- if (skspcl->data_buf != NULL) {
- nbytes = SKD_N_INTERNAL_BYTES;
+ struct skd_special_context *skspcl = &skdev->internal_skspcl;
- pci_free_consistent(skdev->pdev, nbytes,
- skspcl->data_buf, skspcl->db_dma_address);
- }
+ skd_free_dma(skdev, skdev->databuf_cache, skspcl->data_buf,
+ skspcl->db_dma_address, DMA_BIDIRECTIONAL);
skspcl->data_buf = NULL;
skspcl->db_dma_address = 0;
- if (skspcl->msg_buf != NULL) {
- nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
- pci_free_consistent(skdev->pdev, nbytes,
- skspcl->msg_buf, skspcl->mb_dma_address);
- }
+ skd_free_dma(skdev, skdev->msgbuf_cache, skspcl->msg_buf,
+ skspcl->mb_dma_address, DMA_TO_DEVICE);
skspcl->msg_buf = NULL;
skspcl->mb_dma_address = 0;
- skd_free_sg_list(skdev, skspcl->req.sksg_list, 1,
+ skd_free_sg_list(skdev, skspcl->req.sksg_list,
skspcl->req.sksg_dma_address);
skspcl->req.sksg_list = NULL;
@@ -4539,15 +3028,20 @@ static void skd_free_disk(struct skd_device *skdev)
{
struct gendisk *disk = skdev->disk;
- if (disk != NULL) {
- struct request_queue *q = disk->queue;
+ if (disk && (disk->flags & GENHD_FL_UP))
+ del_gendisk(disk);
- if (disk->flags & GENHD_FL_UP)
- del_gendisk(disk);
- if (q)
- blk_cleanup_queue(q);
- put_disk(disk);
+ if (skdev->queue) {
+ blk_cleanup_queue(skdev->queue);
+ skdev->queue = NULL;
+ if (disk)
+ disk->queue = NULL;
}
+
+ if (skdev->tag_set.tags)
+ blk_mq_free_tag_set(&skdev->tag_set);
+
+ put_disk(disk);
skdev->disk = NULL;
}
@@ -4556,26 +3050,25 @@ static void skd_destruct(struct skd_device *skdev)
if (skdev == NULL)
return;
+ cancel_work_sync(&skdev->start_queue);
- pr_debug("%s:%s:%d disk\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "disk\n");
skd_free_disk(skdev);
- pr_debug("%s:%s:%d sksb\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "sksb\n");
skd_free_sksb(skdev);
- pr_debug("%s:%s:%d skspcl\n", skdev->name, __func__, __LINE__);
- skd_free_skspcl(skdev);
-
- pr_debug("%s:%s:%d skreq\n", skdev->name, __func__, __LINE__);
- skd_free_skreq(skdev);
-
- pr_debug("%s:%s:%d skmsg\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "skmsg\n");
skd_free_skmsg(skdev);
- pr_debug("%s:%s:%d skcomp\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "skcomp\n");
skd_free_skcomp(skdev);
- pr_debug("%s:%s:%d skdev\n", skdev->name, __func__, __LINE__);
+ kmem_cache_destroy(skdev->databuf_cache);
+ kmem_cache_destroy(skdev->sglist_cache);
+ kmem_cache_destroy(skdev->msgbuf_cache);
+
+ dev_dbg(&skdev->pdev->dev, "skdev\n");
kfree(skdev);
}
@@ -4592,9 +3085,8 @@ static int skd_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
skdev = bdev->bd_disk->private_data;
- pr_debug("%s:%s:%d %s: CMD[%s] getgeo device\n",
- skdev->name, __func__, __LINE__,
- bdev->bd_disk->disk_name, current->comm);
+ dev_dbg(&skdev->pdev->dev, "%s: CMD[%s] getgeo device\n",
+ bdev->bd_disk->disk_name, current->comm);
if (skdev->read_cap_is_valid) {
capacity = get_capacity(skdev->disk);
@@ -4609,18 +3101,16 @@ static int skd_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
static int skd_bdev_attach(struct device *parent, struct skd_device *skdev)
{
- pr_debug("%s:%s:%d add_disk\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "add_disk\n");
device_add_disk(parent, skdev->disk);
return 0;
}
static const struct block_device_operations skd_blockdev_ops = {
.owner = THIS_MODULE,
- .ioctl = skd_bdev_ioctl,
.getgeo = skd_bdev_getgeo,
};
-
/*
*****************************************************************************
* PCIe DRIVER GLUE
@@ -4671,10 +3161,8 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
char pci_str[32];
struct skd_device *skdev;
- pr_info("STEC s1120 Driver(%s) version %s-b%s\n",
- DRV_NAME, DRV_VERSION, DRV_BUILD_ID);
- pr_info("(skd?:??:[%s]): vendor=%04X device=%04x\n",
- pci_name(pdev), pdev->vendor, pdev->device);
+ dev_dbg(&pdev->dev, "vendor=%04X device=%04x\n", pdev->vendor,
+ pdev->device);
rc = pci_enable_device(pdev);
if (rc)
@@ -4685,16 +3173,13 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (!rc) {
if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
-
- pr_err("(%s): consistent DMA mask error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "consistent DMA mask error %d\n",
+ rc);
}
} else {
- (rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)));
+ rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (rc) {
-
- pr_err("(%s): DMA mask error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "DMA mask error %d\n", rc);
goto err_out_regions;
}
}
@@ -4714,19 +3199,17 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
skd_pci_info(skdev, pci_str);
- pr_info("(%s): %s 64bit\n", skd_name(skdev), pci_str);
+ dev_info(&pdev->dev, "%s 64bit\n", pci_str);
pci_set_master(pdev);
rc = pci_enable_pcie_error_reporting(pdev);
if (rc) {
- pr_err(
- "(%s): bad enable of PCIe error reporting rc=%d\n",
- skd_name(skdev), rc);
+ dev_err(&pdev->dev,
+ "bad enable of PCIe error reporting rc=%d\n", rc);
skdev->pcie_error_reporting_is_enabled = 0;
} else
skdev->pcie_error_reporting_is_enabled = 1;
-
pci_set_drvdata(pdev, skdev);
for (i = 0; i < SKD_MAX_BARS; i++) {
@@ -4735,21 +3218,19 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
skdev->mem_map[i] = ioremap(skdev->mem_phys[i],
skdev->mem_size[i]);
if (!skdev->mem_map[i]) {
- pr_err("(%s): Unable to map adapter memory!\n",
- skd_name(skdev));
+ dev_err(&pdev->dev,
+ "Unable to map adapter memory!\n");
rc = -ENODEV;
goto err_out_iounmap;
}
- pr_debug("%s:%s:%d mem_map=%p, phyd=%016llx, size=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->mem_map[i],
- (uint64_t)skdev->mem_phys[i], skdev->mem_size[i]);
+ dev_dbg(&pdev->dev, "mem_map=%p, phyd=%016llx, size=%d\n",
+ skdev->mem_map[i], (uint64_t)skdev->mem_phys[i],
+ skdev->mem_size[i]);
}
rc = skd_acquire_irq(skdev);
if (rc) {
- pr_err("(%s): interrupt resource error %d\n",
- skd_name(skdev), rc);
+ dev_err(&pdev->dev, "interrupt resource error %d\n", rc);
goto err_out_iounmap;
}
@@ -4771,29 +3252,14 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
} else {
/* we timed out, something is wrong with the device,
don't add the disk structure */
- pr_err(
- "(%s): error: waiting for s1120 timed out %d!\n",
- skd_name(skdev), rc);
+ dev_err(&pdev->dev, "error: waiting for s1120 timed out %d!\n",
+ rc);
/* in case of no error; we timeout with ENXIO */
if (!rc)
rc = -ENXIO;
goto err_out_timer;
}
-
-#ifdef SKD_VMK_POLL_HANDLER
- if (skdev->irq_type == SKD_IRQ_MSIX) {
- /* MSIX completion handler is being used for coredump */
- vmklnx_scsi_register_poll_handler(skdev->scsi_host,
- skdev->msix_entries[5].vector,
- skd_comp_q, skdev);
- } else {
- vmklnx_scsi_register_poll_handler(skdev->scsi_host,
- skdev->pdev->irq, skd_isr,
- skdev);
- }
-#endif /* SKD_VMK_POLL_HANDLER */
-
return rc;
err_out_timer:
@@ -4826,7 +3292,7 @@ static void skd_pci_remove(struct pci_dev *pdev)
skdev = pci_get_drvdata(pdev);
if (!skdev) {
- pr_err("%s: no device data for PCI\n", pci_name(pdev));
+ dev_err(&pdev->dev, "no device data for PCI\n");
return;
}
skd_stop_device(skdev);
@@ -4834,7 +3300,7 @@ static void skd_pci_remove(struct pci_dev *pdev)
for (i = 0; i < SKD_MAX_BARS; i++)
if (skdev->mem_map[i])
- iounmap((u32 *)skdev->mem_map[i]);
+ iounmap(skdev->mem_map[i]);
if (skdev->pcie_error_reporting_is_enabled)
pci_disable_pcie_error_reporting(pdev);
@@ -4855,7 +3321,7 @@ static int skd_pci_suspend(struct pci_dev *pdev, pm_message_t state)
skdev = pci_get_drvdata(pdev);
if (!skdev) {
- pr_err("%s: no device data for PCI\n", pci_name(pdev));
+ dev_err(&pdev->dev, "no device data for PCI\n");
return -EIO;
}
@@ -4865,7 +3331,7 @@ static int skd_pci_suspend(struct pci_dev *pdev, pm_message_t state)
for (i = 0; i < SKD_MAX_BARS; i++)
if (skdev->mem_map[i])
- iounmap((u32 *)skdev->mem_map[i]);
+ iounmap(skdev->mem_map[i]);
if (skdev->pcie_error_reporting_is_enabled)
pci_disable_pcie_error_reporting(pdev);
@@ -4885,7 +3351,7 @@ static int skd_pci_resume(struct pci_dev *pdev)
skdev = pci_get_drvdata(pdev);
if (!skdev) {
- pr_err("%s: no device data for PCI\n", pci_name(pdev));
+ dev_err(&pdev->dev, "no device data for PCI\n");
return -1;
}
@@ -4903,15 +3369,14 @@ static int skd_pci_resume(struct pci_dev *pdev)
if (!rc) {
if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
- pr_err("(%s): consistent DMA mask error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "consistent DMA mask error %d\n",
+ rc);
}
} else {
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (rc) {
- pr_err("(%s): DMA mask error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "DMA mask error %d\n", rc);
goto err_out_regions;
}
}
@@ -4919,8 +3384,8 @@ static int skd_pci_resume(struct pci_dev *pdev)
pci_set_master(pdev);
rc = pci_enable_pcie_error_reporting(pdev);
if (rc) {
- pr_err("(%s): bad enable of PCIe error reporting rc=%d\n",
- skdev->name, rc);
+ dev_err(&pdev->dev,
+ "bad enable of PCIe error reporting rc=%d\n", rc);
skdev->pcie_error_reporting_is_enabled = 0;
} else
skdev->pcie_error_reporting_is_enabled = 1;
@@ -4932,21 +3397,17 @@ static int skd_pci_resume(struct pci_dev *pdev)
skdev->mem_map[i] = ioremap(skdev->mem_phys[i],
skdev->mem_size[i]);
if (!skdev->mem_map[i]) {
- pr_err("(%s): Unable to map adapter memory!\n",
- skd_name(skdev));
+ dev_err(&pdev->dev, "Unable to map adapter memory!\n");
rc = -ENODEV;
goto err_out_iounmap;
}
- pr_debug("%s:%s:%d mem_map=%p, phyd=%016llx, size=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->mem_map[i],
- (uint64_t)skdev->mem_phys[i], skdev->mem_size[i]);
+ dev_dbg(&pdev->dev, "mem_map=%p, phyd=%016llx, size=%d\n",
+ skdev->mem_map[i], (uint64_t)skdev->mem_phys[i],
+ skdev->mem_size[i]);
}
rc = skd_acquire_irq(skdev);
if (rc) {
-
- pr_err("(%s): interrupt resource error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "interrupt resource error %d\n", rc);
goto err_out_iounmap;
}
@@ -4984,15 +3445,15 @@ static void skd_pci_shutdown(struct pci_dev *pdev)
{
struct skd_device *skdev;
- pr_err("skd_pci_shutdown called\n");
+ dev_err(&pdev->dev, "%s called\n", __func__);
skdev = pci_get_drvdata(pdev);
if (!skdev) {
- pr_err("%s: no device data for PCI\n", pci_name(pdev));
+ dev_err(&pdev->dev, "no device data for PCI\n");
return;
}
- pr_err("%s: calling stop\n", skd_name(skdev));
+ dev_err(&pdev->dev, "calling stop\n");
skd_stop_device(skdev);
}
@@ -5012,21 +3473,6 @@ static struct pci_driver skd_driver = {
*****************************************************************************
*/
-static const char *skd_name(struct skd_device *skdev)
-{
- memset(skdev->id_str, 0, sizeof(skdev->id_str));
-
- if (skdev->inquiry_is_valid)
- snprintf(skdev->id_str, sizeof(skdev->id_str), "%s:%s:[%s]",
- skdev->name, skdev->inq_serial_num,
- pci_name(skdev->pdev));
- else
- snprintf(skdev->id_str, sizeof(skdev->id_str), "%s:??:[%s]",
- skdev->name, pci_name(skdev->pdev));
-
- return skdev->id_str;
-}
-
const char *skd_drive_state_to_str(int state)
{
switch (state) {
@@ -5078,8 +3524,6 @@ const char *skd_skdev_state_to_str(enum skd_drvr_state state)
return "PAUSING";
case SKD_DRVR_STATE_PAUSED:
return "PAUSED";
- case SKD_DRVR_STATE_DRAINING_TIMEOUT:
- return "DRAINING_TIMEOUT";
case SKD_DRVR_STATE_RESTARTING:
return "RESTARTING";
case SKD_DRVR_STATE_RESUMING:
@@ -5106,18 +3550,6 @@ const char *skd_skdev_state_to_str(enum skd_drvr_state state)
}
}
-static const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
-{
- switch (state) {
- case SKD_MSG_STATE_IDLE:
- return "IDLE";
- case SKD_MSG_STATE_BUSY:
- return "BUSY";
- default:
- return "???";
- }
-}
-
static const char *skd_skreq_state_to_str(enum skd_req_state state)
{
switch (state) {
@@ -5131,8 +3563,6 @@ static const char *skd_skreq_state_to_str(enum skd_req_state state)
return "COMPLETED";
case SKD_REQ_STATE_TIMEOUT:
return "TIMEOUT";
- case SKD_REQ_STATE_ABORTED:
- return "ABORTED";
default:
return "???";
}
@@ -5140,58 +3570,34 @@ static const char *skd_skreq_state_to_str(enum skd_req_state state)
static void skd_log_skdev(struct skd_device *skdev, const char *event)
{
- pr_debug("%s:%s:%d (%s) skdev=%p event='%s'\n",
- skdev->name, __func__, __LINE__, skdev->name, skdev, event);
- pr_debug("%s:%s:%d drive_state=%s(%d) driver_state=%s(%d)\n",
- skdev->name, __func__, __LINE__,
- skd_drive_state_to_str(skdev->drive_state), skdev->drive_state,
- skd_skdev_state_to_str(skdev->state), skdev->state);
- pr_debug("%s:%s:%d busy=%d limit=%d dev=%d lowat=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->in_flight, skdev->cur_max_queue_depth,
- skdev->dev_max_queue_depth, skdev->queue_low_water_mark);
- pr_debug("%s:%s:%d timestamp=0x%x cycle=%d cycle_ix=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->timeout_stamp, skdev->skcomp_cycle, skdev->skcomp_ix);
-}
-
-static void skd_log_skmsg(struct skd_device *skdev,
- struct skd_fitmsg_context *skmsg, const char *event)
-{
- pr_debug("%s:%s:%d (%s) skmsg=%p event='%s'\n",
- skdev->name, __func__, __LINE__, skdev->name, skmsg, event);
- pr_debug("%s:%s:%d state=%s(%d) id=0x%04x length=%d\n",
- skdev->name, __func__, __LINE__,
- skd_skmsg_state_to_str(skmsg->state), skmsg->state,
- skmsg->id, skmsg->length);
+ dev_dbg(&skdev->pdev->dev, "skdev=%p event='%s'\n", skdev, event);
+ dev_dbg(&skdev->pdev->dev, " drive_state=%s(%d) driver_state=%s(%d)\n",
+ skd_drive_state_to_str(skdev->drive_state), skdev->drive_state,
+ skd_skdev_state_to_str(skdev->state), skdev->state);
+ dev_dbg(&skdev->pdev->dev, " busy=%d limit=%d dev=%d lowat=%d\n",
+ skd_in_flight(skdev), skdev->cur_max_queue_depth,
+ skdev->dev_max_queue_depth, skdev->queue_low_water_mark);
+ dev_dbg(&skdev->pdev->dev, " cycle=%d cycle_ix=%d\n",
+ skdev->skcomp_cycle, skdev->skcomp_ix);
}
static void skd_log_skreq(struct skd_device *skdev,
struct skd_request_context *skreq, const char *event)
{
- pr_debug("%s:%s:%d (%s) skreq=%p event='%s'\n",
- skdev->name, __func__, __LINE__, skdev->name, skreq, event);
- pr_debug("%s:%s:%d state=%s(%d) id=0x%04x fitmsg=0x%04x\n",
- skdev->name, __func__, __LINE__,
- skd_skreq_state_to_str(skreq->state), skreq->state,
- skreq->id, skreq->fitmsg_id);
- pr_debug("%s:%s:%d timo=0x%x sg_dir=%d n_sg=%d\n",
- skdev->name, __func__, __LINE__,
- skreq->timeout_stamp, skreq->sg_data_dir, skreq->n_sg);
-
- if (skreq->req != NULL) {
- struct request *req = skreq->req;
- u32 lba = (u32)blk_rq_pos(req);
- u32 count = blk_rq_sectors(req);
-
- pr_debug("%s:%s:%d "
- "req=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n",
- skdev->name, __func__, __LINE__,
- req, lba, lba, count, count,
- (int)rq_data_dir(req));
- } else
- pr_debug("%s:%s:%d req=NULL\n",
- skdev->name, __func__, __LINE__);
+ struct request *req = blk_mq_rq_from_pdu(skreq);
+ u32 lba = blk_rq_pos(req);
+ u32 count = blk_rq_sectors(req);
+
+ dev_dbg(&skdev->pdev->dev, "skreq=%p event='%s'\n", skreq, event);
+ dev_dbg(&skdev->pdev->dev, " state=%s(%d) id=0x%04x fitmsg=0x%04x\n",
+ skd_skreq_state_to_str(skreq->state), skreq->state, skreq->id,
+ skreq->fitmsg_id);
+ dev_dbg(&skdev->pdev->dev, " sg_dir=%d n_sg=%d\n",
+ skreq->data_dir, skreq->n_sg);
+
+ dev_dbg(&skdev->pdev->dev,
+ "req=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n", req, lba, lba,
+ count, count, (int)rq_data_dir(req));
}
/*
@@ -5202,7 +3608,14 @@ static void skd_log_skreq(struct skd_device *skdev,
static int __init skd_init(void)
{
- pr_info(PFX " v%s-b%s loaded\n", DRV_VERSION, DRV_BUILD_ID);
+ BUILD_BUG_ON(sizeof(struct fit_completion_entry_v1) != 8);
+ BUILD_BUG_ON(sizeof(struct fit_comp_error_info) != 32);
+ BUILD_BUG_ON(sizeof(struct skd_command_header) != 16);
+ BUILD_BUG_ON(sizeof(struct skd_scsi_request) != 32);
+ BUILD_BUG_ON(sizeof(struct driver_inquiry_data) != 44);
+ BUILD_BUG_ON(offsetof(struct skd_msg_buf, fmh) != 0);
+ BUILD_BUG_ON(offsetof(struct skd_msg_buf, scsi) != 64);
+ BUILD_BUG_ON(sizeof(struct skd_msg_buf) != SKD_N_FITMSG_BYTES);
switch (skd_isr_type) {
case SKD_IRQ_LEGACY:
@@ -5222,7 +3635,8 @@ static int __init skd_init(void)
skd_max_queue_depth = SKD_MAX_QUEUE_DEPTH_DEFAULT;
}
- if (skd_max_req_per_msg < 1 || skd_max_req_per_msg > 14) {
+ if (skd_max_req_per_msg < 1 ||
+ skd_max_req_per_msg > SKD_MAX_REQ_PER_MSG) {
pr_err(PFX "skd_max_req_per_msg %d invalid, re-set to %d\n",
skd_max_req_per_msg, SKD_MAX_REQ_PER_MSG_DEFAULT);
skd_max_req_per_msg = SKD_MAX_REQ_PER_MSG_DEFAULT;
@@ -5246,19 +3660,11 @@ static int __init skd_init(void)
skd_isr_comp_limit = 0;
}
- if (skd_max_pass_thru < 1 || skd_max_pass_thru > 50) {
- pr_err(PFX "skd_max_pass_thru %d invalid, re-set to %d\n",
- skd_max_pass_thru, SKD_N_SPECIAL_CONTEXT);
- skd_max_pass_thru = SKD_N_SPECIAL_CONTEXT;
- }
-
return pci_register_driver(&skd_driver);
}
static void __exit skd_exit(void)
{
- pr_info(PFX " v%s-b%s unloading\n", DRV_VERSION, DRV_BUILD_ID);
-
pci_unregister_driver(&skd_driver);
if (skd_major)
diff --git a/drivers/block/skd_s1120.h b/drivers/block/skd_s1120.h
index 61c757ff0161..de35f47e953c 100644
--- a/drivers/block/skd_s1120.h
+++ b/drivers/block/skd_s1120.h
@@ -1,19 +1,15 @@
-/* Copyright 2012 STEC, Inc.
+/*
+ * Copyright 2012 STEC, Inc.
+ * Copyright (c) 2017 Western Digital Corporation or its affiliates.
*
- * This file is licensed under the terms of the 3-clause
- * BSD License (http://opensource.org/licenses/BSD-3-Clause)
- * or the GNU GPL-2.0 (http://www.gnu.org/licenses/gpl-2.0.html),
- * at your option. Both licenses are also available in the LICENSE file
- * distributed with this project. This file may not be copied, modified,
- * or distributed except in accordance with those terms.
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
*/
#ifndef SKD_S1120_H
#define SKD_S1120_H
-#pragma pack(push, s1120_h, 1)
-
/*
* Q-channel, 64-bit r/w
*/
@@ -30,7 +26,7 @@
#define FIT_QCMD_MSGSIZE_128 (0x1 << 4)
#define FIT_QCMD_MSGSIZE_256 (0x2 << 4)
#define FIT_QCMD_MSGSIZE_512 (0x3 << 4)
-#define FIT_QCMD_BASE_ADDRESS_MASK (0xFFFFFFFFFFFFFFC0ull)
+#define FIT_QCMD_ALIGN L1_CACHE_BYTES
/*
* Control, 32-bit r/w
@@ -250,7 +246,7 @@ struct fit_msg_hdr {
* 20-23 of the FIT_MTD_FITFW_INIT response.
*/
struct fit_completion_entry_v1 {
- uint32_t num_returned_bytes;
+ __be32 num_returned_bytes;
uint16_t tag;
uint8_t status; /* SCSI status */
uint8_t cycle;
@@ -278,7 +274,7 @@ struct fit_comp_error_info {
uint16_t sks_low; /* 10: Sense Key Specific (LSW) */
uint16_t reserved3; /* 12: Part of additional sense bytes (unused) */
uint16_t uec; /* 14: Additional Sense Bytes */
- uint64_t per; /* 16: Additional Sense Bytes */
+ uint64_t per __packed; /* 16: Additional Sense Bytes */
uint8_t reserved4[2]; /* 1E: Additional Sense Bytes (unused) */
};
@@ -292,11 +288,11 @@ struct fit_comp_error_info {
* Version one has the last 32 bits sg_list_len_bytes;
*/
struct skd_command_header {
- uint64_t sg_list_dma_address;
+ __be64 sg_list_dma_address;
uint16_t tag;
uint8_t attribute;
uint8_t add_cdb_len; /* In 32 bit words */
- uint32_t sg_list_len_bytes;
+ __be32 sg_list_len_bytes;
};
struct skd_scsi_request {
@@ -309,22 +305,20 @@ struct driver_inquiry_data {
uint8_t peripheral_device_type:5;
uint8_t qualifier:3;
uint8_t page_code;
- uint16_t page_length;
- uint16_t pcie_bus_number;
+ __be16 page_length;
+ __be16 pcie_bus_number;
uint8_t pcie_device_number;
uint8_t pcie_function_number;
uint8_t pcie_link_speed;
uint8_t pcie_link_lanes;
- uint16_t pcie_vendor_id;
- uint16_t pcie_device_id;
- uint16_t pcie_subsystem_vendor_id;
- uint16_t pcie_subsystem_device_id;
+ __be16 pcie_vendor_id;
+ __be16 pcie_device_id;
+ __be16 pcie_subsystem_vendor_id;
+ __be16 pcie_subsystem_device_id;
uint8_t reserved1[2];
uint8_t reserved2[3];
uint8_t driver_version_length;
uint8_t driver_version[0x14];
};
-#pragma pack(pop, s1120_h)
-
#endif /* SKD_S1120_H */
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 6b16ead1da58..ad9749463d4f 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -875,6 +875,56 @@ static void print_version(void)
printk(KERN_INFO "%s", version);
}
+struct vdc_check_port_data {
+ int dev_no;
+ char *type;
+};
+
+static int vdc_device_probed(struct device *dev, void *arg)
+{
+ struct vio_dev *vdev = to_vio_dev(dev);
+ struct vdc_check_port_data *port_data;
+
+ port_data = (struct vdc_check_port_data *)arg;
+
+ if ((vdev->dev_no == port_data->dev_no) &&
+ (!(strcmp((char *)&vdev->type, port_data->type))) &&
+ dev_get_drvdata(dev)) {
+ /* This device has already been configured
+ * by vdc_port_probe()
+ */
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* Determine whether the VIO device is part of an mpgroup
+ * by locating all the virtual-device-port nodes associated
+ * with the parent virtual-device node for the VIO device
+ * and checking whether any of these nodes are vdc-ports
+ * which have already been configured.
+ *
+ * Returns true if this device is part of an mpgroup and has
+ * already been probed.
+ */
+static bool vdc_port_mpgroup_check(struct vio_dev *vdev)
+{
+ struct vdc_check_port_data port_data;
+ struct device *dev;
+
+ port_data.dev_no = vdev->dev_no;
+ port_data.type = (char *)&vdev->type;
+
+ dev = device_find_child(vdev->dev.parent, &port_data,
+ vdc_device_probed);
+
+ if (dev)
+ return true;
+
+ return false;
+}
+
static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
{
struct mdesc_handle *hp;
@@ -893,6 +943,14 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
goto err_out_release_mdesc;
}
+ /* Check if this device is part of an mpgroup */
+ if (vdc_port_mpgroup_check(vdev)) {
+ printk(KERN_WARNING
+ "VIO: Ignoring extra vdisk port %s",
+ dev_name(&vdev->dev));
+ goto err_out_release_mdesc;
+ }
+
port = kzalloc(sizeof(*port), GFP_KERNEL);
err = -ENOMEM;
if (!port) {
@@ -943,6 +1001,9 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
if (err)
goto err_out_free_tx_ring;
+ /* Note that the device driver_data is used to determine
+ * whether the port has been probed.
+ */
dev_set_drvdata(&vdev->dev, port);
mdesc_release(hp);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1498b899a593..34e17ee799be 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -265,7 +265,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
}
spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
- if (req_op(req) == REQ_OP_SCSI_IN || req_op(req) == REQ_OP_SCSI_OUT)
+ if (blk_rq_is_scsi(req))
err = virtblk_add_req_scsi(vblk->vqs[qid].vq, vbr, vbr->sg, num);
else
err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
@@ -381,6 +381,7 @@ static void virtblk_config_changed_work(struct work_struct *work)
struct request_queue *q = vblk->disk->queue;
char cap_str_2[10], cap_str_10[10];
char *envp[] = { "RESIZE=1", NULL };
+ unsigned long long nblocks;
u64 capacity;
/* Host must always specify the capacity. */
@@ -393,16 +394,19 @@ static void virtblk_config_changed_work(struct work_struct *work)
capacity = (sector_t)-1;
}
- string_get_size(capacity, queue_logical_block_size(q),
+ nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9);
+
+ string_get_size(nblocks, queue_logical_block_size(q),
STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
- string_get_size(capacity, queue_logical_block_size(q),
+ string_get_size(nblocks, queue_logical_block_size(q),
STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
dev_notice(&vdev->dev,
- "new size: %llu %d-byte logical blocks (%s/%s)\n",
- (unsigned long long)capacity,
- queue_logical_block_size(q),
- cap_str_10, cap_str_2);
+ "new size: %llu %d-byte logical blocks (%s/%s)\n",
+ nblocks,
+ queue_logical_block_size(q),
+ cap_str_10,
+ cap_str_2);
set_capacity(vblk->disk, capacity);
revalidate_disk(vblk->disk);
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index fe7cd58c43d0..987d665e82de 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -705,9 +705,9 @@ static unsigned int xen_blkbk_unmap_prepare(
GNTMAP_host_map, pages[i]->handle);
pages[i]->handle = BLKBACK_INVALID_HANDLE;
invcount++;
- }
+ }
- return invcount;
+ return invcount;
}
static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data)
@@ -1251,6 +1251,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
break;
case BLKIF_OP_WRITE_BARRIER:
drain = true;
+ /* fall through */
case BLKIF_OP_FLUSH_DISKCACHE:
ring->st_f_req++;
operation = REQ_OP_WRITE;
@@ -1362,7 +1363,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
goto fail_put_bio;
biolist[nbio++] = bio;
- bio->bi_bdev = preq.bdev;
+ bio_set_dev(bio, preq.bdev);
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
bio->bi_iter.bi_sector = preq.sector_number;
@@ -1381,7 +1382,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
goto fail_put_bio;
biolist[nbio++] = bio;
- bio->bi_bdev = preq.bdev;
+ bio_set_dev(bio, preq.bdev);
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
bio_set_op_attrs(bio, operation, operation_flags);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 792da683e70d..21c1be1eb226 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -244,6 +244,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
struct pending_req *req, *n;
unsigned int j, r;
+ bool busy = false;
for (r = 0; r < blkif->nr_rings; r++) {
struct xen_blkif_ring *ring = &blkif->rings[r];
@@ -261,8 +262,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
* don't have any discard_io or other_io requests. So, checking
* for inflight IO is enough.
*/
- if (atomic_read(&ring->inflight) > 0)
- return -EBUSY;
+ if (atomic_read(&ring->inflight) > 0) {
+ busy = true;
+ continue;
+ }
if (ring->irq) {
unbind_from_irqhandler(ring->irq, ring);
@@ -300,6 +303,9 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
ring->active = false;
}
+ if (busy)
+ return -EBUSY;
+
blkif->nr_ring_pages = 0;
/*
* blkif->rings was allocated in connect_ring, so we should free it in
@@ -810,7 +816,8 @@ static void frontend_changed(struct xenbus_device *dev,
xenbus_switch_state(dev, XenbusStateClosed);
if (xenbus_dev_is_online(dev))
break;
- /* fall through if not online */
+ /* fall through */
+ /* if not online */
case XenbusStateUnknown:
/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
device_unregister(&dev->dev);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 98e34e4c62b8..891265acb10e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2075,9 +2075,9 @@ static int blkfront_resume(struct xenbus_device *dev)
/*
* Get the bios in the request so we can re-queue them.
*/
- if (req_op(shadow[i].request) == REQ_OP_FLUSH ||
- req_op(shadow[i].request) == REQ_OP_DISCARD ||
- req_op(shadow[i].request) == REQ_OP_SECURE_ERASE ||
+ if (req_op(shadow[j].request) == REQ_OP_FLUSH ||
+ req_op(shadow[j].request) == REQ_OP_DISCARD ||
+ req_op(shadow[j].request) == REQ_OP_SECURE_ERASE ||
shadow[j].request->cmd_flags & REQ_FUA) {
/*
* Flush operations don't contain bios, so
@@ -2456,7 +2456,7 @@ static void blkback_changed(struct xenbus_device *dev,
case XenbusStateClosed:
if (dev->state == XenbusStateClosed)
break;
- /* Missed the backend's Closing state -- fallthrough */
+ /* fall through */
case XenbusStateClosing:
if (info)
blkfront_closing(info);
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index b8ecba6dcd3b..7cd4a8ec3c8f 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -13,3 +13,15 @@ config ZRAM
disks and maybe many more.
See zram.txt for more information.
+
+config ZRAM_WRITEBACK
+ bool "Write back incompressible page to backing device"
+ depends on ZRAM
+ default n
+ help
+ With incompressible page, there is no memory saving to keep it
+ in memory. Instead, write it out to backing device.
+ For this feature, admin should set up backing device via
+ /sys/block/zramX/backing_dev.
+
+ See zram.txt for more infomration.
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 856d5dc02451..4063f3f59f4f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -270,6 +270,349 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}
+#ifdef CONFIG_ZRAM_WRITEBACK
+static bool zram_wb_enabled(struct zram *zram)
+{
+ return zram->backing_dev;
+}
+
+static void reset_bdev(struct zram *zram)
+{
+ struct block_device *bdev;
+
+ if (!zram_wb_enabled(zram))
+ return;
+
+ bdev = zram->bdev;
+ if (zram->old_block_size)
+ set_blocksize(bdev, zram->old_block_size);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+ /* hope filp_close flush all of IO */
+ filp_close(zram->backing_dev, NULL);
+ zram->backing_dev = NULL;
+ zram->old_block_size = 0;
+ zram->bdev = NULL;
+
+ kvfree(zram->bitmap);
+ zram->bitmap = NULL;
+}
+
+static ssize_t backing_dev_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+ struct file *file = zram->backing_dev;
+ char *p;
+ ssize_t ret;
+
+ down_read(&zram->init_lock);
+ if (!zram_wb_enabled(zram)) {
+ memcpy(buf, "none\n", 5);
+ up_read(&zram->init_lock);
+ return 5;
+ }
+
+ p = file_path(file, buf, PAGE_SIZE - 1);
+ if (IS_ERR(p)) {
+ ret = PTR_ERR(p);
+ goto out;
+ }
+
+ ret = strlen(p);
+ memmove(buf, p, ret);
+ buf[ret++] = '\n';
+out:
+ up_read(&zram->init_lock);
+ return ret;
+}
+
+static ssize_t backing_dev_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ char *file_name;
+ struct file *backing_dev = NULL;
+ struct inode *inode;
+ struct address_space *mapping;
+ unsigned int bitmap_sz, old_block_size = 0;
+ unsigned long nr_pages, *bitmap = NULL;
+ struct block_device *bdev = NULL;
+ int err;
+ struct zram *zram = dev_to_zram(dev);
+
+ file_name = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!file_name)
+ return -ENOMEM;
+
+ down_write(&zram->init_lock);
+ if (init_done(zram)) {
+ pr_info("Can't setup backing device for initialized device\n");
+ err = -EBUSY;
+ goto out;
+ }
+
+ strlcpy(file_name, buf, len);
+
+ backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
+ if (IS_ERR(backing_dev)) {
+ err = PTR_ERR(backing_dev);
+ backing_dev = NULL;
+ goto out;
+ }
+
+ mapping = backing_dev->f_mapping;
+ inode = mapping->host;
+
+ /* Support only block device in this moment */
+ if (!S_ISBLK(inode->i_mode)) {
+ err = -ENOTBLK;
+ goto out;
+ }
+
+ bdev = bdgrab(I_BDEV(inode));
+ err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
+ if (err < 0)
+ goto out;
+
+ nr_pages = i_size_read(inode) >> PAGE_SHIFT;
+ bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
+ bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
+ if (!bitmap) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ old_block_size = block_size(bdev);
+ err = set_blocksize(bdev, PAGE_SIZE);
+ if (err)
+ goto out;
+
+ reset_bdev(zram);
+ spin_lock_init(&zram->bitmap_lock);
+
+ zram->old_block_size = old_block_size;
+ zram->bdev = bdev;
+ zram->backing_dev = backing_dev;
+ zram->bitmap = bitmap;
+ zram->nr_pages = nr_pages;
+ up_write(&zram->init_lock);
+
+ pr_info("setup backing device %s\n", file_name);
+ kfree(file_name);
+
+ return len;
+out:
+ if (bitmap)
+ kvfree(bitmap);
+
+ if (bdev)
+ blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+
+ if (backing_dev)
+ filp_close(backing_dev, NULL);
+
+ up_write(&zram->init_lock);
+
+ kfree(file_name);
+
+ return err;
+}
+
+static unsigned long get_entry_bdev(struct zram *zram)
+{
+ unsigned long entry;
+
+ spin_lock(&zram->bitmap_lock);
+ /* skip 0 bit to confuse zram.handle = 0 */
+ entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
+ if (entry == zram->nr_pages) {
+ spin_unlock(&zram->bitmap_lock);
+ return 0;
+ }
+
+ set_bit(entry, zram->bitmap);
+ spin_unlock(&zram->bitmap_lock);
+
+ return entry;
+}
+
+static void put_entry_bdev(struct zram *zram, unsigned long entry)
+{
+ int was_set;
+
+ spin_lock(&zram->bitmap_lock);
+ was_set = test_and_clear_bit(entry, zram->bitmap);
+ spin_unlock(&zram->bitmap_lock);
+ WARN_ON_ONCE(!was_set);
+}
+
+void zram_page_end_io(struct bio *bio)
+{
+ struct page *page = bio->bi_io_vec[0].bv_page;
+
+ page_endio(page, op_is_write(bio_op(bio)),
+ blk_status_to_errno(bio->bi_status));
+ bio_put(bio);
+}
+
+/*
+ * Returns 1 if the submission is successful.
+ */
+static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent)
+{
+ struct bio *bio;
+
+ bio = bio_alloc(GFP_ATOMIC, 1);
+ if (!bio)
+ return -ENOMEM;
+
+ bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
+ bio_set_dev(bio, zram->bdev);
+ if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
+ bio_put(bio);
+ return -EIO;
+ }
+
+ if (!parent) {
+ bio->bi_opf = REQ_OP_READ;
+ bio->bi_end_io = zram_page_end_io;
+ } else {
+ bio->bi_opf = parent->bi_opf;
+ bio_chain(bio, parent);
+ }
+
+ submit_bio(bio);
+ return 1;
+}
+
+struct zram_work {
+ struct work_struct work;
+ struct zram *zram;
+ unsigned long entry;
+ struct bio *bio;
+};
+
+#if PAGE_SIZE != 4096
+static void zram_sync_read(struct work_struct *work)
+{
+ struct bio_vec bvec;
+ struct zram_work *zw = container_of(work, struct zram_work, work);
+ struct zram *zram = zw->zram;
+ unsigned long entry = zw->entry;
+ struct bio *bio = zw->bio;
+
+ read_from_bdev_async(zram, &bvec, entry, bio);
+}
+
+/*
+ * Block layer want one ->make_request_fn to be active at a time
+ * so if we use chained IO with parent IO in same context,
+ * it's a deadlock. To avoid, it, it uses worker thread context.
+ */
+static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *bio)
+{
+ struct zram_work work;
+
+ work.zram = zram;
+ work.entry = entry;
+ work.bio = bio;
+
+ INIT_WORK_ONSTACK(&work.work, zram_sync_read);
+ queue_work(system_unbound_wq, &work.work);
+ flush_work(&work.work);
+ destroy_work_on_stack(&work.work);
+
+ return 1;
+}
+#else
+static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *bio)
+{
+ WARN_ON(1);
+ return -EIO;
+}
+#endif
+
+static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent, bool sync)
+{
+ if (sync)
+ return read_from_bdev_sync(zram, bvec, entry, parent);
+ else
+ return read_from_bdev_async(zram, bvec, entry, parent);
+}
+
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *parent,
+ unsigned long *pentry)
+{
+ struct bio *bio;
+ unsigned long entry;
+
+ bio = bio_alloc(GFP_ATOMIC, 1);
+ if (!bio)
+ return -ENOMEM;
+
+ entry = get_entry_bdev(zram);
+ if (!entry) {
+ bio_put(bio);
+ return -ENOSPC;
+ }
+
+ bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
+ bio_set_dev(bio, zram->bdev);
+ if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
+ bvec->bv_offset)) {
+ bio_put(bio);
+ put_entry_bdev(zram, entry);
+ return -EIO;
+ }
+
+ if (!parent) {
+ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
+ bio->bi_end_io = zram_page_end_io;
+ } else {
+ bio->bi_opf = parent->bi_opf;
+ bio_chain(bio, parent);
+ }
+
+ submit_bio(bio);
+ *pentry = entry;
+
+ return 0;
+}
+
+static void zram_wb_clear(struct zram *zram, u32 index)
+{
+ unsigned long entry;
+
+ zram_clear_flag(zram, index, ZRAM_WB);
+ entry = zram_get_element(zram, index);
+ zram_set_element(zram, index, 0);
+ put_entry_bdev(zram, entry);
+}
+
+#else
+static bool zram_wb_enabled(struct zram *zram) { return false; }
+static inline void reset_bdev(struct zram *zram) {};
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *parent,
+ unsigned long *pentry)
+
+{
+ return -EIO;
+}
+
+static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent, bool sync)
+{
+ return -EIO;
+}
+static void zram_wb_clear(struct zram *zram, u32 index) {}
+#endif
+
+
/*
* We switched to per-cpu streams and this attr is not needed anymore.
* However, we will keep it around for some time, because:
@@ -308,7 +651,7 @@ static ssize_t comp_algorithm_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
- char compressor[CRYPTO_MAX_ALG_NAME];
+ char compressor[ARRAY_SIZE(zram->compressor)];
size_t sz;
strlcpy(compressor, buf, sizeof(compressor));
@@ -327,7 +670,7 @@ static ssize_t comp_algorithm_store(struct device *dev,
return -EBUSY;
}
- strlcpy(zram->compressor, compressor, sizeof(compressor));
+ strcpy(zram->compressor, compressor);
up_write(&zram->init_lock);
return len;
}
@@ -453,30 +796,6 @@ static bool zram_same_page_read(struct zram *zram, u32 index,
return false;
}
-static bool zram_same_page_write(struct zram *zram, u32 index,
- struct page *page)
-{
- unsigned long element;
- void *mem = kmap_atomic(page);
-
- if (page_same_filled(mem, &element)) {
- kunmap_atomic(mem);
- /* Free memory associated with this sector now. */
- zram_slot_lock(zram, index);
- zram_free_page(zram, index);
- zram_set_flag(zram, index, ZRAM_SAME);
- zram_set_element(zram, index, element);
- zram_slot_unlock(zram, index);
-
- atomic64_inc(&zram->stats.same_pages);
- atomic64_inc(&zram->stats.pages_stored);
- return true;
- }
- kunmap_atomic(mem);
-
- return false;
-}
-
static void zram_meta_free(struct zram *zram, u64 disksize)
{
size_t num_pages = disksize >> PAGE_SHIFT;
@@ -515,7 +834,13 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
*/
static void zram_free_page(struct zram *zram, size_t index)
{
- unsigned long handle = zram_get_handle(zram, index);
+ unsigned long handle;
+
+ if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
+ zram_wb_clear(zram, index);
+ atomic64_dec(&zram->stats.pages_stored);
+ return;
+ }
/*
* No memory is allocated for same element filled pages.
@@ -529,6 +854,7 @@ static void zram_free_page(struct zram *zram, size_t index)
return;
}
+ handle = zram_get_handle(zram, index);
if (!handle)
return;
@@ -542,13 +868,31 @@ static void zram_free_page(struct zram *zram, size_t index)
zram_set_obj_size(zram, index, 0);
}
-static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
+static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
+ struct bio *bio, bool partial_io)
{
int ret;
unsigned long handle;
unsigned int size;
void *src, *dst;
+ if (zram_wb_enabled(zram)) {
+ zram_slot_lock(zram, index);
+ if (zram_test_flag(zram, index, ZRAM_WB)) {
+ struct bio_vec bvec;
+
+ zram_slot_unlock(zram, index);
+
+ bvec.bv_page = page;
+ bvec.bv_len = PAGE_SIZE;
+ bvec.bv_offset = 0;
+ return read_from_bdev(zram, &bvec,
+ zram_get_element(zram, index),
+ bio, partial_io);
+ }
+ zram_slot_unlock(zram, index);
+ }
+
if (zram_same_page_read(zram, index, page, 0, PAGE_SIZE))
return 0;
@@ -581,7 +925,7 @@ static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset)
+ u32 index, int offset, struct bio *bio)
{
int ret;
struct page *page;
@@ -594,7 +938,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
return -ENOMEM;
}
- ret = zram_decompress_page(zram, page, index);
+ ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
if (unlikely(ret))
goto out;
@@ -613,30 +957,57 @@ out:
return ret;
}
-static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
- struct page *page,
- unsigned long *out_handle, unsigned int *out_comp_len)
+static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *bio)
{
- int ret;
- unsigned int comp_len;
- void *src;
+ int ret = 0;
unsigned long alloced_pages;
unsigned long handle = 0;
+ unsigned int comp_len = 0;
+ void *src, *dst, *mem;
+ struct zcomp_strm *zstrm;
+ struct page *page = bvec->bv_page;
+ unsigned long element = 0;
+ enum zram_pageflags flags = 0;
+ bool allow_wb = true;
+
+ mem = kmap_atomic(page);
+ if (page_same_filled(mem, &element)) {
+ kunmap_atomic(mem);
+ /* Free memory associated with this sector now. */
+ flags = ZRAM_SAME;
+ atomic64_inc(&zram->stats.same_pages);
+ goto out;
+ }
+ kunmap_atomic(mem);
compress_again:
+ zstrm = zcomp_stream_get(zram->comp);
src = kmap_atomic(page);
- ret = zcomp_compress(*zstrm, src, &comp_len);
+ ret = zcomp_compress(zstrm, src, &comp_len);
kunmap_atomic(src);
if (unlikely(ret)) {
+ zcomp_stream_put(zram->comp);
pr_err("Compression failed! err=%d\n", ret);
- if (handle)
- zs_free(zram->mem_pool, handle);
+ zs_free(zram->mem_pool, handle);
return ret;
}
- if (unlikely(comp_len > max_zpage_size))
+ if (unlikely(comp_len > max_zpage_size)) {
+ if (zram_wb_enabled(zram) && allow_wb) {
+ zcomp_stream_put(zram->comp);
+ ret = write_to_bdev(zram, bvec, index, bio, &element);
+ if (!ret) {
+ flags = ZRAM_WB;
+ ret = 1;
+ goto out;
+ }
+ allow_wb = false;
+ goto compress_again;
+ }
comp_len = PAGE_SIZE;
+ }
/*
* handle allocation has 2 paths:
@@ -663,7 +1034,6 @@ compress_again:
handle = zs_malloc(zram->mem_pool, comp_len,
GFP_NOIO | __GFP_HIGHMEM |
__GFP_MOVABLE);
- *zstrm = zcomp_stream_get(zram->comp);
if (handle)
goto compress_again;
return -ENOMEM;
@@ -673,34 +1043,11 @@ compress_again:
update_used_max(zram, alloced_pages);
if (zram->limit_pages && alloced_pages > zram->limit_pages) {
+ zcomp_stream_put(zram->comp);
zs_free(zram->mem_pool, handle);
return -ENOMEM;
}
- *out_handle = handle;
- *out_comp_len = comp_len;
- return 0;
-}
-
-static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
-{
- int ret;
- unsigned long handle;
- unsigned int comp_len;
- void *src, *dst;
- struct zcomp_strm *zstrm;
- struct page *page = bvec->bv_page;
-
- if (zram_same_page_write(zram, index, page))
- return 0;
-
- zstrm = zcomp_stream_get(zram->comp);
- ret = zram_compress(zram, &zstrm, page, &handle, &comp_len);
- if (ret) {
- zcomp_stream_put(zram->comp);
- return ret;
- }
-
dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
src = zstrm->buffer;
@@ -712,25 +1059,31 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
zcomp_stream_put(zram->comp);
zs_unmap_object(zram->mem_pool, handle);
-
+ atomic64_add(comp_len, &zram->stats.compr_data_size);
+out:
/*
* Free memory associated with this sector
* before overwriting unused sectors.
*/
zram_slot_lock(zram, index);
zram_free_page(zram, index);
- zram_set_handle(zram, index, handle);
- zram_set_obj_size(zram, index, comp_len);
+
+ if (flags) {
+ zram_set_flag(zram, index, flags);
+ zram_set_element(zram, index, element);
+ } else {
+ zram_set_handle(zram, index, handle);
+ zram_set_obj_size(zram, index, comp_len);
+ }
zram_slot_unlock(zram, index);
/* Update stats */
- atomic64_add(comp_len, &zram->stats.compr_data_size);
atomic64_inc(&zram->stats.pages_stored);
- return 0;
+ return ret;
}
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset)
+ u32 index, int offset, struct bio *bio)
{
int ret;
struct page *page = NULL;
@@ -748,7 +1101,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
if (!page)
return -ENOMEM;
- ret = zram_decompress_page(zram, page, index);
+ ret = __zram_bvec_read(zram, page, index, bio, true);
if (ret)
goto out;
@@ -763,7 +1116,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
vec.bv_offset = 0;
}
- ret = __zram_bvec_write(zram, &vec, index);
+ ret = __zram_bvec_write(zram, &vec, index, bio);
out:
if (is_partial_io(bvec))
__free_page(page);
@@ -808,28 +1161,34 @@ static void zram_bio_discard(struct zram *zram, u32 index,
}
}
+/*
+ * Returns errno if it has some problem. Otherwise return 0 or 1.
+ * Returns 0 if IO request was done synchronously
+ * Returns 1 if IO request was successfully submitted.
+ */
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset, bool is_write)
+ int offset, bool is_write, struct bio *bio)
{
unsigned long start_time = jiffies;
int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
+ struct request_queue *q = zram->disk->queue;
int ret;
- generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT,
+ generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT,
&zram->disk->part0);
if (!is_write) {
atomic64_inc(&zram->stats.num_reads);
- ret = zram_bvec_read(zram, bvec, index, offset);
+ ret = zram_bvec_read(zram, bvec, index, offset, bio);
flush_dcache_page(bvec->bv_page);
} else {
atomic64_inc(&zram->stats.num_writes);
- ret = zram_bvec_write(zram, bvec, index, offset);
+ ret = zram_bvec_write(zram, bvec, index, offset, bio);
}
- generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);
+ generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);
- if (unlikely(ret)) {
+ if (unlikely(ret < 0)) {
if (!is_write)
atomic64_inc(&zram->stats.failed_reads);
else
@@ -868,7 +1227,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
unwritten);
if (zram_bvec_rw(zram, &bv, index, offset,
- op_is_write(bio_op(bio))) < 0)
+ op_is_write(bio_op(bio)), bio) < 0)
goto out;
bv.bv_offset += bv.bv_len;
@@ -922,16 +1281,18 @@ static void zram_slot_free_notify(struct block_device *bdev,
static int zram_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, bool is_write)
{
- int offset, err = -EIO;
+ int offset, ret;
u32 index;
struct zram *zram;
struct bio_vec bv;
+ if (PageTransHuge(page))
+ return -ENOTSUPP;
zram = bdev->bd_disk->private_data;
if (!valid_io_request(zram, sector, PAGE_SIZE)) {
atomic64_inc(&zram->stats.invalid_io);
- err = -EINVAL;
+ ret = -EINVAL;
goto out;
}
@@ -942,7 +1303,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
bv.bv_len = PAGE_SIZE;
bv.bv_offset = 0;
- err = zram_bvec_rw(zram, &bv, index, offset, is_write);
+ ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL);
out:
/*
* If I/O fails, just return error(ie, non-zero) without
@@ -952,9 +1313,20 @@ out:
* bio->bi_end_io does things to handle the error
* (e.g., SetPageError, set_page_dirty and extra works).
*/
- if (err == 0)
+ if (unlikely(ret < 0))
+ return ret;
+
+ switch (ret) {
+ case 0:
page_endio(page, is_write, 0);
- return err;
+ break;
+ case 1:
+ ret = 0;
+ break;
+ default:
+ WARN_ON(1);
+ }
+ return ret;
}
static void zram_reset_device(struct zram *zram)
@@ -983,6 +1355,7 @@ static void zram_reset_device(struct zram *zram)
zram_meta_free(zram, disksize);
memset(&zram->stats, 0, sizeof(zram->stats));
zcomp_destroy(comp);
+ reset_bdev(zram);
}
static ssize_t disksize_store(struct device *dev,
@@ -1108,6 +1481,9 @@ static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
+#ifdef CONFIG_ZRAM_WRITEBACK
+static DEVICE_ATTR_RW(backing_dev);
+#endif
static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
@@ -1118,6 +1494,9 @@ static struct attribute *zram_disk_attrs[] = {
&dev_attr_mem_used_max.attr,
&dev_attr_max_comp_streams.attr,
&dev_attr_comp_algorithm.attr,
+#ifdef CONFIG_ZRAM_WRITEBACK
+ &dev_attr_backing_dev.attr,
+#endif
&dev_attr_io_stat.attr,
&dev_attr_mm_stat.attr,
&dev_attr_debug_stat.attr,
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index e34e44d02e3e..31762db861e3 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -60,9 +60,10 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
/* Flags for zram pages (table[page_no].value) */
enum zram_pageflags {
- /* Page consists entirely of zeros */
+ /* Page consists the same element */
ZRAM_SAME = ZRAM_FLAG_SHIFT,
ZRAM_ACCESS, /* page is now accessed */
+ ZRAM_WB, /* page is stored on backing_device */
__NR_ZRAM_PAGEFLAGS,
};
@@ -115,5 +116,13 @@ struct zram {
* zram is claimed so open request will be failed
*/
bool claim; /* Protected by bdev->bd_mutex */
+#ifdef CONFIG_ZRAM_WRITEBACK
+ struct file *backing_dev;
+ struct block_device *bdev;
+ unsigned int old_block_size;
+ unsigned long *bitmap;
+ unsigned long nr_pages;
+ spinlock_t bitmap_lock;
+#endif
};
#endif
OpenPOWER on IntegriCloud