diff options
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 11 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 66 | ||||
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.c | 390 | ||||
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.h | 53 | ||||
-rw-r--r-- | drivers/block/rbd.c | 72 | ||||
-rw-r--r-- | drivers/block/umem.c | 40 | ||||
-rw-r--r-- | drivers/block/xen-blkback/common.h | 2 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 58 |
8 files changed, 461 insertions, 231 deletions
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index b5c5ff53cb57..fcb956bb4b4c 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1475,10 +1475,17 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi first_word = 0; spin_lock_irq(&b->bm_lock); } - /* last page (respectively only page, for first page == last page) */ last_word = MLPP(el >> LN2_BPL); - bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); + + /* consider bitmap->bm_bits = 32768, bitmap->bm_number_of_pages = 1. (or multiples). + * ==> e = 32767, el = 32768, last_page = 2, + * and now last_word = 0. + * We do not want to touch last_page in this case, + * as we did not allocate it, it is not present in bitmap->bm_pages. + */ + if (last_word) + bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); /* possibly trailing bits. * example: (e & 63) == 63, el will be e+1. diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 9c5c84946b05..8e93a6ac9bb6 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -472,12 +472,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; - D_ASSERT(!(req->rq_state & RQ_NET_MASK)); + if (req->rq_state & RQ_LOCAL_ABORTED) { + _req_may_be_done(req, m); + break; + } __drbd_chk_io_error(mdev, false); goto_queue_for_net_read: + D_ASSERT(!(req->rq_state & RQ_NET_MASK)); + /* no point in retrying if there is no good remote data, * or we have no connection. */ if (mdev->state.pdsk != D_UP_TO_DATE) { @@ -765,6 +770,40 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); } +static void maybe_pull_ahead(struct drbd_conf *mdev) +{ + int congested = 0; + + /* If I don't even have good local storage, we can not reasonably try + * to pull ahead of the peer. We also need the local reference to make + * sure mdev->act_log is there. + * Note: caller has to make sure that net_conf is there. + */ + if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) + return; + + if (mdev->net_conf->cong_fill && + atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) { + dev_info(DEV, "Congestion-fill threshold reached\n"); + congested = 1; + } + + if (mdev->act_log->used >= mdev->net_conf->cong_extents) { + dev_info(DEV, "Congestion-extents threshold reached\n"); + congested = 1; + } + + if (congested) { + queue_barrier(mdev); /* last barrier, after mirrored writes */ + + if (mdev->net_conf->on_congestion == OC_PULL_AHEAD) + _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); + else /*mdev->net_conf->on_congestion == OC_DISCONNECT */ + _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); + } + put_ldev(mdev); +} + static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) { const int rw = bio_rw(bio); @@ -972,29 +1011,8 @@ allocate_barrier: _req_mod(req, queue_for_send_oos); if (remote && - mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) { - int congested = 0; - - if (mdev->net_conf->cong_fill && - atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) { - dev_info(DEV, "Congestion-fill threshold reached\n"); - congested = 1; - } - - if (mdev->act_log->used >= mdev->net_conf->cong_extents) { - dev_info(DEV, "Congestion-extents threshold reached\n"); - congested = 1; - } - - if (congested) { - queue_barrier(mdev); /* last barrier, after mirrored writes */ - - if (mdev->net_conf->on_congestion == OC_PULL_AHEAD) - _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); - else /*mdev->net_conf->on_congestion == OC_DISCONNECT */ - _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); - } - } + mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) + maybe_pull_ahead(mdev); spin_unlock_irq(&mdev->req_lock); kfree(b); /* if someone else has beaten us to it... */ diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 304000c3d433..a8fddeb3d638 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -37,6 +37,7 @@ #include <linux/kthread.h> #include <../drivers/ata/ahci.h> #include <linux/export.h> +#include <linux/debugfs.h> #include "mtip32xx.h" #define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32) @@ -85,6 +86,7 @@ static int instance; * allocated in mtip_init(). */ static int mtip_major; +static struct dentry *dfs_parent; static DEFINE_SPINLOCK(rssd_index_lock); static DEFINE_IDA(rssd_index_ida); @@ -294,18 +296,16 @@ static int hba_reset_nosleep(struct driver_data *dd) */ static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag) { - unsigned long flags = 0; - atomic_set(&port->commands[tag].active, 1); - spin_lock_irqsave(&port->cmd_issue_lock, flags); + spin_lock(&port->cmd_issue_lock); writel((1 << MTIP_TAG_BIT(tag)), port->s_active[MTIP_TAG_INDEX(tag)]); writel((1 << MTIP_TAG_BIT(tag)), port->cmd_issue[MTIP_TAG_INDEX(tag)]); - spin_unlock_irqrestore(&port->cmd_issue_lock, flags); + spin_unlock(&port->cmd_issue_lock); /* Set the command's timeout value.*/ port->commands[tag].comp_time = jiffies + msecs_to_jiffies( @@ -436,8 +436,7 @@ static void mtip_init_port(struct mtip_port *port) writel(0xFFFFFFFF, port->completed[i]); /* Clear any pending interrupts for this port */ - writel(readl(port->dd->mmio + PORT_IRQ_STAT), - port->dd->mmio + PORT_IRQ_STAT); + writel(readl(port->mmio + PORT_IRQ_STAT), port->mmio + PORT_IRQ_STAT); /* Clear any pending interrupts on the HBA. */ writel(readl(port->dd->mmio + HOST_IRQ_STAT), @@ -782,13 +781,24 @@ static void mtip_handle_tfe(struct driver_data *dd) /* Stop the timer to prevent command timeouts. */ del_timer(&port->cmd_timer); + set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); + + if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && + test_bit(MTIP_TAG_INTERNAL, port->allocated)) { + cmd = &port->commands[MTIP_TAG_INTERNAL]; + dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n"); + + atomic_inc(&cmd->active); /* active > 1 indicates error */ + if (cmd->comp_data && cmd->comp_func) { + cmd->comp_func(port, MTIP_TAG_INTERNAL, + cmd->comp_data, PORT_IRQ_TF_ERR); + } + goto handle_tfe_exit; + } /* clear the tag accumulator */ memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); - /* Set eh_active */ - set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); - /* Loop through all the groups */ for (group = 0; group < dd->slot_groups; group++) { completed = readl(port->completed[group]); @@ -940,6 +950,7 @@ static void mtip_handle_tfe(struct driver_data *dd) } print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt); +handle_tfe_exit: /* clear eh_active */ clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); wake_up_interruptible(&port->svc_wait); @@ -961,6 +972,8 @@ static inline void mtip_process_sdbf(struct driver_data *dd) /* walk all bits in all slot groups */ for (group = 0; group < dd->slot_groups; group++) { completed = readl(port->completed[group]); + if (!completed) + continue; /* clear completed status register in the hardware.*/ writel(completed, port->completed[group]); @@ -1329,22 +1342,6 @@ static int mtip_exec_internal_command(struct mtip_port *port, } rv = -EAGAIN; } - - if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) - & (1 << MTIP_TAG_INTERNAL)) { - dev_warn(&port->dd->pdev->dev, - "Retiring internal command but CI is 1.\n"); - if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &port->dd->dd_flag)) { - hba_reset_nosleep(port->dd); - rv = -ENXIO; - } else { - mtip_restart_port(port); - rv = -EAGAIN; - } - goto exec_ic_exit; - } - } else { /* Spin for <timeout> checking if command still outstanding */ timeout = jiffies + msecs_to_jiffies(timeout); @@ -1361,21 +1358,25 @@ static int mtip_exec_internal_command(struct mtip_port *port, rv = -ENXIO; goto exec_ic_exit; } + if (readl(port->mmio + PORT_IRQ_STAT) & PORT_IRQ_ERR) { + atomic_inc(&int_cmd->active); /* error */ + break; + } } + } - if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) + if (atomic_read(&int_cmd->active) > 1) { + dev_err(&port->dd->pdev->dev, + "Internal command [%02X] failed\n", fis->command); + rv = -EIO; + } + if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & (1 << MTIP_TAG_INTERNAL)) { - dev_err(&port->dd->pdev->dev, - "Internal command did not complete [atomic]\n"); + rv = -ENXIO; + if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, + &port->dd->dd_flag)) { + mtip_restart_port(port); rv = -EAGAIN; - if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &port->dd->dd_flag)) { - hba_reset_nosleep(port->dd); - rv = -ENXIO; - } else { - mtip_restart_port(port); - rv = -EAGAIN; - } } } exec_ic_exit: @@ -1893,13 +1894,33 @@ static int exec_drive_command(struct mtip_port *port, u8 *command, void __user *user_buffer) { struct host_to_dev_fis fis; - struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG); + struct host_to_dev_fis *reply; + u8 *buf = NULL; + dma_addr_t dma_addr = 0; + int rv = 0, xfer_sz = command[3]; + + if (xfer_sz) { + if (user_buffer) + return -EFAULT; + + buf = dmam_alloc_coherent(&port->dd->pdev->dev, + ATA_SECT_SIZE * xfer_sz, + &dma_addr, + GFP_KERNEL); + if (!buf) { + dev_err(&port->dd->pdev->dev, + "Memory allocation failed (%d bytes)\n", + ATA_SECT_SIZE * xfer_sz); + return -ENOMEM; + } + memset(buf, 0, ATA_SECT_SIZE * xfer_sz); + } /* Build the FIS. */ memset(&fis, 0, sizeof(struct host_to_dev_fis)); - fis.type = 0x27; - fis.opts = 1 << 7; - fis.command = command[0]; + fis.type = 0x27; + fis.opts = 1 << 7; + fis.command = command[0]; fis.features = command[2]; fis.sect_count = command[3]; if (fis.command == ATA_CMD_SMART) { @@ -1908,6 +1929,11 @@ static int exec_drive_command(struct mtip_port *port, u8 *command, fis.cyl_hi = 0xC2; } + if (xfer_sz) + reply = (port->rxfis + RX_FIS_PIO_SETUP); + else + reply = (port->rxfis + RX_FIS_D2H_REG); + dbg_printk(MTIP_DRV_NAME " %s: User Command: cmd %x, sect %x, " "feat %x, sectcnt %x\n", @@ -1917,43 +1943,46 @@ static int exec_drive_command(struct mtip_port *port, u8 *command, command[2], command[3]); - memset(port->sector_buffer, 0x00, ATA_SECT_SIZE); - /* Execute the command. */ if (mtip_exec_internal_command(port, &fis, 5, - port->sector_buffer_dma, - (command[3] != 0) ? ATA_SECT_SIZE : 0, + (xfer_sz ? dma_addr : 0), + (xfer_sz ? ATA_SECT_SIZE * xfer_sz : 0), 0, GFP_KERNEL, MTIP_IOCTL_COMMAND_TIMEOUT_MS) < 0) { - return -1; + rv = -EFAULT; + goto exit_drive_command; } /* Collect the completion status. */ command[0] = reply->command; /* Status*/ command[1] = reply->features; /* Error*/ - command[2] = command[3]; + command[2] = reply->sect_count; dbg_printk(MTIP_DRV_NAME " %s: Completion Status: stat %x, " - "err %x, cmd %x\n", + "err %x, nsect %x\n", __func__, command[0], command[1], command[2]); - if (user_buffer && command[3]) { + if (xfer_sz) { if (copy_to_user(user_buffer, - port->sector_buffer, + buf, ATA_SECT_SIZE * command[3])) { - return -EFAULT; + rv = -EFAULT; + goto exit_drive_command; } } - - return 0; +exit_drive_command: + if (buf) + dmam_free_coherent(&port->dd->pdev->dev, + ATA_SECT_SIZE * xfer_sz, buf, dma_addr); + return rv; } /* @@ -2003,6 +2032,32 @@ static unsigned int implicit_sector(unsigned char command, return rv; } +static void mtip_set_timeout(struct host_to_dev_fis *fis, unsigned int *timeout) +{ + switch (fis->command) { + case ATA_CMD_DOWNLOAD_MICRO: + *timeout = 120000; /* 2 minutes */ + break; + case ATA_CMD_SEC_ERASE_UNIT: + case 0xFC: + *timeout = 240000; /* 4 minutes */ + break; + case ATA_CMD_STANDBYNOW1: + *timeout = 10000; /* 10 seconds */ + break; + case 0xF7: + case 0xFA: + *timeout = 60000; /* 60 seconds */ + break; + case ATA_CMD_SMART: + *timeout = 15000; /* 15 seconds */ + break; + default: + *timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS; + break; + } +} + /* * Executes a taskfile * See ide_taskfile_ioctl() for derivation @@ -2023,7 +2078,7 @@ static int exec_drive_taskfile(struct driver_data *dd, unsigned int taskin = 0; unsigned int taskout = 0; u8 nsect = 0; - unsigned int timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS; + unsigned int timeout; unsigned int force_single_sector; unsigned int transfer_size; unsigned long task_file_data; @@ -2153,32 +2208,7 @@ static int exec_drive_taskfile(struct driver_data *dd, fis.lba_hi, fis.device); - switch (fis.command) { - case ATA_CMD_DOWNLOAD_MICRO: - /* Change timeout for Download Microcode to 2 minutes */ - timeout = 120000; - break; - case ATA_CMD_SEC_ERASE_UNIT: - /* Change timeout for Security Erase Unit to 4 minutes.*/ - timeout = 240000; - break; - case ATA_CMD_STANDBYNOW1: - /* Change timeout for standby immediate to 10 seconds.*/ - timeout = 10000; - break; - case 0xF7: - case 0xFA: - /* Change timeout for vendor unique command to 10 secs */ - timeout = 10000; - break; - case ATA_CMD_SMART: - /* Change timeout for vendor unique command to 15 secs */ - timeout = 15000; - break; - default: - timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS; - break; - } + mtip_set_timeout(&fis, &timeout); /* Determine the correct transfer size.*/ if (force_single_sector) @@ -2295,13 +2325,12 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd, { switch (cmd) { case HDIO_GET_IDENTITY: - if (mtip_get_identify(dd->port, (void __user *) arg) < 0) { - dev_warn(&dd->pdev->dev, - "Unable to read identity\n"); - return -EIO; - } - + { + if (copy_to_user((void __user *)arg, dd->port->identify, + sizeof(u16) * ATA_ID_WORDS)) + return -EFAULT; break; + } case HDIO_DRIVE_CMD: { u8 drive_command[4]; @@ -2519,7 +2548,7 @@ static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd, } /* - * Sysfs register/status dump. + * Sysfs status dump. * * @dev Pointer to the device structure, passed by the kernrel. * @attr Pointer to the device_attribute structure passed by the kernel. @@ -2528,72 +2557,138 @@ static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd, * return value * The size, in bytes, of the data copied into buf. */ -static ssize_t mtip_hw_show_registers(struct device *dev, +static ssize_t mtip_hw_show_status(struct device *dev, struct device_attribute *attr, char *buf) { - u32 group_allocated; struct driver_data *dd = dev_to_disk(dev)->private_data; int size = 0; + + if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag)) + size += sprintf(buf, "%s", "thermal_shutdown\n"); + else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag)) + size += sprintf(buf, "%s", "write_protect\n"); + else + size += sprintf(buf, "%s", "online\n"); + + return size; +} + +static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL); + +static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, + size_t len, loff_t *offset) +{ + struct driver_data *dd = (struct driver_data *)f->private_data; + char buf[MTIP_DFS_MAX_BUF_SIZE]; + u32 group_allocated; + int size = *offset; int n; - size += sprintf(&buf[size], "S ACTive:\n"); + if (!len || size) + return 0; + + if (size < 0) + return -EINVAL; + + size += sprintf(&buf[size], "H/ S ACTive : [ 0x"); - for (n = 0; n < dd->slot_groups; n++) - size += sprintf(&buf[size], "0x%08x\n", + for (n = dd->slot_groups-1; n >= 0; n--) + size += sprintf(&buf[size], "%08X ", readl(dd->port->s_active[n])); - size += sprintf(&buf[size], "Command Issue:\n"); + size += sprintf(&buf[size], "]\n"); + size += sprintf(&buf[size], "H/ Command Issue : [ 0x"); - for (n = 0; n < dd->slot_groups; n++) - size += sprintf(&buf[size], "0x%08x\n", + for (n = dd->slot_groups-1; n >= 0; n--) + size += sprintf(&buf[size], "%08X ", readl(dd->port->cmd_issue[n])); - size += sprintf(&buf[size], "Allocated:\n"); + size += sprintf(&buf[size], "]\n"); + size += sprintf(&buf[size], "H/ Completed : [ 0x"); + + for (n = dd->slot_groups-1; n >= 0; n--) + size += sprintf(&buf[size], "%08X ", + readl(dd->port->completed[n])); - for (n = 0; n < dd->slot_groups; n++) { + size += sprintf(&buf[size], "]\n"); + size += sprintf(&buf[size], "H/ PORT IRQ STAT : [ 0x%08X ]\n", + readl(dd->port->mmio + PORT_IRQ_STAT)); + size += sprintf(&buf[size], "H/ HOST IRQ STAT : [ 0x%08X ]\n", + readl(dd->mmio + HOST_IRQ_STAT)); + size += sprintf(&buf[size], "\n"); + + size += sprintf(&buf[size], "L/ Allocated : [ 0x"); + + for (n = dd->slot_groups-1; n >= 0; n--) { if (sizeof(long) > sizeof(u32)) group_allocated = dd->port->allocated[n/2] >> (32*(n&1)); else group_allocated = dd->port->allocated[n]; - size += sprintf(&buf[size], "0x%08x\n", - group_allocated); + size += sprintf(&buf[size], "%08X ", group_allocated); } + size += sprintf(&buf[size], "]\n"); - size += sprintf(&buf[size], "Completed:\n"); + size += sprintf(&buf[size], "L/ Commands in Q : [ 0x"); - for (n = 0; n < dd->slot_groups; n++) - size += sprintf(&buf[size], "0x%08x\n", - readl(dd->port->completed[n])); + for (n = dd->slot_groups-1; n >= 0; n--) { + if (sizeof(long) > sizeof(u32)) + group_allocated = + dd->port->cmds_to_issue[n/2] >> (32*(n&1)); + else + group_allocated = dd->port->cmds_to_issue[n]; + size += sprintf(&buf[size], "%08X ", group_allocated); + } + size += sprintf(&buf[size], "]\n"); - size += sprintf(&buf[size], "PORT IRQ STAT : 0x%08x\n", - readl(dd->port->mmio + PORT_IRQ_STAT)); - size += sprintf(&buf[size], "HOST IRQ STAT : 0x%08x\n", - readl(dd->mmio + HOST_IRQ_STAT)); + *offset = size <= len ? size : len; + size = copy_to_user(ubuf, buf, *offset); + if (size) + return -EFAULT; - return size; + return *offset; } -static ssize_t mtip_hw_show_status(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf, + size_t len, loff_t *offset) { - struct driver_data *dd = dev_to_disk(dev)->private_data; - int size = 0; + struct driver_data *dd = (struct driver_data *)f->private_data; + char buf[MTIP_DFS_MAX_BUF_SIZE]; + int size = *offset; - if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag)) - size += sprintf(buf, "%s", "thermal_shutdown\n"); - else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag)) - size += sprintf(buf, "%s", "write_protect\n"); - else - size += sprintf(buf, "%s", "online\n"); + if (!len || size) + return 0; - return size; + if (size < 0) + return -EINVAL; + + size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n", + dd->port->flags); + size += sprintf(&buf[size], "Flag-dd : [ %08lX ]\n", + dd->dd_flag); + + *offset = size <= len ? size : len; + size = copy_to_user(ubuf, buf, *offset); + if (size) + return -EFAULT; + + return *offset; } -static DEVICE_ATTR(registers, S_IRUGO, mtip_hw_show_registers, NULL); -static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL); +static const struct file_operations mtip_regs_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = mtip_hw_read_registers, + .llseek = no_llseek, +}; + +static const struct file_operations mtip_flags_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = mtip_hw_read_flags, + .llseek = no_llseek, +}; /* * Create the sysfs related attributes. @@ -2610,9 +2705,6 @@ static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj) if (!kobj || !dd) return -EINVAL; - if (sysfs_create_file(kobj, &dev_attr_registers.attr)) - dev_warn(&dd->pdev->dev, - "Error creating 'registers' sysfs entry\n"); if (sysfs_create_file(kobj, &dev_attr_status.attr)) dev_warn(&dd->pdev->dev, "Error creating 'status' sysfs entry\n"); @@ -2634,12 +2726,39 @@ static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj) if (!kobj || !dd) return -EINVAL; - sysfs_remove_file(kobj, &dev_attr_registers.attr); sysfs_remove_file(kobj, &dev_attr_status.attr); return 0; } +static int mtip_hw_debugfs_init(struct driver_data *dd) +{ + if (!dfs_parent) + return -1; + + dd->dfs_node = debugfs_create_dir(dd->disk->disk_name, dfs_parent); + if (IS_ERR_OR_NULL(dd->dfs_node)) { + dev_warn(&dd->pdev->dev, + "Error creating node %s under debugfs\n", + dd->disk->disk_name); + dd->dfs_node = NULL; + return -1; + } + + debugfs_create_file("flags", S_IRUGO, dd->dfs_node, dd, + &mtip_flags_fops); + debugfs_create_file("registers", S_IRUGO, dd->dfs_node, dd, + &mtip_regs_fops); + + return 0; +} + +static void mtip_hw_debugfs_exit(struct driver_data *dd) +{ + debugfs_remove_recursive(dd->dfs_node); +} + + /* * Perform any init/resume time hardware setup * @@ -3634,7 +3753,10 @@ skip_create_disk: set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags); blk_queue_max_segments(dd->queue, MTIP_MAX_SG); blk_queue_physical_block_size(dd->queue, 4096); + blk_queue_max_hw_sectors(dd->queue, 0xffff); + blk_queue_max_segment_size(dd->queue, 0x400000); blk_queue_io_min(dd->queue, 4096); + /* * write back cache is not supported in the device. FUA depends on * write back cache support, hence setting flush support to zero. @@ -3662,6 +3784,7 @@ skip_create_disk: mtip_hw_sysfs_init(dd, kobj); kobject_put(kobj); } + mtip_hw_debugfs_init(dd); if (dd->mtip_svc_handler) { set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); @@ -3687,6 +3810,8 @@ start_service_thread: return rv; kthread_run_error: + mtip_hw_debugfs_exit(dd); + /* Delete our gendisk. This also removes the device from /dev */ del_gendisk(dd->disk); @@ -3737,6 +3862,7 @@ static int mtip_block_remove(struct driver_data *dd) kobject_put(kobj); } } + mtip_hw_debugfs_exit(dd); /* * Delete our gendisk structure. This also removes the device @@ -4084,10 +4210,20 @@ static int __init mtip_init(void) } mtip_major = error; + if (!dfs_parent) { + dfs_parent = debugfs_create_dir("rssd", NULL); + if (IS_ERR_OR_NULL(dfs_parent)) { + printk(KERN_WARNING "Error creating debugfs parent\n"); + dfs_parent = NULL; + } + } + /* Register our PCI operations. */ error = pci_register_driver(&mtip_pci_driver); - if (error) + if (error) { + debugfs_remove(dfs_parent); unregister_blkdev(mtip_major, MTIP_DRV_NAME); + } return error; } @@ -4104,6 +4240,8 @@ static int __init mtip_init(void) */ static void __exit mtip_exit(void) { + debugfs_remove_recursive(dfs_parent); + /* Release the allocated major block device number. */ unregister_blkdev(mtip_major, MTIP_DRV_NAME); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 4ef58336310a..f51fc23d17bb 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -26,7 +26,6 @@ #include <linux/ata.h> #include <linux/interrupt.h> #include <linux/genhd.h> -#include <linux/version.h> /* Offset of Subsystem Device ID in pci confoguration space */ #define PCI_SUBSYSTEM_DEVICEID 0x2E @@ -111,35 +110,39 @@ #define dbg_printk(format, arg...) #endif +#define MTIP_DFS_MAX_BUF_SIZE 1024 + #define __force_bit2int (unsigned int __force) -/* below are bit numbers in 'flags' defined in mtip_port */ -#define MTIP_PF_IC_ACTIVE_BIT 0 /* pio/ioctl */ -#define MTIP_PF_EH_ACTIVE_BIT 1 /* error handling */ -#define MTIP_PF_SE_ACTIVE_BIT 2 /* secure erase */ -#define MTIP_PF_DM_ACTIVE_BIT 3 /* download microcde */ -#define MTIP_PF_PAUSE_IO ((1 << MTIP_PF_IC_ACTIVE_BIT) | \ +enum { + /* below are bit numbers in 'flags' defined in mtip_port */ + MTIP_PF_IC_ACTIVE_BIT = 0, /* pio/ioctl */ + MTIP_PF_EH_ACTIVE_BIT = 1, /* error handling */ + MTIP_PF_SE_ACTIVE_BIT = 2, /* secure erase */ + MTIP_PF_DM_ACTIVE_BIT = 3, /* download microcde */ + MTIP_PF_PAUSE_IO = ((1 << MTIP_PF_IC_ACTIVE_BIT) | \ (1 << MTIP_PF_EH_ACTIVE_BIT) | \ (1 << MTIP_PF_SE_ACTIVE_BIT) | \ - (1 << MTIP_PF_DM_ACTIVE_BIT)) - -#define MTIP_PF_SVC_THD_ACTIVE_BIT 4 -#define MTIP_PF_ISSUE_CMDS_BIT 5 -#define MTIP_PF_REBUILD_BIT 6 -#define MTIP_PF_SVC_THD_STOP_BIT 8 - -/* below are bit numbers in 'dd_flag' defined in driver_data */ -#define MTIP_DDF_REMOVE_PENDING_BIT 1 -#define MTIP_DDF_OVER_TEMP_BIT 2 -#define MTIP_DDF_WRITE_PROTECT_BIT 3 -#define MTIP_DDF_STOP_IO ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | \ + (1 << MTIP_PF_DM_ACTIVE_BIT)), + + MTIP_PF_SVC_THD_ACTIVE_BIT = 4, + MTIP_PF_ISSUE_CMDS_BIT = 5, + MTIP_PF_REBUILD_BIT = 6, + MTIP_PF_SVC_THD_STOP_BIT = 8, + + /* below are bit numbers in 'dd_flag' defined in driver_data */ + MTIP_DDF_REMOVE_PENDING_BIT = 1, + MTIP_DDF_OVER_TEMP_BIT = 2, + MTIP_DDF_WRITE_PROTECT_BIT = 3, + MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | \ (1 << MTIP_DDF_OVER_TEMP_BIT) | \ - (1 << MTIP_DDF_WRITE_PROTECT_BIT)) + (1 << MTIP_DDF_WRITE_PROTECT_BIT)), -#define MTIP_DDF_CLEANUP_BIT 5 -#define MTIP_DDF_RESUME_BIT 6 -#define MTIP_DDF_INIT_DONE_BIT 7 -#define MTIP_DDF_REBUILD_FAILED_BIT 8 + MTIP_DDF_CLEANUP_BIT = 5, + MTIP_DDF_RESUME_BIT = 6, + MTIP_DDF_INIT_DONE_BIT = 7, + MTIP_DDF_REBUILD_FAILED_BIT = 8, +}; __packed struct smart_attr{ u8 attr_id; @@ -445,6 +448,8 @@ struct driver_data { unsigned long dd_flag; /* NOTE: use atomic bit operations on this */ struct task_struct *mtip_svc_handler; /* task_struct of svc thd */ + + struct dentry *dfs_node; }; #endif diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 013c7a549fb6..65665c9c42c6 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -141,7 +141,7 @@ struct rbd_request { struct rbd_snap { struct device dev; const char *name; - size_t size; + u64 size; struct list_head node; u64 id; }; @@ -175,8 +175,7 @@ struct rbd_device { /* protects updating the header */ struct rw_semaphore header_rwsem; char snap_name[RBD_MAX_SNAP_NAME_LEN]; - u32 cur_snap; /* index+1 of current snapshot within snap context - 0 - for the head */ + u64 snap_id; /* current snapshot id */ int read_only; struct list_head node; @@ -241,7 +240,7 @@ static void rbd_put_dev(struct rbd_device *rbd_dev) put_device(&rbd_dev->dev); } -static int __rbd_update_snaps(struct rbd_device *rbd_dev); +static int __rbd_refresh_header(struct rbd_device *rbd_dev); static int rbd_open(struct block_device *bdev, fmode_t mode) { @@ -450,7 +449,9 @@ static void rbd_client_release(struct kref *kref) struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); dout("rbd_release_client %p\n", rbdc); + spin_lock(&rbd_client_list_lock); list_del(&rbdc->node); + spin_unlock(&rbd_client_list_lock); ceph_destroy_client(rbdc->client); kfree(rbdc->rbd_opts); @@ -463,9 +464,7 @@ static void rbd_client_release(struct kref *kref) */ static void rbd_put_client(struct rbd_device *rbd_dev) { - spin_lock(&rbd_client_list_lock); kref_put(&rbd_dev->rbd_client->kref, rbd_client_release); - spin_unlock(&rbd_client_list_lock); rbd_dev->rbd_client = NULL; } @@ -487,16 +486,18 @@ static void rbd_coll_release(struct kref *kref) */ static int rbd_header_from_disk(struct rbd_image_header *header, struct rbd_image_header_ondisk *ondisk, - int allocated_snaps, + u32 allocated_snaps, gfp_t gfp_flags) { - int i; - u32 snap_count; + u32 i, snap_count; if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) return -ENXIO; snap_count = le32_to_cpu(ondisk->snap_count); + if (snap_count > (UINT_MAX - sizeof(struct ceph_snap_context)) + / sizeof (*ondisk)) + return -EINVAL; header->snapc = kmalloc(sizeof(struct ceph_snap_context) + snap_count * sizeof (*ondisk), gfp_flags); @@ -506,11 +507,11 @@ static int rbd_header_from_disk(struct rbd_image_header *header, header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); if (snap_count) { header->snap_names = kmalloc(header->snap_names_len, - GFP_KERNEL); + gfp_flags); if (!header->snap_names) goto err_snapc; header->snap_sizes = kmalloc(snap_count * sizeof(u64), - GFP_KERNEL); + gfp_flags); if (!header->snap_sizes) goto err_names; } else { @@ -552,21 +553,6 @@ err_snapc: return -ENOMEM; } -static int snap_index(struct rbd_image_header *header, int snap_num) -{ - return header->total_snaps - snap_num; -} - -static u64 cur_snap_id(struct rbd_device *rbd_dev) -{ - struct rbd_image_header *header = &rbd_dev->header; - - if (!rbd_dev->cur_snap) - return 0; - - return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)]; -} - static int snap_by_name(struct rbd_image_header *header, const char *snap_name, u64 *seq, u64 *size) { @@ -605,7 +591,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size) snapc->seq = header->snap_seq; else snapc->seq = 0; - dev->cur_snap = 0; + dev->snap_id = CEPH_NOSNAP; dev->read_only = 0; if (size) *size = header->image_size; @@ -613,8 +599,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size) ret = snap_by_name(header, dev->snap_name, &snapc->seq, size); if (ret < 0) goto done; - - dev->cur_snap = header->total_snaps - ret; + dev->snap_id = snapc->seq; dev->read_only = 1; } @@ -935,7 +920,6 @@ static int rbd_do_request(struct request *rq, layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); layout->fl_stripe_count = cpu_to_le32(1); layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_pg_preferred = cpu_to_le32(-1); layout->fl_pg_pool = cpu_to_le32(dev->poolid); ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, req, ops); @@ -1168,7 +1152,7 @@ static int rbd_req_read(struct request *rq, int coll_index) { return rbd_do_op(rq, rbd_dev, NULL, - (snapid ? snapid : CEPH_NOSNAP), + snapid, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 2, @@ -1187,7 +1171,7 @@ static int rbd_req_sync_read(struct rbd_device *dev, u64 *ver) { return rbd_req_sync_op(dev, NULL, - (snapid ? snapid : CEPH_NOSNAP), + snapid, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, @@ -1238,7 +1222,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name, notify_id, (int)opcode); mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rc = __rbd_update_snaps(dev); + rc = __rbd_refresh_header(dev); mutex_unlock(&ctl_mutex); if (rc) pr_warning(RBD_DRV_NAME "%d got notification but failed to " @@ -1521,7 +1505,7 @@ static void rbd_rq_fn(struct request_queue *q) coll, cur_seg); else rbd_req_read(rq, rbd_dev, - cur_snap_id(rbd_dev), + rbd_dev->snap_id, ofs, op_size, bio, coll, cur_seg); @@ -1592,7 +1576,7 @@ static int rbd_read_header(struct rbd_device *rbd_dev, { ssize_t rc; struct rbd_image_header_ondisk *dh; - int snap_count = 0; + u32 snap_count = 0; u64 ver; size_t len; @@ -1656,7 +1640,7 @@ static int rbd_header_add_snap(struct rbd_device *dev, struct ceph_mon_client *monc; /* we should create a snapshot only if we're pointing at the head */ - if (dev->cur_snap) + if (dev->snap_id != CEPH_NOSNAP) return -EINVAL; monc = &dev->rbd_client->client->monc; @@ -1683,7 +1667,9 @@ static int rbd_header_add_snap(struct rbd_device *dev, if (ret < 0) return ret; - dev->header.snapc->seq = new_snapid; + down_write(&dev->header_rwsem); + dev->header.snapc->seq = new_snapid; + up_write(&dev->header_rwsem); return 0; bad: @@ -1703,7 +1689,7 @@ static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) /* * only read the first part of the ondisk header, without the snaps info */ -static int __rbd_update_snaps(struct rbd_device *rbd_dev) +static int __rbd_refresh_header(struct rbd_device *rbd_dev) { int ret; struct rbd_image_header h; @@ -1890,7 +1876,7 @@ static ssize_t rbd_image_refresh(struct device *dev, mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rc = __rbd_update_snaps(rbd_dev); + rc = __rbd_refresh_header(rbd_dev); if (rc < 0) ret = rc; @@ -1949,7 +1935,7 @@ static ssize_t rbd_snap_size_show(struct device *dev, { struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); - return sprintf(buf, "%zd\n", snap->size); + return sprintf(buf, "%llu\n", (unsigned long long)snap->size); } static ssize_t rbd_snap_id_show(struct device *dev, @@ -1958,7 +1944,7 @@ static ssize_t rbd_snap_id_show(struct device *dev, { struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); - return sprintf(buf, "%llu\n", (unsigned long long) snap->id); + return sprintf(buf, "%llu\n", (unsigned long long)snap->id); } static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); @@ -2173,7 +2159,7 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev) rbd_dev->header.obj_version); if (ret == -ERANGE) { mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rc = __rbd_update_snaps(rbd_dev); + rc = __rbd_refresh_header(rbd_dev); mutex_unlock(&ctl_mutex); if (rc < 0) return rc; @@ -2558,7 +2544,7 @@ static ssize_t rbd_snap_add(struct device *dev, if (ret < 0) goto err_unlock; - ret = __rbd_update_snaps(rbd_dev); + ret = __rbd_refresh_header(rbd_dev); if (ret < 0) goto err_unlock; diff --git a/drivers/block/umem.c b/drivers/block/umem.c index aa2712060bfb..9a72277a31df 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -513,6 +513,44 @@ static void process_page(unsigned long data) } } +struct mm_plug_cb { + struct blk_plug_cb cb; + struct cardinfo *card; +}; + +static void mm_unplug(struct blk_plug_cb *cb) +{ + struct mm_plug_cb *mmcb = container_of(cb, struct mm_plug_cb, cb); + + spin_lock_irq(&mmcb->card->lock); + activate(mmcb->card); + spin_unlock_irq(&mmcb->card->lock); + kfree(mmcb); +} + +static int mm_check_plugged(struct cardinfo *card) +{ + struct blk_plug *plug = current->plug; + struct mm_plug_cb *mmcb; + + if (!plug) + return 0; + + list_for_each_entry(mmcb, &plug->cb_list, cb.list) { + if (mmcb->cb.callback == mm_unplug && mmcb->card == card) + return 1; + } + /* Not currently on the callback list */ + mmcb = kmalloc(sizeof(*mmcb), GFP_ATOMIC); + if (!mmcb) + return 0; + + mmcb->card = card; + mmcb->cb.callback = mm_unplug; + list_add(&mmcb->cb.list, &plug->cb_list); + return 1; +} + static void mm_make_request(struct request_queue *q, struct bio *bio) { struct cardinfo *card = q->queuedata; @@ -523,6 +561,8 @@ static void mm_make_request(struct request_queue *q, struct bio *bio) *card->biotail = bio; bio->bi_next = NULL; card->biotail = &bio->bi_next; + if (bio->bi_rw & REQ_SYNC || !mm_check_plugged(card)) + activate(card); spin_unlock_irq(&card->lock); return; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 773cf27dc23f..9ad3b5ec1dc1 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -257,6 +257,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, break; case BLKIF_OP_DISCARD: dst->u.discard.flag = src->u.discard.flag; + dst->u.discard.id = src->u.discard.id; dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; @@ -287,6 +288,7 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, break; case BLKIF_OP_DISCARD: dst->u.discard.flag = src->u.discard.flag; + dst->u.discard.id = src->u.discard.id; dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 60eed4bdd2e4..e4fb3374dcd2 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -141,14 +141,36 @@ static int get_id_from_freelist(struct blkfront_info *info) return free; } -static void add_id_to_freelist(struct blkfront_info *info, +static int add_id_to_freelist(struct blkfront_info *info, unsigned long id) { + if (info->shadow[id].req.u.rw.id != id) + return -EINVAL; + if (info->shadow[id].request == NULL) + return -EINVAL; info->shadow[id].req.u.rw.id = info->shadow_free; info->shadow[id].request = NULL; info->shadow_free = id; + return 0; } +static const char *op_name(int op) +{ + static const char *const names[] = { + [BLKIF_OP_READ] = "read", + [BLKIF_OP_WRITE] = "write", + [BLKIF_OP_WRITE_BARRIER] = "barrier", + [BLKIF_OP_FLUSH_DISKCACHE] = "flush", + [BLKIF_OP_DISCARD] = "discard" }; + + if (op < 0 || op >= ARRAY_SIZE(names)) + return "unknown"; + + if (!names[op]) + return "reserved"; + + return names[op]; +} static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) { unsigned int end = minor + nr; @@ -746,20 +768,36 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) bret = RING_GET_RESPONSE(&info->ring, i); id = bret->id; + /* + * The backend has messed up and given us an id that we would + * never have given to it (we stamp it up to BLK_RING_SIZE - + * look in get_id_from_freelist. + */ + if (id >= BLK_RING_SIZE) { + WARN(1, "%s: response to %s has incorrect id (%ld)\n", + info->gd->disk_name, op_name(bret->operation), id); + /* We can't safely get the 'struct request' as + * the id is busted. */ + continue; + } req = info->shadow[id].request; if (bret->operation != BLKIF_OP_DISCARD) blkif_completion(&info->shadow[id]); - add_id_to_freelist(info, id); + if (add_id_to_freelist(info, id)) { + WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", + info->gd->disk_name, op_name(bret->operation), id); + continue; + } error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; switch (bret->operation) { case BLKIF_OP_DISCARD: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; - printk(KERN_WARNING "blkfront: %s: discard op failed\n", - info->gd->disk_name); + printk(KERN_WARNING "blkfront: %s: %s op failed\n", + info->gd->disk_name, op_name(bret->operation)); error = -EOPNOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; @@ -771,18 +809,14 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { - printk(KERN_WARNING "blkfront: %s: write %s op failed\n", - info->flush_op == BLKIF_OP_WRITE_BARRIER ? - "barrier" : "flush disk cache", - info->gd->disk_name); + printk(KERN_WARNING "blkfront: %s: %s op failed\n", + info->gd->disk_name, op_name(bret->operation)); error = -EOPNOTSUPP; } if (unlikely(bret->status == BLKIF_RSP_ERROR && info->shadow[id].req.u.rw.nr_segments == 0)) { - printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n", - info->flush_op == BLKIF_OP_WRITE_BARRIER ? - "barrier" : "flush disk cache", - info->gd->disk_name); + printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", + info->gd->disk_name, op_name(bret->operation)); error = -EOPNOTSUPP; } if (unlikely(error)) { |