diff options
Diffstat (limited to 'drivers')
105 files changed, 10297 insertions, 3514 deletions
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 7ae2750bb457..d668a8ae602b 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -48,8 +48,8 @@ #include <linux/genalloc.h> #include <linux/pci.h> #include <linux/aer.h> -#include <acpi/apei.h> -#include <acpi/hed.h> + +#include <acpi/ghes.h> #include <asm/mce.h> #include <asm/tlbflush.h> #include <asm/nmi.h> @@ -84,42 +84,6 @@ ((struct acpi_hest_generic_status *) \ ((struct ghes_estatus_node *)(estatus_node) + 1)) -/* - * One struct ghes is created for each generic hardware error source. - * It provides the context for APEI hardware error timer/IRQ/SCI/NMI - * handler. - * - * estatus: memory buffer for error status block, allocated during - * HEST parsing. - */ -#define GHES_TO_CLEAR 0x0001 -#define GHES_EXITING 0x0002 - -struct ghes { - struct acpi_hest_generic *generic; - struct acpi_hest_generic_status *estatus; - u64 buffer_paddr; - unsigned long flags; - union { - struct list_head list; - struct timer_list timer; - unsigned int irq; - }; -}; - -struct ghes_estatus_node { - struct llist_node llnode; - struct acpi_hest_generic *generic; -}; - -struct ghes_estatus_cache { - u32 estatus_len; - atomic_t count; - struct acpi_hest_generic *generic; - unsigned long long time_in; - struct rcu_head rcu; -}; - bool ghes_disable; module_param_named(disable, ghes_disable, bool, 0); @@ -333,13 +297,6 @@ static void ghes_fini(struct ghes *ghes) apei_unmap_generic_address(&ghes->generic->error_status_address); } -enum { - GHES_SEV_NO = 0x0, - GHES_SEV_CORRECTED = 0x1, - GHES_SEV_RECOVERABLE = 0x2, - GHES_SEV_PANIC = 0x3, -}; - static inline int ghes_severity(int severity) { switch (severity) { @@ -452,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes) ghes->flags &= ~GHES_TO_CLEAR; } -static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) +static void ghes_do_proc(struct ghes *ghes, + const struct acpi_hest_generic_status *estatus) { int sev, sec_sev; struct acpi_hest_generic_data *gdata; @@ -464,6 +422,8 @@ static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem_err; mem_err = (struct cper_sec_mem_err *)(gdata+1); + ghes_edac_report_mem_error(ghes, sev, mem_err); + #ifdef CONFIG_X86_MCE apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, mem_err); @@ -682,7 +642,7 @@ static int ghes_proc(struct ghes *ghes) if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) ghes_estatus_cache_add(ghes->generic, ghes->estatus); } - ghes_do_proc(ghes->estatus); + ghes_do_proc(ghes, ghes->estatus); out: ghes_clear_estatus(ghes); return 0; @@ -775,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) estatus = GHES_ESTATUS_FROM_NODE(estatus_node); len = apei_estatus_len(estatus); node_len = GHES_ESTATUS_NODE_LEN(len); - ghes_do_proc(estatus); + ghes_do_proc(estatus_node->ghes, estatus); if (!ghes_estatus_cached(estatus)) { generic = estatus_node->generic; if (ghes_print_estatus(NULL, generic, estatus)) @@ -864,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); if (estatus_node) { + estatus_node->ghes = ghes; estatus_node->generic = ghes->generic; estatus = GHES_ESTATUS_FROM_NODE(estatus_node); memcpy(estatus, ghes->estatus, len); @@ -942,6 +903,11 @@ static int ghes_probe(struct platform_device *ghes_dev) ghes = NULL; goto err; } + + rc = ghes_edac_register(ghes, &ghes_dev->dev); + if (rc < 0) + goto err; + switch (generic->notify.type) { case ACPI_HEST_NOTIFY_POLLED: ghes->timer.function = ghes_poll_func; @@ -954,13 +920,13 @@ static int ghes_probe(struct platform_device *ghes_dev) if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) { pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", generic->header.source_id); - goto err; + goto err_edac_unreg; } if (request_irq(ghes->irq, ghes_irq_func, 0, "GHES IRQ", ghes)) { pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", generic->header.source_id); - goto err; + goto err_edac_unreg; } break; case ACPI_HEST_NOTIFY_SCI: @@ -986,6 +952,8 @@ static int ghes_probe(struct platform_device *ghes_dev) platform_set_drvdata(ghes_dev, ghes); return 0; +err_edac_unreg: + ghes_edac_unregister(ghes); err: if (ghes) { ghes_fini(ghes); @@ -1038,6 +1006,9 @@ static int ghes_remove(struct platform_device *ghes_dev) } ghes_fini(ghes); + + ghes_edac_unregister(ghes); + kfree(ghes); platform_set_drvdata(ghes_dev, NULL); diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index ff5b745c4705..2a7cb0df176b 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -39,6 +39,8 @@ static int dma_buf_release(struct inode *inode, struct file *file) dmabuf = file->private_data; + BUG_ON(dmabuf->vmapping_counter); + dmabuf->ops->release(dmabuf); kfree(dmabuf); return 0; @@ -445,6 +447,9 @@ EXPORT_SYMBOL_GPL(dma_buf_kunmap); int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma, unsigned long pgoff) { + struct file *oldfile; + int ret; + if (WARN_ON(!dmabuf || !vma)) return -EINVAL; @@ -458,14 +463,22 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma, return -EINVAL; /* readjust the vma */ - if (vma->vm_file) - fput(vma->vm_file); - - vma->vm_file = get_file(dmabuf->file); - + get_file(dmabuf->file); + oldfile = vma->vm_file; + vma->vm_file = dmabuf->file; vma->vm_pgoff = pgoff; - return dmabuf->ops->mmap(dmabuf, vma); + ret = dmabuf->ops->mmap(dmabuf, vma); + if (ret) { + /* restore old parameters on failure */ + vma->vm_file = oldfile; + fput(dmabuf->file); + } else { + if (oldfile) + fput(oldfile); + } + return ret; + } EXPORT_SYMBOL_GPL(dma_buf_mmap); @@ -481,12 +494,34 @@ EXPORT_SYMBOL_GPL(dma_buf_mmap); */ void *dma_buf_vmap(struct dma_buf *dmabuf) { + void *ptr; + if (WARN_ON(!dmabuf)) return NULL; - if (dmabuf->ops->vmap) - return dmabuf->ops->vmap(dmabuf); - return NULL; + if (!dmabuf->ops->vmap) + return NULL; + + mutex_lock(&dmabuf->lock); + if (dmabuf->vmapping_counter) { + dmabuf->vmapping_counter++; + BUG_ON(!dmabuf->vmap_ptr); + ptr = dmabuf->vmap_ptr; + goto out_unlock; + } + + BUG_ON(dmabuf->vmap_ptr); + + ptr = dmabuf->ops->vmap(dmabuf); + if (IS_ERR_OR_NULL(ptr)) + goto out_unlock; + + dmabuf->vmap_ptr = ptr; + dmabuf->vmapping_counter = 1; + +out_unlock: + mutex_unlock(&dmabuf->lock); + return ptr; } EXPORT_SYMBOL_GPL(dma_buf_vmap); @@ -500,7 +535,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr) if (WARN_ON(!dmabuf)) return; - if (dmabuf->ops->vunmap) - dmabuf->ops->vunmap(dmabuf, vaddr); + BUG_ON(!dmabuf->vmap_ptr); + BUG_ON(dmabuf->vmapping_counter == 0); + BUG_ON(dmabuf->vmap_ptr != vaddr); + + mutex_lock(&dmabuf->lock); + if (--dmabuf->vmapping_counter == 0) { + if (dmabuf->ops->vunmap) + dmabuf->ops->vunmap(dmabuf, vaddr); + dmabuf->vmap_ptr = NULL; + } + mutex_unlock(&dmabuf->lock); } EXPORT_SYMBOL_GPL(dma_buf_vunmap); diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 8f12dc78a848..5b5ee79ff236 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -7054,6 +7054,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, else ErrorCode = 0; } + break; default: ErrorCode = -ENOTTY; } diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 824e09c4d0d7..5dc0daed8fac 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -63,19 +63,6 @@ config AMIGA_Z2RAM To compile this driver as a module, choose M here: the module will be called z2ram. -config BLK_DEV_XD - tristate "XT hard disk support" - depends on ISA && ISA_DMA_API - select CHECK_SIGNATURE - help - Very old 8 bit hard disk controllers used in the IBM XT computer - will be supported if you say Y here. - - To compile this driver as a module, choose M here: the - module will be called xd. - - It's pretty unlikely that you have one of these: say N. - config GDROM tristate "SEGA Dreamcast GD-ROM drive" depends on SH_DREAMCAST @@ -544,4 +531,14 @@ config BLK_DEV_RBD If unsure, say N. +config BLK_DEV_RSXX + tristate "RamSam PCIe Flash SSD Device Driver" + depends on PCI + help + Device driver for IBM's high speed PCIe SSD + storage devices: RamSan-70 and RamSan-80. + + To compile this driver as a module, choose M here: the + module will be called rsxx. + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 17e82df3df74..a3b40232c6ab 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o obj-$(CONFIG_BLK_DEV_RAM) += brd.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o -obj-$(CONFIG_BLK_DEV_XD) += xd.o obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o @@ -41,4 +40,6 @@ obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ +obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ + swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f47dccbda1d4..747bb2af69dc 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -162,12 +162,13 @@ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) { - loff_t size, loopsize; + loff_t loopsize; /* Compute loopsize in bytes */ - size = i_size_read(file->f_mapping->host); - loopsize = size - offset; - /* offset is beyond i_size, wierd but possible */ + loopsize = i_size_read(file->f_mapping->host); + if (offset > 0) + loopsize -= offset; + /* offset is beyond i_size, weird but possible */ if (loopsize < 0) return 0; @@ -190,6 +191,7 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) { loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); sector_t x = (sector_t)size; + struct block_device *bdev = lo->lo_device; if (unlikely((loff_t)x != size)) return -EFBIG; @@ -198,6 +200,9 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) if (lo->lo_sizelimit != sizelimit) lo->lo_sizelimit = sizelimit; set_capacity(lo->lo_disk, x); + bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); + /* let user-space know about the new size */ + kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); return 0; } @@ -1091,10 +1096,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) return err; if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit) { + lo->lo_sizelimit != info->lo_sizelimit) if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) return -EFBIG; - } + loop_config_discard(lo); memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); @@ -1271,28 +1276,10 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) { - int err; - sector_t sec; - loff_t sz; - - err = -ENXIO; if (unlikely(lo->lo_state != Lo_bound)) - goto out; - err = figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); - if (unlikely(err)) - goto out; - sec = get_capacity(lo->lo_disk); - /* the width of sector_t may be narrow for bit-shift */ - sz = sec; - sz <<= 9; - mutex_lock(&bdev->bd_mutex); - bd_set_size(bdev, sz); - /* let user-space know about the new size */ - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); - mutex_unlock(&bdev->bd_mutex); + return -ENXIO; - out: - return err; + return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); } static int lo_ioctl(struct block_device *bdev, fmode_t mode, @@ -1845,11 +1832,15 @@ static int __init loop_init(void) max_part = (1UL << part_shift) - 1; } - if ((1UL << part_shift) > DISK_MAX_PARTS) - return -EINVAL; + if ((1UL << part_shift) > DISK_MAX_PARTS) { + err = -EINVAL; + goto misc_out; + } - if (max_loop > 1UL << (MINORBITS - part_shift)) - return -EINVAL; + if (max_loop > 1UL << (MINORBITS - part_shift)) { + err = -EINVAL; + goto misc_out; + } /* * If max_loop is specified, create that many devices upfront. @@ -1867,8 +1858,10 @@ static int __init loop_init(void) range = 1UL << MINORBITS; } - if (register_blkdev(LOOP_MAJOR, "loop")) - return -EIO; + if (register_blkdev(LOOP_MAJOR, "loop")) { + err = -EIO; + goto misc_out; + } blk_register_region(MKDEV(LOOP_MAJOR, 0), range, THIS_MODULE, loop_probe, NULL, NULL); @@ -1881,6 +1874,10 @@ static int __init loop_init(void) printk(KERN_INFO "loop: module loaded\n"); return 0; + +misc_out: + misc_deregister(&loop_misc); + return err; } static int loop_exit_cb(int id, void *ptr, void *data) diff --git a/drivers/block/mtip32xx/Kconfig b/drivers/block/mtip32xx/Kconfig index 0ba837fc62a8..1fca1f996b45 100644 --- a/drivers/block/mtip32xx/Kconfig +++ b/drivers/block/mtip32xx/Kconfig @@ -4,6 +4,6 @@ config BLK_DEV_PCIESSD_MTIP32XX tristate "Block Device Driver for Micron PCIe SSDs" - depends on PCI + depends on PCI && GENERIC_HARDIRQS help This enables the block driver for Micron PCIe SSDs. diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 3fd100990453..11cc9522cdd4 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -88,6 +88,8 @@ static int instance; static int mtip_major; static struct dentry *dfs_parent; +static u32 cpu_use[NR_CPUS]; + static DEFINE_SPINLOCK(rssd_index_lock); static DEFINE_IDA(rssd_index_ida); @@ -296,16 +298,17 @@ static int hba_reset_nosleep(struct driver_data *dd) */ static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag) { - atomic_set(&port->commands[tag].active, 1); + int group = tag >> 5; - spin_lock(&port->cmd_issue_lock); + atomic_set(&port->commands[tag].active, 1); + /* guard SACT and CI registers */ + spin_lock(&port->cmd_issue_lock[group]); writel((1 << MTIP_TAG_BIT(tag)), port->s_active[MTIP_TAG_INDEX(tag)]); writel((1 << MTIP_TAG_BIT(tag)), port->cmd_issue[MTIP_TAG_INDEX(tag)]); - - spin_unlock(&port->cmd_issue_lock); + spin_unlock(&port->cmd_issue_lock[group]); /* Set the command's timeout value.*/ port->commands[tag].comp_time = jiffies + msecs_to_jiffies( @@ -964,56 +967,56 @@ handle_tfe_exit: /* * Handle a set device bits interrupt */ -static inline void mtip_process_sdbf(struct driver_data *dd) +static inline void mtip_workq_sdbfx(struct mtip_port *port, int group, + u32 completed) { - struct mtip_port *port = dd->port; - int group, tag, bit; - u32 completed; + struct driver_data *dd = port->dd; + int tag, bit; struct mtip_cmd *command; - /* walk all bits in all slot groups */ - for (group = 0; group < dd->slot_groups; group++) { - completed = readl(port->completed[group]); - if (!completed) - continue; + if (!completed) { + WARN_ON_ONCE(!completed); + return; + } + /* clear completed status register in the hardware.*/ + writel(completed, port->completed[group]); - /* clear completed status register in the hardware.*/ - writel(completed, port->completed[group]); + /* Process completed commands. */ + for (bit = 0; (bit < 32) && completed; bit++) { + if (completed & 0x01) { + tag = (group << 5) | bit; - /* Process completed commands. */ - for (bit = 0; - (bit < 32) && completed; - bit++, completed >>= 1) { - if (completed & 0x01) { - tag = (group << 5) | bit; + /* skip internal command slot. */ + if (unlikely(tag == MTIP_TAG_INTERNAL)) + continue; - /* skip internal command slot. */ - if (unlikely(tag == MTIP_TAG_INTERNAL)) - continue; + command = &port->commands[tag]; + /* make internal callback */ + if (likely(command->comp_func)) { + command->comp_func( + port, + tag, + command->comp_data, + 0); + } else { + dev_warn(&dd->pdev->dev, + "Null completion " + "for tag %d", + tag); - command = &port->commands[tag]; - /* make internal callback */ - if (likely(command->comp_func)) { - command->comp_func( - port, - tag, - command->comp_data, - 0); - } else { - dev_warn(&dd->pdev->dev, - "Null completion " - "for tag %d", - tag); - - if (mtip_check_surprise_removal( - dd->pdev)) { - mtip_command_cleanup(dd); - return; - } + if (mtip_check_surprise_removal( + dd->pdev)) { + mtip_command_cleanup(dd); + return; } } } + completed >>= 1; } + + /* If last, re-enable interrupts */ + if (atomic_dec_return(&dd->irq_workers_active) == 0) + writel(0xffffffff, dd->mmio + HOST_IRQ_STAT); } /* @@ -1072,6 +1075,8 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) struct mtip_port *port = dd->port; u32 hba_stat, port_stat; int rv = IRQ_NONE; + int do_irq_enable = 1, i, workers; + struct mtip_work *twork; hba_stat = readl(dd->mmio + HOST_IRQ_STAT); if (hba_stat) { @@ -1082,8 +1087,42 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) writel(port_stat, port->mmio + PORT_IRQ_STAT); /* Demux port status */ - if (likely(port_stat & PORT_IRQ_SDB_FIS)) - mtip_process_sdbf(dd); + if (likely(port_stat & PORT_IRQ_SDB_FIS)) { + do_irq_enable = 0; + WARN_ON_ONCE(atomic_read(&dd->irq_workers_active) != 0); + + /* Start at 1: group zero is always local? */ + for (i = 0, workers = 0; i < MTIP_MAX_SLOT_GROUPS; + i++) { + twork = &dd->work[i]; + twork->completed = readl(port->completed[i]); + if (twork->completed) + workers++; + } + + atomic_set(&dd->irq_workers_active, workers); + if (workers) { + for (i = 1; i < MTIP_MAX_SLOT_GROUPS; i++) { + twork = &dd->work[i]; + if (twork->completed) + queue_work_on( + twork->cpu_binding, + dd->isr_workq, + &twork->work); + } + + if (likely(dd->work[0].completed)) + mtip_workq_sdbfx(port, 0, + dd->work[0].completed); + + } else { + /* + * Chip quirk: SDB interrupt but nothing + * to complete + */ + do_irq_enable = 1; + } + } if (unlikely(port_stat & PORT_IRQ_ERR)) { if (unlikely(mtip_check_surprise_removal(dd->pdev))) { @@ -1103,21 +1142,13 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) } /* acknowledge interrupt */ - writel(hba_stat, dd->mmio + HOST_IRQ_STAT); + if (unlikely(do_irq_enable)) + writel(hba_stat, dd->mmio + HOST_IRQ_STAT); return rv; } /* - * Wrapper for mtip_handle_irq - * (ignores return code) - */ -static void mtip_tasklet(unsigned long data) -{ - mtip_handle_irq((struct driver_data *) data); -} - -/* * HBA interrupt subroutine. * * @irq IRQ number. @@ -1130,8 +1161,8 @@ static void mtip_tasklet(unsigned long data) static irqreturn_t mtip_irq_handler(int irq, void *instance) { struct driver_data *dd = instance; - tasklet_schedule(&dd->tasklet); - return IRQ_HANDLED; + + return mtip_handle_irq(dd); } static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag) @@ -1489,6 +1520,12 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer) } #endif + /* Demux ID.DRAT & ID.RZAT to determine trim support */ + if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5)) + port->dd->trim_supp = true; + else + port->dd->trim_supp = false; + /* Set the identify buffer as valid. */ port->identify_valid = 1; @@ -1676,6 +1713,81 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, } /* + * Trim unused sectors + * + * @dd pointer to driver_data structure + * @lba starting lba + * @len # of 512b sectors to trim + * + * return value + * -ENOMEM Out of dma memory + * -EINVAL Invalid parameters passed in, trim not supported + * -EIO Error submitting trim request to hw + */ +static int mtip_send_trim(struct driver_data *dd, unsigned int lba, unsigned int len) +{ + int i, rv = 0; + u64 tlba, tlen, sect_left; + struct mtip_trim_entry *buf; + dma_addr_t dma_addr; + struct host_to_dev_fis fis; + + if (!len || dd->trim_supp == false) + return -EINVAL; + + /* Trim request too big */ + WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES)); + + /* Trim request not aligned on 4k boundary */ + WARN_ON(len % 8 != 0); + + /* Warn if vu_trim structure is too big */ + WARN_ON(sizeof(struct mtip_trim) > ATA_SECT_SIZE); + + /* Allocate a DMA buffer for the trim structure */ + buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr, + GFP_KERNEL); + if (!buf) + return -ENOMEM; + memset(buf, 0, ATA_SECT_SIZE); + + for (i = 0, sect_left = len, tlba = lba; + i < MTIP_MAX_TRIM_ENTRIES && sect_left; + i++) { + tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ? + MTIP_MAX_TRIM_ENTRY_LEN : + sect_left); + buf[i].lba = __force_bit2int cpu_to_le32(tlba); + buf[i].range = __force_bit2int cpu_to_le16(tlen); + tlba += tlen; + sect_left -= tlen; + } + WARN_ON(sect_left != 0); + + /* Build the fis */ + memset(&fis, 0, sizeof(struct host_to_dev_fis)); + fis.type = 0x27; + fis.opts = 1 << 7; + fis.command = 0xfb; + fis.features = 0x60; + fis.sect_count = 1; + fis.device = ATA_DEVICE_OBS; + + if (mtip_exec_internal_command(dd->port, + &fis, + 5, + dma_addr, + ATA_SECT_SIZE, + 0, + GFP_KERNEL, + MTIP_TRIM_TIMEOUT_MS) < 0) + rv = -EIO; + + dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr); + return rv; +} + +/* * Get the drive capacity. * * @dd Pointer to the device data structure. @@ -3005,20 +3117,24 @@ static int mtip_hw_init(struct driver_data *dd) hba_setup(dd); - tasklet_init(&dd->tasklet, mtip_tasklet, (unsigned long)dd); - - dd->port = kzalloc(sizeof(struct mtip_port), GFP_KERNEL); + dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL, + dd->numa_node); if (!dd->port) { dev_err(&dd->pdev->dev, "Memory allocation: port structure\n"); return -ENOMEM; } + /* Continue workqueue setup */ + for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++) + dd->work[i].port = dd->port; + /* Counting semaphore to track command slot usage */ sema_init(&dd->port->cmd_slot, num_command_slots - 1); /* Spinlock to prevent concurrent issue */ - spin_lock_init(&dd->port->cmd_issue_lock); + for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++) + spin_lock_init(&dd->port->cmd_issue_lock[i]); /* Set the port mmio base address. */ dd->port->mmio = dd->mmio + PORT_OFFSET; @@ -3165,6 +3281,7 @@ static int mtip_hw_init(struct driver_data *dd) "Unable to allocate IRQ %d\n", dd->pdev->irq); goto out2; } + irq_set_affinity_hint(dd->pdev->irq, get_cpu_mask(dd->isr_binding)); /* Enable interrupts on the HBA. */ writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, @@ -3241,7 +3358,8 @@ out3: writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN, dd->mmio + HOST_CTL); - /*Release the IRQ. */ + /* Release the IRQ. */ + irq_set_affinity_hint(dd->pdev->irq, NULL); devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); out2: @@ -3291,11 +3409,9 @@ static int mtip_hw_exit(struct driver_data *dd) del_timer_sync(&dd->port->cmd_timer); /* Release the IRQ. */ + irq_set_affinity_hint(dd->pdev->irq, NULL); devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); - /* Stop the bottom half tasklet. */ - tasklet_kill(&dd->tasklet); - /* Free the command/command header memory. */ dmam_free_coherent(&dd->pdev->dev, HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4), @@ -3641,6 +3757,12 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio) } } + if (unlikely(bio->bi_rw & REQ_DISCARD)) { + bio_endio(bio, mtip_send_trim(dd, bio->bi_sector, + bio_sectors(bio))); + return; + } + if (unlikely(!bio_has_data(bio))) { blk_queue_flush(queue, 0); bio_endio(bio, 0); @@ -3711,7 +3833,7 @@ static int mtip_block_initialize(struct driver_data *dd) goto protocol_init_error; } - dd->disk = alloc_disk(MTIP_MAX_MINORS); + dd->disk = alloc_disk_node(MTIP_MAX_MINORS, dd->numa_node); if (dd->disk == NULL) { dev_err(&dd->pdev->dev, "Unable to allocate gendisk structure\n"); @@ -3755,7 +3877,7 @@ static int mtip_block_initialize(struct driver_data *dd) skip_create_disk: /* Allocate the request queue. */ - dd->queue = blk_alloc_queue(GFP_KERNEL); + dd->queue = blk_alloc_queue_node(GFP_KERNEL, dd->numa_node); if (dd->queue == NULL) { dev_err(&dd->pdev->dev, "Unable to allocate request queue\n"); @@ -3783,6 +3905,15 @@ skip_create_disk: */ blk_queue_flush(dd->queue, 0); + /* Signal trim support */ + if (dd->trim_supp == true) { + set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags); + dd->queue->limits.discard_granularity = 4096; + blk_queue_max_discard_sectors(dd->queue, + MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES); + dd->queue->limits.discard_zeroes_data = 0; + } + /* Set the capacity of the device in 512 byte sectors. */ if (!(mtip_hw_get_capacity(dd, &capacity))) { dev_warn(&dd->pdev->dev, @@ -3813,9 +3944,8 @@ skip_create_disk: start_service_thread: sprintf(thd_name, "mtip_svc_thd_%02d", index); - - dd->mtip_svc_handler = kthread_run(mtip_service_thread, - dd, thd_name); + dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread, + dd, dd->numa_node, thd_name); if (IS_ERR(dd->mtip_svc_handler)) { dev_err(&dd->pdev->dev, "service thread failed to start\n"); @@ -3823,7 +3953,7 @@ start_service_thread: rv = -EFAULT; goto kthread_run_error; } - + wake_up_process(dd->mtip_svc_handler); if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) rv = wait_for_rebuild; @@ -3963,6 +4093,56 @@ static int mtip_block_resume(struct driver_data *dd) return 0; } +static void drop_cpu(int cpu) +{ + cpu_use[cpu]--; +} + +static int get_least_used_cpu_on_node(int node) +{ + int cpu, least_used_cpu, least_cnt; + const struct cpumask *node_mask; + + node_mask = cpumask_of_node(node); + least_used_cpu = cpumask_first(node_mask); + least_cnt = cpu_use[least_used_cpu]; + cpu = least_used_cpu; + + for_each_cpu(cpu, node_mask) { + if (cpu_use[cpu] < least_cnt) { + least_used_cpu = cpu; + least_cnt = cpu_use[cpu]; + } + } + cpu_use[least_used_cpu]++; + return least_used_cpu; +} + +/* Helper for selecting a node in round robin mode */ +static inline int mtip_get_next_rr_node(void) +{ + static int next_node = -1; + + if (next_node == -1) { + next_node = first_online_node; + return next_node; + } + + next_node = next_online_node(next_node); + if (next_node == MAX_NUMNODES) + next_node = first_online_node; + return next_node; +} + +static DEFINE_HANDLER(0); +static DEFINE_HANDLER(1); +static DEFINE_HANDLER(2); +static DEFINE_HANDLER(3); +static DEFINE_HANDLER(4); +static DEFINE_HANDLER(5); +static DEFINE_HANDLER(6); +static DEFINE_HANDLER(7); + /* * Called for each supported PCI device detected. * @@ -3977,9 +4157,25 @@ static int mtip_pci_probe(struct pci_dev *pdev, { int rv = 0; struct driver_data *dd = NULL; + char cpu_list[256]; + const struct cpumask *node_mask; + int cpu, i = 0, j = 0; + int my_node = NUMA_NO_NODE; /* Allocate memory for this devices private data. */ - dd = kzalloc(sizeof(struct driver_data), GFP_KERNEL); + my_node = pcibus_to_node(pdev->bus); + if (my_node != NUMA_NO_NODE) { + if (!node_online(my_node)) + my_node = mtip_get_next_rr_node(); + } else { + dev_info(&pdev->dev, "Kernel not reporting proximity, choosing a node\n"); + my_node = mtip_get_next_rr_node(); + } + dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n", + my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev), + cpu_to_node(smp_processor_id()), smp_processor_id()); + + dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node); if (dd == NULL) { dev_err(&pdev->dev, "Unable to allocate memory for driver data\n"); @@ -4016,19 +4212,82 @@ static int mtip_pci_probe(struct pci_dev *pdev, } } - pci_set_master(pdev); + /* Copy the info we may need later into the private data structure. */ + dd->major = mtip_major; + dd->instance = instance; + dd->pdev = pdev; + dd->numa_node = my_node; + memset(dd->workq_name, 0, 32); + snprintf(dd->workq_name, 31, "mtipq%d", dd->instance); + + dd->isr_workq = create_workqueue(dd->workq_name); + if (!dd->isr_workq) { + dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance); + goto block_initialize_err; + } + + memset(cpu_list, 0, sizeof(cpu_list)); + + node_mask = cpumask_of_node(dd->numa_node); + if (!cpumask_empty(node_mask)) { + for_each_cpu(cpu, node_mask) + { + snprintf(&cpu_list[j], 256 - j, "%d ", cpu); + j = strlen(cpu_list); + } + + dev_info(&pdev->dev, "Node %d on package %d has %d cpu(s): %s\n", + dd->numa_node, + topology_physical_package_id(cpumask_first(node_mask)), + nr_cpus_node(dd->numa_node), + cpu_list); + } else + dev_dbg(&pdev->dev, "mtip32xx: node_mask empty\n"); + + dd->isr_binding = get_least_used_cpu_on_node(dd->numa_node); + dev_info(&pdev->dev, "Initial IRQ binding node:cpu %d:%d\n", + cpu_to_node(dd->isr_binding), dd->isr_binding); + + /* first worker context always runs in ISR */ + dd->work[0].cpu_binding = dd->isr_binding; + dd->work[1].cpu_binding = get_least_used_cpu_on_node(dd->numa_node); + dd->work[2].cpu_binding = get_least_used_cpu_on_node(dd->numa_node); + dd->work[3].cpu_binding = dd->work[0].cpu_binding; + dd->work[4].cpu_binding = dd->work[1].cpu_binding; + dd->work[5].cpu_binding = dd->work[2].cpu_binding; + dd->work[6].cpu_binding = dd->work[2].cpu_binding; + dd->work[7].cpu_binding = dd->work[1].cpu_binding; + + /* Log the bindings */ + for_each_present_cpu(cpu) { + memset(cpu_list, 0, sizeof(cpu_list)); + for (i = 0, j = 0; i < MTIP_MAX_SLOT_GROUPS; i++) { + if (dd->work[i].cpu_binding == cpu) { + snprintf(&cpu_list[j], 256 - j, "%d ", i); + j = strlen(cpu_list); + } + } + if (j) + dev_info(&pdev->dev, "CPU %d: WQs %s\n", cpu, cpu_list); + } + + INIT_WORK(&dd->work[0].work, mtip_workq_sdbf0); + INIT_WORK(&dd->work[1].work, mtip_workq_sdbf1); + INIT_WORK(&dd->work[2].work, mtip_workq_sdbf2); + INIT_WORK(&dd->work[3].work, mtip_workq_sdbf3); + INIT_WORK(&dd->work[4].work, mtip_workq_sdbf4); + INIT_WORK(&dd->work[5].work, mtip_workq_sdbf5); + INIT_WORK(&dd->work[6].work, mtip_workq_sdbf6); + INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7); + + pci_set_master(pdev); if (pci_enable_msi(pdev)) { dev_warn(&pdev->dev, "Unable to enable MSI interrupt.\n"); goto block_initialize_err; } - /* Copy the info we may need later into the private data structure. */ - dd->major = mtip_major; - dd->instance = instance; - dd->pdev = pdev; - /* Initialize the block layer. */ rv = mtip_block_initialize(dd); if (rv < 0) { @@ -4048,7 +4307,13 @@ static int mtip_pci_probe(struct pci_dev *pdev, block_initialize_err: pci_disable_msi(pdev); - + if (dd->isr_workq) { + flush_workqueue(dd->isr_workq); + destroy_workqueue(dd->isr_workq); + drop_cpu(dd->work[0].cpu_binding); + drop_cpu(dd->work[1].cpu_binding); + drop_cpu(dd->work[2].cpu_binding); + } setmask_err: pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); @@ -4089,6 +4354,14 @@ static void mtip_pci_remove(struct pci_dev *pdev) /* Clean up the block layer. */ mtip_block_remove(dd); + if (dd->isr_workq) { + flush_workqueue(dd->isr_workq); + destroy_workqueue(dd->isr_workq); + drop_cpu(dd->work[0].cpu_binding); + drop_cpu(dd->work[1].cpu_binding); + drop_cpu(dd->work[2].cpu_binding); + } + pci_disable_msi(pdev); kfree(dd); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index b1742640556a..3bffff5f670c 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -164,6 +164,35 @@ struct smart_attr { u8 res[3]; } __packed; +struct mtip_work { + struct work_struct work; + void *port; + int cpu_binding; + u32 completed; +} ____cacheline_aligned_in_smp; + +#define DEFINE_HANDLER(group) \ + void mtip_workq_sdbf##group(struct work_struct *work) \ + { \ + struct mtip_work *w = (struct mtip_work *) work; \ + mtip_workq_sdbfx(w->port, group, w->completed); \ + } + +#define MTIP_TRIM_TIMEOUT_MS 240000 +#define MTIP_MAX_TRIM_ENTRIES 8 +#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8 + +struct mtip_trim_entry { + u32 lba; /* starting lba of region */ + u16 rsvd; /* unused */ + u16 range; /* # of 512b blocks to trim */ +} __packed; + +struct mtip_trim { + /* Array of regions to trim */ + struct mtip_trim_entry entry[MTIP_MAX_TRIM_ENTRIES]; +} __packed; + /* Register Frame Information Structure (FIS), host to device. */ struct host_to_dev_fis { /* @@ -424,7 +453,7 @@ struct mtip_port { */ struct semaphore cmd_slot; /* Spinlock for working around command-issue bug. */ - spinlock_t cmd_issue_lock; + spinlock_t cmd_issue_lock[MTIP_MAX_SLOT_GROUPS]; }; /* @@ -447,9 +476,6 @@ struct driver_data { struct mtip_port *port; /* Pointer to the port data structure. */ - /* Tasklet used to process the bottom half of the ISR. */ - struct tasklet_struct tasklet; - unsigned product_type; /* magic value declaring the product type */ unsigned slot_groups; /* number of slot groups the product supports */ @@ -461,6 +487,20 @@ struct driver_data { struct task_struct *mtip_svc_handler; /* task_struct of svc thd */ struct dentry *dfs_node; + + bool trim_supp; /* flag indicating trim support */ + + int numa_node; /* NUMA support */ + + char workq_name[32]; + + struct workqueue_struct *isr_workq; + + struct mtip_work work[MTIP_MAX_SLOT_GROUPS]; + + atomic_t irq_workers_active; + + int isr_binding; }; #endif diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 89576a0b3f2e..6c81a4c040b9 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -52,9 +52,12 @@ #define SECTOR_SHIFT 9 #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) -/* It might be useful to have this defined elsewhere too */ +/* It might be useful to have these defined elsewhere */ -#define U64_MAX ((u64) (~0ULL)) +#define U8_MAX ((u8) (~0U)) +#define U16_MAX ((u16) (~0U)) +#define U32_MAX ((u32) (~0U)) +#define U64_MAX ((u64) (~0ULL)) #define RBD_DRV_NAME "rbd" #define RBD_DRV_NAME_LONG "rbd (rados block device)" @@ -66,7 +69,6 @@ (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ -#define RBD_MAX_OPT_LEN 1024 #define RBD_SNAP_HEAD_NAME "-" @@ -93,8 +95,6 @@ #define DEV_NAME_LEN 32 #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) -#define RBD_READ_ONLY_DEFAULT false - /* * block device image metadata (in-memory version) */ @@ -119,16 +119,33 @@ struct rbd_image_header { * An rbd image specification. * * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely - * identify an image. + * identify an image. Each rbd_dev structure includes a pointer to + * an rbd_spec structure that encapsulates this identity. + * + * Each of the id's in an rbd_spec has an associated name. For a + * user-mapped image, the names are supplied and the id's associated + * with them are looked up. For a layered image, a parent image is + * defined by the tuple, and the names are looked up. + * + * An rbd_dev structure contains a parent_spec pointer which is + * non-null if the image it represents is a child in a layered + * image. This pointer will refer to the rbd_spec structure used + * by the parent rbd_dev for its own identity (i.e., the structure + * is shared between the parent and child). + * + * Since these structures are populated once, during the discovery + * phase of image construction, they are effectively immutable so + * we make no effort to synchronize access to them. + * + * Note that code herein does not assume the image name is known (it + * could be a null pointer). */ struct rbd_spec { u64 pool_id; char *pool_name; char *image_id; - size_t image_id_len; char *image_name; - size_t image_name_len; u64 snap_id; char *snap_name; @@ -136,10 +153,6 @@ struct rbd_spec { struct kref kref; }; -struct rbd_options { - bool read_only; -}; - /* * an instance of the client. multiple devices may share an rbd client. */ @@ -149,37 +162,76 @@ struct rbd_client { struct list_head node; }; -/* - * a request completion status - */ -struct rbd_req_status { - int done; - int rc; - u64 bytes; +struct rbd_img_request; +typedef void (*rbd_img_callback_t)(struct rbd_img_request *); + +#define BAD_WHICH U32_MAX /* Good which or bad which, which? */ + +struct rbd_obj_request; +typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *); + +enum obj_request_type { + OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES }; -/* - * a collection of requests - */ -struct rbd_req_coll { - int total; - int num_done; +struct rbd_obj_request { + const char *object_name; + u64 offset; /* object start byte */ + u64 length; /* bytes from offset */ + + struct rbd_img_request *img_request; + struct list_head links; /* img_request->obj_requests */ + u32 which; /* posn image request list */ + + enum obj_request_type type; + union { + struct bio *bio_list; + struct { + struct page **pages; + u32 page_count; + }; + }; + + struct ceph_osd_request *osd_req; + + u64 xferred; /* bytes transferred */ + u64 version; + int result; + atomic_t done; + + rbd_obj_callback_t callback; + struct completion completion; + struct kref kref; - struct rbd_req_status status[0]; }; -/* - * a single io request - */ -struct rbd_request { - struct request *rq; /* blk layer request */ - struct bio *bio; /* cloned bio */ - struct page **pages; /* list of used pages */ - u64 len; - int coll_index; - struct rbd_req_coll *coll; +struct rbd_img_request { + struct request *rq; + struct rbd_device *rbd_dev; + u64 offset; /* starting image byte offset */ + u64 length; /* byte count from offset */ + bool write_request; /* false for read */ + union { + struct ceph_snap_context *snapc; /* for writes */ + u64 snap_id; /* for reads */ + }; + spinlock_t completion_lock;/* protects next_completion */ + u32 next_completion; + rbd_img_callback_t callback; + + u32 obj_request_count; + struct list_head obj_requests; /* rbd_obj_request structs */ + + struct kref kref; }; +#define for_each_obj_request(ireq, oreq) \ + list_for_each_entry(oreq, &(ireq)->obj_requests, links) +#define for_each_obj_request_from(ireq, oreq) \ + list_for_each_entry_from(oreq, &(ireq)->obj_requests, links) +#define for_each_obj_request_safe(ireq, oreq, n) \ + list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links) + struct rbd_snap { struct device dev; const char *name; @@ -209,16 +261,18 @@ struct rbd_device { char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ - spinlock_t lock; /* queue lock */ + spinlock_t lock; /* queue, flags, open_count */ struct rbd_image_header header; - bool exists; + unsigned long flags; /* possibly lock protected */ struct rbd_spec *spec; char *header_name; + struct ceph_file_layout layout; + struct ceph_osd_event *watch_event; - struct ceph_osd_request *watch_request; + struct rbd_obj_request *watch_request; struct rbd_spec *parent_spec; u64 parent_overlap; @@ -235,7 +289,19 @@ struct rbd_device { /* sysfs related */ struct device dev; - unsigned long open_count; + unsigned long open_count; /* protected by lock */ +}; + +/* + * Flag bits for rbd_dev->flags. If atomicity is required, + * rbd_dev->lock is used to protect access. + * + * Currently, only the "removing" flag (which is coupled with the + * "open_count" field) requires atomic access. + */ +enum rbd_dev_flags { + RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */ + RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */ }; static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ @@ -277,6 +343,33 @@ static struct device rbd_root_dev = { .release = rbd_root_dev_release, }; +static __printf(2, 3) +void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + if (!rbd_dev) + printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf); + else if (rbd_dev->disk) + printk(KERN_WARNING "%s: %s: %pV\n", + RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf); + else if (rbd_dev->spec && rbd_dev->spec->image_name) + printk(KERN_WARNING "%s: image %s: %pV\n", + RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf); + else if (rbd_dev->spec && rbd_dev->spec->image_id) + printk(KERN_WARNING "%s: id %s: %pV\n", + RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf); + else /* punt */ + printk(KERN_WARNING "%s: rbd_dev %p: %pV\n", + RBD_DRV_NAME, rbd_dev, &vaf); + va_end(args); +} + #ifdef RBD_DEBUG #define rbd_assert(expr) \ if (unlikely(!(expr))) { \ @@ -296,14 +389,23 @@ static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); static int rbd_open(struct block_device *bdev, fmode_t mode) { struct rbd_device *rbd_dev = bdev->bd_disk->private_data; + bool removing = false; if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) return -EROFS; + spin_lock_irq(&rbd_dev->lock); + if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) + removing = true; + else + rbd_dev->open_count++; + spin_unlock_irq(&rbd_dev->lock); + if (removing) + return -ENOENT; + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); (void) get_device(&rbd_dev->dev); set_device_ro(bdev, rbd_dev->mapping.read_only); - rbd_dev->open_count++; mutex_unlock(&ctl_mutex); return 0; @@ -312,10 +414,14 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) static int rbd_release(struct gendisk *disk, fmode_t mode) { struct rbd_device *rbd_dev = disk->private_data; + unsigned long open_count_before; + + spin_lock_irq(&rbd_dev->lock); + open_count_before = rbd_dev->open_count--; + spin_unlock_irq(&rbd_dev->lock); + rbd_assert(open_count_before > 0); mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rbd_assert(rbd_dev->open_count > 0); - rbd_dev->open_count--; put_device(&rbd_dev->dev); mutex_unlock(&ctl_mutex); @@ -337,7 +443,7 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) struct rbd_client *rbdc; int ret = -ENOMEM; - dout("rbd_client_create\n"); + dout("%s:\n", __func__); rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); if (!rbdc) goto out_opt; @@ -361,8 +467,8 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) spin_unlock(&rbd_client_list_lock); mutex_unlock(&ctl_mutex); + dout("%s: rbdc %p\n", __func__, rbdc); - dout("rbd_client_create created %p\n", rbdc); return rbdc; out_err: @@ -373,6 +479,8 @@ out_mutex: out_opt: if (ceph_opts) ceph_destroy_options(ceph_opts); + dout("%s: error %d\n", __func__, ret); + return ERR_PTR(ret); } @@ -426,6 +534,12 @@ static match_table_t rbd_opts_tokens = { {-1, NULL} }; +struct rbd_options { + bool read_only; +}; + +#define RBD_READ_ONLY_DEFAULT false + static int parse_rbd_opts_token(char *c, void *private) { struct rbd_options *rbd_opts = private; @@ -493,7 +607,7 @@ static void rbd_client_release(struct kref *kref) { struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); - dout("rbd_release_client %p\n", rbdc); + dout("%s: rbdc %p\n", __func__, rbdc); spin_lock(&rbd_client_list_lock); list_del(&rbdc->node); spin_unlock(&rbd_client_list_lock); @@ -512,18 +626,6 @@ static void rbd_put_client(struct rbd_client *rbdc) kref_put(&rbdc->kref, rbd_client_release); } -/* - * Destroy requests collection - */ -static void rbd_coll_release(struct kref *kref) -{ - struct rbd_req_coll *coll = - container_of(kref, struct rbd_req_coll, kref); - - dout("rbd_coll_release %p\n", coll); - kfree(coll); -} - static bool rbd_image_format_valid(u32 image_format) { return image_format == 1 || image_format == 2; @@ -707,7 +809,8 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) goto done; rbd_dev->mapping.read_only = true; } - rbd_dev->exists = true; + set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); + done: return ret; } @@ -724,7 +827,7 @@ static void rbd_header_free(struct rbd_image_header *header) header->snapc = NULL; } -static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) +static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) { char *name; u64 segment; @@ -767,23 +870,6 @@ static u64 rbd_segment_length(struct rbd_device *rbd_dev, return length; } -static int rbd_get_num_segments(struct rbd_image_header *header, - u64 ofs, u64 len) -{ - u64 start_seg; - u64 end_seg; - - if (!len) - return 0; - if (len - 1 > U64_MAX - ofs) - return -ERANGE; - - start_seg = ofs >> header->obj_order; - end_seg = (ofs + len - 1) >> header->obj_order; - - return end_seg - start_seg + 1; -} - /* * returns the size of an object in the image */ @@ -949,8 +1035,10 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src, unsigned int bi_size; struct bio *bio; - if (!bi) + if (!bi) { + rbd_warn(NULL, "bio_chain exhausted with %u left", len); goto out_err; /* EINVAL; ran out of bio's */ + } bi_size = min_t(unsigned int, bi->bi_size - off, len); bio = bio_clone_range(bi, off, bi_size, gfpmask); if (!bio) @@ -976,399 +1064,721 @@ out_err: return NULL; } -/* - * helpers for osd request op vectors. - */ -static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, - int opcode, u32 payload_len) +static void rbd_obj_request_get(struct rbd_obj_request *obj_request) { - struct ceph_osd_req_op *ops; + dout("%s: obj %p (was %d)\n", __func__, obj_request, + atomic_read(&obj_request->kref.refcount)); + kref_get(&obj_request->kref); +} + +static void rbd_obj_request_destroy(struct kref *kref); +static void rbd_obj_request_put(struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request != NULL); + dout("%s: obj %p (was %d)\n", __func__, obj_request, + atomic_read(&obj_request->kref.refcount)); + kref_put(&obj_request->kref, rbd_obj_request_destroy); +} + +static void rbd_img_request_get(struct rbd_img_request *img_request) +{ + dout("%s: img %p (was %d)\n", __func__, img_request, + atomic_read(&img_request->kref.refcount)); + kref_get(&img_request->kref); +} + +static void rbd_img_request_destroy(struct kref *kref); +static void rbd_img_request_put(struct rbd_img_request *img_request) +{ + rbd_assert(img_request != NULL); + dout("%s: img %p (was %d)\n", __func__, img_request, + atomic_read(&img_request->kref.refcount)); + kref_put(&img_request->kref, rbd_img_request_destroy); +} + +static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, + struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request->img_request == NULL); + + rbd_obj_request_get(obj_request); + obj_request->img_request = img_request; + obj_request->which = img_request->obj_request_count; + rbd_assert(obj_request->which != BAD_WHICH); + img_request->obj_request_count++; + list_add_tail(&obj_request->links, &img_request->obj_requests); + dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, + obj_request->which); +} - ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); - if (!ops) +static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request, + struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request->which != BAD_WHICH); + + dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, + obj_request->which); + list_del(&obj_request->links); + rbd_assert(img_request->obj_request_count > 0); + img_request->obj_request_count--; + rbd_assert(obj_request->which == img_request->obj_request_count); + obj_request->which = BAD_WHICH; + rbd_assert(obj_request->img_request == img_request); + obj_request->img_request = NULL; + obj_request->callback = NULL; + rbd_obj_request_put(obj_request); +} + +static bool obj_request_type_valid(enum obj_request_type type) +{ + switch (type) { + case OBJ_REQUEST_NODATA: + case OBJ_REQUEST_BIO: + case OBJ_REQUEST_PAGES: + return true; + default: + return false; + } +} + +static struct ceph_osd_req_op *rbd_osd_req_op_create(u16 opcode, ...) +{ + struct ceph_osd_req_op *op; + va_list args; + size_t size; + + op = kzalloc(sizeof (*op), GFP_NOIO); + if (!op) return NULL; + op->op = opcode; + va_start(args, opcode); + switch (opcode) { + case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_WRITE: + /* rbd_osd_req_op_create(READ, offset, length) */ + /* rbd_osd_req_op_create(WRITE, offset, length) */ + op->extent.offset = va_arg(args, u64); + op->extent.length = va_arg(args, u64); + if (opcode == CEPH_OSD_OP_WRITE) + op->payload_len = op->extent.length; + break; + case CEPH_OSD_OP_STAT: + break; + case CEPH_OSD_OP_CALL: + /* rbd_osd_req_op_create(CALL, class, method, data, datalen) */ + op->cls.class_name = va_arg(args, char *); + size = strlen(op->cls.class_name); + rbd_assert(size <= (size_t) U8_MAX); + op->cls.class_len = size; + op->payload_len = size; + + op->cls.method_name = va_arg(args, char *); + size = strlen(op->cls.method_name); + rbd_assert(size <= (size_t) U8_MAX); + op->cls.method_len = size; + op->payload_len += size; + + op->cls.argc = 0; + op->cls.indata = va_arg(args, void *); + size = va_arg(args, size_t); + rbd_assert(size <= (size_t) U32_MAX); + op->cls.indata_len = (u32) size; + op->payload_len += size; + break; + case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_WATCH: + /* rbd_osd_req_op_create(NOTIFY_ACK, cookie, version) */ + /* rbd_osd_req_op_create(WATCH, cookie, version, flag) */ + op->watch.cookie = va_arg(args, u64); + op->watch.ver = va_arg(args, u64); + op->watch.ver = cpu_to_le64(op->watch.ver); + if (opcode == CEPH_OSD_OP_WATCH && va_arg(args, int)) + op->watch.flag = (u8) 1; + break; + default: + rbd_warn(NULL, "unsupported opcode %hu\n", opcode); + kfree(op); + op = NULL; + break; + } + va_end(args); - ops[0].op = opcode; + return op; +} - /* - * op extent offset and length will be set later on - * in calc_raw_layout() - */ - ops[0].payload_len = payload_len; +static void rbd_osd_req_op_destroy(struct ceph_osd_req_op *op) +{ + kfree(op); +} + +static int rbd_obj_request_submit(struct ceph_osd_client *osdc, + struct rbd_obj_request *obj_request) +{ + dout("%s: osdc %p obj %p\n", __func__, osdc, obj_request); - return ops; + return ceph_osdc_start_request(osdc, obj_request->osd_req, false); } -static void rbd_destroy_ops(struct ceph_osd_req_op *ops) +static void rbd_img_request_complete(struct rbd_img_request *img_request) { - kfree(ops); + dout("%s: img %p\n", __func__, img_request); + if (img_request->callback) + img_request->callback(img_request); + else + rbd_img_request_put(img_request); } -static void rbd_coll_end_req_index(struct request *rq, - struct rbd_req_coll *coll, - int index, - int ret, u64 len) +/* Caller is responsible for rbd_obj_request_destroy(obj_request) */ + +static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) { - struct request_queue *q; - int min, max, i; + dout("%s: obj %p\n", __func__, obj_request); - dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n", - coll, index, ret, (unsigned long long) len); + return wait_for_completion_interruptible(&obj_request->completion); +} - if (!rq) - return; +static void obj_request_done_init(struct rbd_obj_request *obj_request) +{ + atomic_set(&obj_request->done, 0); + smp_wmb(); +} - if (!coll) { - blk_end_request(rq, ret, len); - return; +static void obj_request_done_set(struct rbd_obj_request *obj_request) +{ + int done; + + done = atomic_inc_return(&obj_request->done); + if (done > 1) { + struct rbd_img_request *img_request = obj_request->img_request; + struct rbd_device *rbd_dev; + + rbd_dev = img_request ? img_request->rbd_dev : NULL; + rbd_warn(rbd_dev, "obj_request %p was already done\n", + obj_request); } +} - q = rq->q; - - spin_lock_irq(q->queue_lock); - coll->status[index].done = 1; - coll->status[index].rc = ret; - coll->status[index].bytes = len; - max = min = coll->num_done; - while (max < coll->total && coll->status[max].done) - max++; - - for (i = min; i<max; i++) { - __blk_end_request(rq, coll->status[i].rc, - coll->status[i].bytes); - coll->num_done++; - kref_put(&coll->kref, rbd_coll_release); +static bool obj_request_done_test(struct rbd_obj_request *obj_request) +{ + smp_mb(); + return atomic_read(&obj_request->done) != 0; +} + +static void rbd_obj_request_complete(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p cb %p\n", __func__, obj_request, + obj_request->callback); + if (obj_request->callback) + obj_request->callback(obj_request); + else + complete_all(&obj_request->completion); +} + +static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p\n", __func__, obj_request); + obj_request_done_set(obj_request); +} + +static void rbd_osd_read_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request, + obj_request->result, obj_request->xferred, obj_request->length); + /* + * ENOENT means a hole in the object. We zero-fill the + * entire length of the request. A short read also implies + * zero-fill to the end of the request. Either way we + * update the xferred count to indicate the whole request + * was satisfied. + */ + if (obj_request->result == -ENOENT) { + zero_bio_chain(obj_request->bio_list, 0); + obj_request->result = 0; + obj_request->xferred = obj_request->length; + } else if (obj_request->xferred < obj_request->length && + !obj_request->result) { + zero_bio_chain(obj_request->bio_list, obj_request->xferred); + obj_request->xferred = obj_request->length; } - spin_unlock_irq(q->queue_lock); + obj_request_done_set(obj_request); } -static void rbd_coll_end_req(struct rbd_request *req, - int ret, u64 len) +static void rbd_osd_write_callback(struct rbd_obj_request *obj_request) { - rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); + dout("%s: obj %p result %d %llu\n", __func__, obj_request, + obj_request->result, obj_request->length); + /* + * There is no such thing as a successful short write. + * Our xferred value is the number of bytes transferred + * back. Set it to our originally-requested length. + */ + obj_request->xferred = obj_request->length; + obj_request_done_set(obj_request); } /* - * Send ceph osd request + * For a simple stat call there's nothing to do. We'll do more if + * this is part of a write sequence for a layered image. */ -static int rbd_do_request(struct request *rq, - struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 snapid, - const char *object_name, u64 ofs, u64 len, - struct bio *bio, - struct page **pages, - int num_pages, - int flags, - struct ceph_osd_req_op *ops, - struct rbd_req_coll *coll, - int coll_index, - void (*rbd_cb)(struct ceph_osd_request *req, - struct ceph_msg *msg), - struct ceph_osd_request **linger_req, - u64 *ver) -{ - struct ceph_osd_request *req; - struct ceph_file_layout *layout; - int ret; - u64 bno; - struct timespec mtime = CURRENT_TIME; - struct rbd_request *req_data; - struct ceph_osd_request_head *reqhead; - struct ceph_osd_client *osdc; +static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p\n", __func__, obj_request); + obj_request_done_set(obj_request); +} - req_data = kzalloc(sizeof(*req_data), GFP_NOIO); - if (!req_data) { - if (coll) - rbd_coll_end_req_index(rq, coll, coll_index, - -ENOMEM, len); - return -ENOMEM; +static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, + struct ceph_msg *msg) +{ + struct rbd_obj_request *obj_request = osd_req->r_priv; + u16 opcode; + + dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg); + rbd_assert(osd_req == obj_request->osd_req); + rbd_assert(!!obj_request->img_request ^ + (obj_request->which == BAD_WHICH)); + + if (osd_req->r_result < 0) + obj_request->result = osd_req->r_result; + obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version); + + WARN_ON(osd_req->r_num_ops != 1); /* For now */ + + /* + * We support a 64-bit length, but ultimately it has to be + * passed to blk_end_request(), which takes an unsigned int. + */ + obj_request->xferred = osd_req->r_reply_op_len[0]; + rbd_assert(obj_request->xferred < (u64) UINT_MAX); + opcode = osd_req->r_request_ops[0].op; + switch (opcode) { + case CEPH_OSD_OP_READ: + rbd_osd_read_callback(obj_request); + break; + case CEPH_OSD_OP_WRITE: + rbd_osd_write_callback(obj_request); + break; + case CEPH_OSD_OP_STAT: + rbd_osd_stat_callback(obj_request); + break; + case CEPH_OSD_OP_CALL: + case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_WATCH: + rbd_osd_trivial_callback(obj_request); + break; + default: + rbd_warn(NULL, "%s: unsupported op %hu\n", + obj_request->object_name, (unsigned short) opcode); + break; } - if (coll) { - req_data->coll = coll; - req_data->coll_index = coll_index; + if (obj_request_done_test(obj_request)) + rbd_obj_request_complete(obj_request); +} + +static struct ceph_osd_request *rbd_osd_req_create( + struct rbd_device *rbd_dev, + bool write_request, + struct rbd_obj_request *obj_request, + struct ceph_osd_req_op *op) +{ + struct rbd_img_request *img_request = obj_request->img_request; + struct ceph_snap_context *snapc = NULL; + struct ceph_osd_client *osdc; + struct ceph_osd_request *osd_req; + struct timespec now; + struct timespec *mtime; + u64 snap_id = CEPH_NOSNAP; + u64 offset = obj_request->offset; + u64 length = obj_request->length; + + if (img_request) { + rbd_assert(img_request->write_request == write_request); + if (img_request->write_request) + snapc = img_request->snapc; + else + snap_id = img_request->snap_id; } - dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n", - object_name, (unsigned long long) ofs, - (unsigned long long) len, coll, coll_index); + /* Allocate and initialize the request, for the single op */ osdc = &rbd_dev->rbd_client->client->osdc; - req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, - false, GFP_NOIO, pages, bio); - if (!req) { - ret = -ENOMEM; - goto done_pages; + osd_req = ceph_osdc_alloc_request(osdc, snapc, 1, false, GFP_ATOMIC); + if (!osd_req) + return NULL; /* ENOMEM */ + + rbd_assert(obj_request_type_valid(obj_request->type)); + switch (obj_request->type) { + case OBJ_REQUEST_NODATA: + break; /* Nothing to do */ + case OBJ_REQUEST_BIO: + rbd_assert(obj_request->bio_list != NULL); + osd_req->r_bio = obj_request->bio_list; + break; + case OBJ_REQUEST_PAGES: + osd_req->r_pages = obj_request->pages; + osd_req->r_num_pages = obj_request->page_count; + osd_req->r_page_alignment = offset & ~PAGE_MASK; + break; } - req->r_callback = rbd_cb; + if (write_request) { + osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; + now = CURRENT_TIME; + mtime = &now; + } else { + osd_req->r_flags = CEPH_OSD_FLAG_READ; + mtime = NULL; /* not needed for reads */ + offset = 0; /* These are not used... */ + length = 0; /* ...for osd read requests */ + } - req_data->rq = rq; - req_data->bio = bio; - req_data->pages = pages; - req_data->len = len; + osd_req->r_callback = rbd_osd_req_callback; + osd_req->r_priv = obj_request; - req->r_priv = req_data; + osd_req->r_oid_len = strlen(obj_request->object_name); + rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid)); + memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len); - reqhead = req->r_request->front.iov_base; - reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); + osd_req->r_file_layout = rbd_dev->layout; /* struct */ - strncpy(req->r_oid, object_name, sizeof(req->r_oid)); - req->r_oid_len = strlen(req->r_oid); + /* osd_req will get its own reference to snapc (if non-null) */ - layout = &req->r_file_layout; - memset(layout, 0, sizeof(*layout)); - layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_stripe_count = cpu_to_le32(1); - layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id); - ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, - req, ops); - rbd_assert(ret == 0); + ceph_osdc_build_request(osd_req, offset, length, 1, op, + snapc, snap_id, mtime); - ceph_osdc_build_request(req, ofs, &len, - ops, - snapc, - &mtime, - req->r_oid, req->r_oid_len); + return osd_req; +} - if (linger_req) { - ceph_osdc_set_request_linger(osdc, req); - *linger_req = req; - } +static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) +{ + ceph_osdc_put_request(osd_req); +} - ret = ceph_osdc_start_request(osdc, req, false); - if (ret < 0) - goto done_err; - - if (!rbd_cb) { - ret = ceph_osdc_wait_request(osdc, req); - if (ver) - *ver = le64_to_cpu(req->r_reassert_version.version); - dout("reassert_ver=%llu\n", - (unsigned long long) - le64_to_cpu(req->r_reassert_version.version)); - ceph_osdc_put_request(req); +/* object_name is assumed to be a non-null pointer and NUL-terminated */ + +static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, + u64 offset, u64 length, + enum obj_request_type type) +{ + struct rbd_obj_request *obj_request; + size_t size; + char *name; + + rbd_assert(obj_request_type_valid(type)); + + size = strlen(object_name) + 1; + obj_request = kzalloc(sizeof (*obj_request) + size, GFP_KERNEL); + if (!obj_request) + return NULL; + + name = (char *)(obj_request + 1); + obj_request->object_name = memcpy(name, object_name, size); + obj_request->offset = offset; + obj_request->length = length; + obj_request->which = BAD_WHICH; + obj_request->type = type; + INIT_LIST_HEAD(&obj_request->links); + obj_request_done_init(obj_request); + init_completion(&obj_request->completion); + kref_init(&obj_request->kref); + + dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name, + offset, length, (int)type, obj_request); + + return obj_request; +} + +static void rbd_obj_request_destroy(struct kref *kref) +{ + struct rbd_obj_request *obj_request; + + obj_request = container_of(kref, struct rbd_obj_request, kref); + + dout("%s: obj %p\n", __func__, obj_request); + + rbd_assert(obj_request->img_request == NULL); + rbd_assert(obj_request->which == BAD_WHICH); + + if (obj_request->osd_req) + rbd_osd_req_destroy(obj_request->osd_req); + + rbd_assert(obj_request_type_valid(obj_request->type)); + switch (obj_request->type) { + case OBJ_REQUEST_NODATA: + break; /* Nothing to do */ + case OBJ_REQUEST_BIO: + if (obj_request->bio_list) + bio_chain_put(obj_request->bio_list); + break; + case OBJ_REQUEST_PAGES: + if (obj_request->pages) + ceph_release_page_vector(obj_request->pages, + obj_request->page_count); + break; } - return ret; -done_err: - bio_chain_put(req_data->bio); - ceph_osdc_put_request(req); -done_pages: - rbd_coll_end_req(req_data, ret, len); - kfree(req_data); - return ret; + kfree(obj_request); } /* - * Ceph osd op callback + * Caller is responsible for filling in the list of object requests + * that comprises the image request, and the Linux request pointer + * (if there is one). */ -static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) -{ - struct rbd_request *req_data = req->r_priv; - struct ceph_osd_reply_head *replyhead; - struct ceph_osd_op *op; - __s32 rc; - u64 bytes; - int read_op; - - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - op = (void *)(replyhead + 1); - rc = le32_to_cpu(replyhead->result); - bytes = le64_to_cpu(op->extent.length); - read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); - - dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n", - (unsigned long long) bytes, read_op, (int) rc); - - if (rc == -ENOENT && read_op) { - zero_bio_chain(req_data->bio, 0); - rc = 0; - } else if (rc == 0 && read_op && bytes < req_data->len) { - zero_bio_chain(req_data->bio, bytes); - bytes = req_data->len; - } +static struct rbd_img_request *rbd_img_request_create( + struct rbd_device *rbd_dev, + u64 offset, u64 length, + bool write_request) +{ + struct rbd_img_request *img_request; + struct ceph_snap_context *snapc = NULL; - rbd_coll_end_req(req_data, rc, bytes); + img_request = kmalloc(sizeof (*img_request), GFP_ATOMIC); + if (!img_request) + return NULL; - if (req_data->bio) - bio_chain_put(req_data->bio); + if (write_request) { + down_read(&rbd_dev->header_rwsem); + snapc = ceph_get_snap_context(rbd_dev->header.snapc); + up_read(&rbd_dev->header_rwsem); + if (WARN_ON(!snapc)) { + kfree(img_request); + return NULL; /* Shouldn't happen */ + } + } - ceph_osdc_put_request(req); - kfree(req_data); + img_request->rq = NULL; + img_request->rbd_dev = rbd_dev; + img_request->offset = offset; + img_request->length = length; + img_request->write_request = write_request; + if (write_request) + img_request->snapc = snapc; + else + img_request->snap_id = rbd_dev->spec->snap_id; + spin_lock_init(&img_request->completion_lock); + img_request->next_completion = 0; + img_request->callback = NULL; + img_request->obj_request_count = 0; + INIT_LIST_HEAD(&img_request->obj_requests); + kref_init(&img_request->kref); + + rbd_img_request_get(img_request); /* Avoid a warning */ + rbd_img_request_put(img_request); /* TEMPORARY */ + + dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev, + write_request ? "write" : "read", offset, length, + img_request); + + return img_request; } -static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) +static void rbd_img_request_destroy(struct kref *kref) { - ceph_osdc_put_request(req); + struct rbd_img_request *img_request; + struct rbd_obj_request *obj_request; + struct rbd_obj_request *next_obj_request; + + img_request = container_of(kref, struct rbd_img_request, kref); + + dout("%s: img %p\n", __func__, img_request); + + for_each_obj_request_safe(img_request, obj_request, next_obj_request) + rbd_img_obj_request_del(img_request, obj_request); + rbd_assert(img_request->obj_request_count == 0); + + if (img_request->write_request) + ceph_put_snap_context(img_request->snapc); + + kfree(img_request); } -/* - * Do a synchronous ceph osd operation - */ -static int rbd_req_sync_op(struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 snapid, - int flags, - struct ceph_osd_req_op *ops, - const char *object_name, - u64 ofs, u64 inbound_size, - char *inbound, - struct ceph_osd_request **linger_req, - u64 *ver) +static int rbd_img_request_fill_bio(struct rbd_img_request *img_request, + struct bio *bio_list) { - int ret; - struct page **pages; - int num_pages; - - rbd_assert(ops != NULL); + struct rbd_device *rbd_dev = img_request->rbd_dev; + struct rbd_obj_request *obj_request = NULL; + struct rbd_obj_request *next_obj_request; + unsigned int bio_offset; + u64 image_offset; + u64 resid; + u16 opcode; + + dout("%s: img %p bio %p\n", __func__, img_request, bio_list); + + opcode = img_request->write_request ? CEPH_OSD_OP_WRITE + : CEPH_OSD_OP_READ; + bio_offset = 0; + image_offset = img_request->offset; + rbd_assert(image_offset == bio_list->bi_sector << SECTOR_SHIFT); + resid = img_request->length; + rbd_assert(resid > 0); + while (resid) { + const char *object_name; + unsigned int clone_size; + struct ceph_osd_req_op *op; + u64 offset; + u64 length; + + object_name = rbd_segment_name(rbd_dev, image_offset); + if (!object_name) + goto out_unwind; + offset = rbd_segment_offset(rbd_dev, image_offset); + length = rbd_segment_length(rbd_dev, image_offset, resid); + obj_request = rbd_obj_request_create(object_name, + offset, length, + OBJ_REQUEST_BIO); + kfree(object_name); /* object request has its own copy */ + if (!obj_request) + goto out_unwind; + + rbd_assert(length <= (u64) UINT_MAX); + clone_size = (unsigned int) length; + obj_request->bio_list = bio_chain_clone_range(&bio_list, + &bio_offset, clone_size, + GFP_ATOMIC); + if (!obj_request->bio_list) + goto out_partial; - num_pages = calc_pages_for(ofs, inbound_size); - pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); - if (IS_ERR(pages)) - return PTR_ERR(pages); + /* + * Build up the op to use in building the osd + * request. Note that the contents of the op are + * copied by rbd_osd_req_create(). + */ + op = rbd_osd_req_op_create(opcode, offset, length); + if (!op) + goto out_partial; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, + img_request->write_request, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out_partial; + /* status and version are initially zero-filled */ + + rbd_img_obj_request_add(img_request, obj_request); + + image_offset += length; + resid -= length; + } - ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, - object_name, ofs, inbound_size, NULL, - pages, num_pages, - flags, - ops, - NULL, 0, - NULL, - linger_req, ver); - if (ret < 0) - goto done; + return 0; - if ((flags & CEPH_OSD_FLAG_READ) && inbound) - ret = ceph_copy_from_page_vector(pages, inbound, ofs, ret); +out_partial: + rbd_obj_request_put(obj_request); +out_unwind: + for_each_obj_request_safe(img_request, obj_request, next_obj_request) + rbd_obj_request_put(obj_request); -done: - ceph_release_page_vector(pages, num_pages); - return ret; + return -ENOMEM; } -/* - * Do an asynchronous ceph osd operation - */ -static int rbd_do_op(struct request *rq, - struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 ofs, u64 len, - struct bio *bio, - struct rbd_req_coll *coll, - int coll_index) -{ - char *seg_name; - u64 seg_ofs; - u64 seg_len; - int ret; - struct ceph_osd_req_op *ops; - u32 payload_len; - int opcode; - int flags; - u64 snapid; - - seg_name = rbd_segment_name(rbd_dev, ofs); - if (!seg_name) - return -ENOMEM; - seg_len = rbd_segment_length(rbd_dev, ofs, len); - seg_ofs = rbd_segment_offset(rbd_dev, ofs); - - if (rq_data_dir(rq) == WRITE) { - opcode = CEPH_OSD_OP_WRITE; - flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK; - snapid = CEPH_NOSNAP; - payload_len = seg_len; - } else { - opcode = CEPH_OSD_OP_READ; - flags = CEPH_OSD_FLAG_READ; - snapc = NULL; - snapid = rbd_dev->spec->snap_id; - payload_len = 0; +static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) +{ + struct rbd_img_request *img_request; + u32 which = obj_request->which; + bool more = true; + + img_request = obj_request->img_request; + + dout("%s: img %p obj %p\n", __func__, img_request, obj_request); + rbd_assert(img_request != NULL); + rbd_assert(img_request->rq != NULL); + rbd_assert(img_request->obj_request_count > 0); + rbd_assert(which != BAD_WHICH); + rbd_assert(which < img_request->obj_request_count); + rbd_assert(which >= img_request->next_completion); + + spin_lock_irq(&img_request->completion_lock); + if (which != img_request->next_completion) + goto out; + + for_each_obj_request_from(img_request, obj_request) { + unsigned int xferred; + int result; + + rbd_assert(more); + rbd_assert(which < img_request->obj_request_count); + + if (!obj_request_done_test(obj_request)) + break; + + rbd_assert(obj_request->xferred <= (u64) UINT_MAX); + xferred = (unsigned int) obj_request->xferred; + result = (int) obj_request->result; + if (result) + rbd_warn(NULL, "obj_request %s result %d xferred %u\n", + img_request->write_request ? "write" : "read", + result, xferred); + + more = blk_end_request(img_request->rq, result, xferred); + which++; } - ret = -ENOMEM; - ops = rbd_create_rw_ops(1, opcode, payload_len); - if (!ops) - goto done; + rbd_assert(more ^ (which == img_request->obj_request_count)); + img_request->next_completion = which; +out: + spin_unlock_irq(&img_request->completion_lock); - /* we've taken care of segment sizes earlier when we - cloned the bios. We should never have a segment - truncated at this point */ - rbd_assert(seg_len == len); - - ret = rbd_do_request(rq, rbd_dev, snapc, snapid, - seg_name, seg_ofs, seg_len, - bio, - NULL, 0, - flags, - ops, - coll, coll_index, - rbd_req_cb, 0, NULL); - - rbd_destroy_ops(ops); -done: - kfree(seg_name); - return ret; + if (!more) + rbd_img_request_complete(img_request); } -/* - * Request sync osd read - */ -static int rbd_req_sync_read(struct rbd_device *rbd_dev, - u64 snapid, - const char *object_name, - u64 ofs, u64 len, - char *buf, - u64 *ver) -{ - struct ceph_osd_req_op *ops; - int ret; +static int rbd_img_request_submit(struct rbd_img_request *img_request) +{ + struct rbd_device *rbd_dev = img_request->rbd_dev; + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct rbd_obj_request *obj_request; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0); - if (!ops) - return -ENOMEM; + dout("%s: img %p\n", __func__, img_request); + for_each_obj_request(img_request, obj_request) { + int ret; - ret = rbd_req_sync_op(rbd_dev, NULL, - snapid, - CEPH_OSD_FLAG_READ, - ops, object_name, ofs, len, buf, NULL, ver); - rbd_destroy_ops(ops); + obj_request->callback = rbd_img_obj_callback; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + return ret; + /* + * The image request has its own reference to each + * of its object requests, so we can safely drop the + * initial one here. + */ + rbd_obj_request_put(obj_request); + } - return ret; + return 0; } -/* - * Request sync osd watch - */ -static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev, - u64 ver, - u64 notify_id) +static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, + u64 ver, u64 notify_id) { - struct ceph_osd_req_op *ops; + struct rbd_obj_request *obj_request; + struct ceph_osd_req_op *op; + struct ceph_osd_client *osdc; int ret; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0); - if (!ops) + obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, + OBJ_REQUEST_NODATA); + if (!obj_request) return -ENOMEM; - ops[0].watch.ver = cpu_to_le64(ver); - ops[0].watch.cookie = notify_id; - ops[0].watch.flag = 0; + ret = -ENOMEM; + op = rbd_osd_req_op_create(CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; - ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, - rbd_dev->header_name, 0, 0, NULL, - NULL, 0, - CEPH_OSD_FLAG_READ, - ops, - NULL, 0, - rbd_simple_req_cb, 0, NULL); + osdc = &rbd_dev->rbd_client->client->osdc; + obj_request->callback = rbd_obj_request_put; + ret = rbd_obj_request_submit(osdc, obj_request); +out: + if (ret) + rbd_obj_request_put(obj_request); - rbd_destroy_ops(ops); return ret; } @@ -1381,95 +1791,103 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) if (!rbd_dev) return; - dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", + dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__, rbd_dev->header_name, (unsigned long long) notify_id, (unsigned int) opcode); rc = rbd_dev_refresh(rbd_dev, &hver); if (rc) - pr_warning(RBD_DRV_NAME "%d got notification but failed to " - " update snaps: %d\n", rbd_dev->major, rc); + rbd_warn(rbd_dev, "got notification but failed to " + " update snaps: %d\n", rc); - rbd_req_sync_notify_ack(rbd_dev, hver, notify_id); + rbd_obj_notify_ack(rbd_dev, hver, notify_id); } /* - * Request sync osd watch + * Request sync osd watch/unwatch. The value of "start" determines + * whether a watch request is being initiated or torn down. */ -static int rbd_req_sync_watch(struct rbd_device *rbd_dev) +static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) { - struct ceph_osd_req_op *ops; struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct rbd_obj_request *obj_request; + struct ceph_osd_req_op *op; int ret; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); - if (!ops) - return -ENOMEM; + rbd_assert(start ^ !!rbd_dev->watch_event); + rbd_assert(start ^ !!rbd_dev->watch_request); - ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, - (void *)rbd_dev, &rbd_dev->watch_event); - if (ret < 0) - goto fail; + if (start) { + ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, + &rbd_dev->watch_event); + if (ret < 0) + return ret; + rbd_assert(rbd_dev->watch_event != NULL); + } - ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version); - ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); - ops[0].watch.flag = 1; + ret = -ENOMEM; + obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, + OBJ_REQUEST_NODATA); + if (!obj_request) + goto out_cancel; + + op = rbd_osd_req_op_create(CEPH_OSD_OP_WATCH, + rbd_dev->watch_event->cookie, + rbd_dev->header.obj_version, start); + if (!op) + goto out_cancel; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out_cancel; + + if (start) + ceph_osdc_set_request_linger(osdc, obj_request->osd_req); + else + ceph_osdc_unregister_linger_request(osdc, + rbd_dev->watch_request->osd_req); + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out_cancel; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out_cancel; + ret = obj_request->result; + if (ret) + goto out_cancel; - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ops, - rbd_dev->header_name, - 0, 0, NULL, - &rbd_dev->watch_request, NULL); + /* + * A watch request is set to linger, so the underlying osd + * request won't go away until we unregister it. We retain + * a pointer to the object request during that time (in + * rbd_dev->watch_request), so we'll keep a reference to + * it. We'll drop that reference (below) after we've + * unregistered it. + */ + if (start) { + rbd_dev->watch_request = obj_request; - if (ret < 0) - goto fail_event; + return 0; + } - rbd_destroy_ops(ops); - return 0; + /* We have successfully torn down the watch request */ -fail_event: + rbd_obj_request_put(rbd_dev->watch_request); + rbd_dev->watch_request = NULL; +out_cancel: + /* Cancel the event if we're tearing down, or on error */ ceph_osdc_cancel_event(rbd_dev->watch_event); rbd_dev->watch_event = NULL; -fail: - rbd_destroy_ops(ops); - return ret; -} + if (obj_request) + rbd_obj_request_put(obj_request); -/* - * Request sync osd unwatch - */ -static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev) -{ - struct ceph_osd_req_op *ops; - int ret; - - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); - if (!ops) - return -ENOMEM; - - ops[0].watch.ver = 0; - ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); - ops[0].watch.flag = 0; - - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ops, - rbd_dev->header_name, - 0, 0, NULL, NULL, NULL); - - - rbd_destroy_ops(ops); - ceph_osdc_cancel_event(rbd_dev->watch_event); - rbd_dev->watch_event = NULL; return ret; } /* * Synchronous osd object method call */ -static int rbd_req_sync_exec(struct rbd_device *rbd_dev, +static int rbd_obj_method_sync(struct rbd_device *rbd_dev, const char *object_name, const char *class_name, const char *method_name, @@ -1477,169 +1895,154 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev, size_t outbound_size, char *inbound, size_t inbound_size, - int flags, - u64 *ver) + u64 *version) { - struct ceph_osd_req_op *ops; - int class_name_len = strlen(class_name); - int method_name_len = strlen(method_name); - int payload_size; + struct rbd_obj_request *obj_request; + struct ceph_osd_client *osdc; + struct ceph_osd_req_op *op; + struct page **pages; + u32 page_count; int ret; /* - * Any input parameters required by the method we're calling - * will be sent along with the class and method names as - * part of the message payload. That data and its size are - * supplied via the indata and indata_len fields (named from - * the perspective of the server side) in the OSD request - * operation. + * Method calls are ultimately read operations but they + * don't involve object data (so no offset or length). + * The result should placed into the inbound buffer + * provided. They also supply outbound data--parameters for + * the object method. Currently if this is present it will + * be a snapshot id. */ - payload_size = class_name_len + method_name_len + outbound_size; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, payload_size); - if (!ops) - return -ENOMEM; + page_count = (u32) calc_pages_for(0, inbound_size); + pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); + if (IS_ERR(pages)) + return PTR_ERR(pages); - ops[0].cls.class_name = class_name; - ops[0].cls.class_len = (__u8) class_name_len; - ops[0].cls.method_name = method_name; - ops[0].cls.method_len = (__u8) method_name_len; - ops[0].cls.argc = 0; - ops[0].cls.indata = outbound; - ops[0].cls.indata_len = outbound_size; + ret = -ENOMEM; + obj_request = rbd_obj_request_create(object_name, 0, 0, + OBJ_REQUEST_PAGES); + if (!obj_request) + goto out; - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - flags, ops, - object_name, 0, inbound_size, inbound, - NULL, ver); + obj_request->pages = pages; + obj_request->page_count = page_count; - rbd_destroy_ops(ops); + op = rbd_osd_req_op_create(CEPH_OSD_OP_CALL, class_name, + method_name, outbound, outbound_size); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; - dout("cls_exec returned %d\n", ret); - return ret; -} + osdc = &rbd_dev->rbd_client->client->osdc; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out; -static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) -{ - struct rbd_req_coll *coll = - kzalloc(sizeof(struct rbd_req_coll) + - sizeof(struct rbd_req_status) * num_reqs, - GFP_ATOMIC); + ret = obj_request->result; + if (ret < 0) + goto out; + ret = 0; + ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred); + if (version) + *version = obj_request->version; +out: + if (obj_request) + rbd_obj_request_put(obj_request); + else + ceph_release_page_vector(pages, page_count); - if (!coll) - return NULL; - coll->total = num_reqs; - kref_init(&coll->kref); - return coll; + return ret; } -/* - * block device queue callback - */ -static void rbd_rq_fn(struct request_queue *q) +static void rbd_request_fn(struct request_queue *q) + __releases(q->queue_lock) __acquires(q->queue_lock) { struct rbd_device *rbd_dev = q->queuedata; + bool read_only = rbd_dev->mapping.read_only; struct request *rq; + int result; while ((rq = blk_fetch_request(q))) { - struct bio *bio; - bool do_write; - unsigned int size; - u64 ofs; - int num_segs, cur_seg = 0; - struct rbd_req_coll *coll; - struct ceph_snap_context *snapc; - unsigned int bio_offset; - - dout("fetched request\n"); - - /* filter out block requests we don't understand */ - if ((rq->cmd_type != REQ_TYPE_FS)) { - __blk_end_request_all(rq, 0); - continue; - } + bool write_request = rq_data_dir(rq) == WRITE; + struct rbd_img_request *img_request; + u64 offset; + u64 length; + + /* Ignore any non-FS requests that filter through. */ - /* deduce our operation (read, write) */ - do_write = (rq_data_dir(rq) == WRITE); - if (do_write && rbd_dev->mapping.read_only) { - __blk_end_request_all(rq, -EROFS); + if (rq->cmd_type != REQ_TYPE_FS) { + dout("%s: non-fs request type %d\n", __func__, + (int) rq->cmd_type); + __blk_end_request_all(rq, 0); continue; } - spin_unlock_irq(q->queue_lock); + /* Ignore/skip any zero-length requests */ - down_read(&rbd_dev->header_rwsem); + offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT; + length = (u64) blk_rq_bytes(rq); - if (!rbd_dev->exists) { - rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); - up_read(&rbd_dev->header_rwsem); - dout("request for non-existent snapshot"); - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, -ENXIO); + if (!length) { + dout("%s: zero-length request\n", __func__); + __blk_end_request_all(rq, 0); continue; } - snapc = ceph_get_snap_context(rbd_dev->header.snapc); - - up_read(&rbd_dev->header_rwsem); - - size = blk_rq_bytes(rq); - ofs = blk_rq_pos(rq) * SECTOR_SIZE; - bio = rq->bio; + spin_unlock_irq(q->queue_lock); - dout("%s 0x%x bytes at 0x%llx\n", - do_write ? "write" : "read", - size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); + /* Disallow writes to a read-only device */ - num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); - if (num_segs <= 0) { - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, num_segs); - ceph_put_snap_context(snapc); - continue; + if (write_request) { + result = -EROFS; + if (read_only) + goto end_request; + rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP); } - coll = rbd_alloc_coll(num_segs); - if (!coll) { - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, -ENOMEM); - ceph_put_snap_context(snapc); - continue; - } - - bio_offset = 0; - do { - u64 limit = rbd_segment_length(rbd_dev, ofs, size); - unsigned int chain_size; - struct bio *bio_chain; - - BUG_ON(limit > (u64) UINT_MAX); - chain_size = (unsigned int) limit; - dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); - kref_get(&coll->kref); + /* + * Quit early if the mapped snapshot no longer + * exists. It's still possible the snapshot will + * have disappeared by the time our request arrives + * at the osd, but there's no sense in sending it if + * we already know. + */ + if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) { + dout("request for non-existent snapshot"); + rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); + result = -ENXIO; + goto end_request; + } - /* Pass a cloned bio chain via an osd request */ + result = -EINVAL; + if (WARN_ON(offset && length > U64_MAX - offset + 1)) + goto end_request; /* Shouldn't happen */ - bio_chain = bio_chain_clone_range(&bio, - &bio_offset, chain_size, - GFP_ATOMIC); - if (bio_chain) - (void) rbd_do_op(rq, rbd_dev, snapc, - ofs, chain_size, - bio_chain, coll, cur_seg); - else - rbd_coll_end_req_index(rq, coll, cur_seg, - -ENOMEM, chain_size); - size -= chain_size; - ofs += chain_size; + result = -ENOMEM; + img_request = rbd_img_request_create(rbd_dev, offset, length, + write_request); + if (!img_request) + goto end_request; - cur_seg++; - } while (size > 0); - kref_put(&coll->kref, rbd_coll_release); + img_request->rq = rq; + result = rbd_img_request_fill_bio(img_request, rq->bio); + if (!result) + result = rbd_img_request_submit(img_request); + if (result) + rbd_img_request_put(img_request); +end_request: spin_lock_irq(q->queue_lock); - - ceph_put_snap_context(snapc); + if (result < 0) { + rbd_warn(rbd_dev, "obj_request %s result %d\n", + write_request ? "write" : "read", result); + __blk_end_request_all(rq, result); + } } } @@ -1703,6 +2106,71 @@ static void rbd_free_disk(struct rbd_device *rbd_dev) put_disk(disk); } +static int rbd_obj_read_sync(struct rbd_device *rbd_dev, + const char *object_name, + u64 offset, u64 length, + char *buf, u64 *version) + +{ + struct ceph_osd_req_op *op; + struct rbd_obj_request *obj_request; + struct ceph_osd_client *osdc; + struct page **pages = NULL; + u32 page_count; + size_t size; + int ret; + + page_count = (u32) calc_pages_for(offset, length); + pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); + if (IS_ERR(pages)) + ret = PTR_ERR(pages); + + ret = -ENOMEM; + obj_request = rbd_obj_request_create(object_name, offset, length, + OBJ_REQUEST_PAGES); + if (!obj_request) + goto out; + + obj_request->pages = pages; + obj_request->page_count = page_count; + + op = rbd_osd_req_op_create(CEPH_OSD_OP_READ, offset, length); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; + + osdc = &rbd_dev->rbd_client->client->osdc; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out; + + ret = obj_request->result; + if (ret < 0) + goto out; + + rbd_assert(obj_request->xferred <= (u64) SIZE_MAX); + size = (size_t) obj_request->xferred; + ceph_copy_from_page_vector(pages, buf, 0, size); + rbd_assert(size <= (size_t) INT_MAX); + ret = (int) size; + if (version) + *version = obj_request->version; +out: + if (obj_request) + rbd_obj_request_put(obj_request); + else + ceph_release_page_vector(pages, page_count); + + return ret; +} + /* * Read the complete header for the given rbd device. * @@ -1741,24 +2209,20 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) if (!ondisk) return ERR_PTR(-ENOMEM); - ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP, - rbd_dev->header_name, + ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, 0, size, (char *) ondisk, version); - if (ret < 0) goto out_err; if (WARN_ON((size_t) ret < size)) { ret = -ENXIO; - pr_warning("short header read for image %s" - " (want %zd got %d)\n", - rbd_dev->spec->image_name, size, ret); + rbd_warn(rbd_dev, "short header read (want %zd got %d)", + size, ret); goto out_err; } if (!rbd_dev_ondisk_valid(ondisk)) { ret = -ENXIO; - pr_warning("invalid header for image %s\n", - rbd_dev->spec->image_name); + rbd_warn(rbd_dev, "invalid header"); goto out_err; } @@ -1895,8 +2359,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) disk->fops = &rbd_bd_ops; disk->private_data = rbd_dev; - /* init rq */ - q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); + q = blk_init_queue(rbd_request_fn, &rbd_dev->lock); if (!q) goto out_disk; @@ -2233,7 +2696,7 @@ static void rbd_spec_free(struct kref *kref) kfree(spec); } -struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, +static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, struct rbd_spec *spec) { struct rbd_device *rbd_dev; @@ -2243,6 +2706,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, return NULL; spin_lock_init(&rbd_dev->lock); + rbd_dev->flags = 0; INIT_LIST_HEAD(&rbd_dev->node); INIT_LIST_HEAD(&rbd_dev->snaps); init_rwsem(&rbd_dev->header_rwsem); @@ -2250,6 +2714,13 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, rbd_dev->spec = spec; rbd_dev->rbd_client = rbdc; + /* Initialize the layout used for all rbd requests */ + + rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); + rbd_dev->layout.fl_stripe_count = cpu_to_le32(1); + rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); + rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id); + return rbd_dev; } @@ -2360,12 +2831,11 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, __le64 size; } __attribute__ ((packed)) size_buf = { 0 }; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_size", (char *) &snapid, sizeof (snapid), - (char *) &size_buf, sizeof (size_buf), - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + (char *) &size_buf, sizeof (size_buf), NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -2396,15 +2866,13 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) if (!reply_buf) return -ENOMEM; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_object_prefix", NULL, 0, - reply_buf, RBD_OBJ_PREFIX_LEN_MAX, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, RBD_OBJ_PREFIX_LEN_MAX, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; - ret = 0; /* rbd_req_sync_exec() can return positive */ p = reply_buf; rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, @@ -2435,12 +2903,12 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, u64 incompat; int ret; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_features", (char *) &snapid, sizeof (snapid), (char *) &features_buf, sizeof (features_buf), - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -2474,7 +2942,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) void *end; char *image_id; u64 overlap; - size_t len = 0; int ret; parent_spec = rbd_spec_alloc(); @@ -2492,12 +2959,11 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) } snapid = cpu_to_le64(CEPH_NOSNAP); - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_parent", (char *) &snapid, sizeof (snapid), - (char *) reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + (char *) reply_buf, size, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out_err; @@ -2508,13 +2974,18 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) if (parent_spec->pool_id == CEPH_NOPOOL) goto out; /* No parent? No problem. */ - image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); + /* The ceph file layout needs to fit pool id in 32 bits */ + + ret = -EIO; + if (WARN_ON(parent_spec->pool_id > (u64) U32_MAX)) + goto out; + + image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); if (IS_ERR(image_id)) { ret = PTR_ERR(image_id); goto out_err; } parent_spec->image_id = image_id; - parent_spec->image_id_len = len; ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); ceph_decode_64_safe(&p, end, overlap, out_err); @@ -2544,26 +3015,25 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev) rbd_assert(!rbd_dev->spec->image_name); - image_id_size = sizeof (__le32) + rbd_dev->spec->image_id_len; + len = strlen(rbd_dev->spec->image_id); + image_id_size = sizeof (__le32) + len; image_id = kmalloc(image_id_size, GFP_KERNEL); if (!image_id) return NULL; p = image_id; end = (char *) image_id + image_id_size; - ceph_encode_string(&p, end, rbd_dev->spec->image_id, - (u32) rbd_dev->spec->image_id_len); + ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32) len); size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; reply_buf = kmalloc(size, GFP_KERNEL); if (!reply_buf) goto out; - ret = rbd_req_sync_exec(rbd_dev, RBD_DIRECTORY, + ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY, "rbd", "dir_get_name", image_id, image_id_size, - (char *) reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); + (char *) reply_buf, size, NULL); if (ret < 0) goto out; p = reply_buf; @@ -2602,8 +3072,11 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) osdc = &rbd_dev->rbd_client->client->osdc; name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); - if (!name) - return -EIO; /* pool id too large (>= 2^31) */ + if (!name) { + rbd_warn(rbd_dev, "there is no pool with id %llu", + rbd_dev->spec->pool_id); /* Really a BUG() */ + return -EIO; + } rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); if (!rbd_dev->spec->pool_name) @@ -2612,19 +3085,17 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) /* Fetch the image name; tolerate failure here */ name = rbd_dev_image_name(rbd_dev); - if (name) { - rbd_dev->spec->image_name_len = strlen(name); + if (name) rbd_dev->spec->image_name = (char *) name; - } else { - pr_warning(RBD_DRV_NAME "%d " - "unable to get image name for image id %s\n", - rbd_dev->major, rbd_dev->spec->image_id); - } + else + rbd_warn(rbd_dev, "unable to get image name"); /* Look up the snapshot name. */ name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); if (!name) { + rbd_warn(rbd_dev, "no snapshot with id %llu", + rbd_dev->spec->snap_id); /* Really a BUG() */ ret = -EIO; goto out_err; } @@ -2665,12 +3136,11 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) if (!reply_buf) return -ENOMEM; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_snapcontext", NULL, 0, - reply_buf, size, - CEPH_OSD_FLAG_READ, ver); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, size, ver); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; @@ -2735,12 +3205,11 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) return ERR_PTR(-ENOMEM); snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]); - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_snapshot_name", (char *) &snap_id, sizeof (snap_id), - reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, size, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; @@ -2766,7 +3235,7 @@ out: static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which, u64 *snap_size, u64 *snap_features) { - __le64 snap_id; + u64 snap_id; u8 order; int ret; @@ -2865,10 +3334,17 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) { struct list_head *next = links->next; - /* Existing snapshot not in the new snap context */ - + /* + * A previously-existing snapshot is not in + * the new snap context. + * + * If the now missing snapshot is the one the + * image is mapped to, clear its exists flag + * so we can avoid sending any more requests + * to it. + */ if (rbd_dev->spec->snap_id == snap->id) - rbd_dev->exists = false; + clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); rbd_remove_snap_dev(snap); dout("%ssnap id %llu has been removed\n", rbd_dev->spec->snap_id == snap->id ? @@ -2942,7 +3418,7 @@ static int rbd_dev_snaps_register(struct rbd_device *rbd_dev) struct rbd_snap *snap; int ret = 0; - dout("%s called\n", __func__); + dout("%s:\n", __func__); if (WARN_ON(!device_is_registered(&rbd_dev->dev))) return -EIO; @@ -2983,22 +3459,6 @@ static void rbd_bus_del_dev(struct rbd_device *rbd_dev) device_unregister(&rbd_dev->dev); } -static int rbd_init_watch_dev(struct rbd_device *rbd_dev) -{ - int ret, rc; - - do { - ret = rbd_req_sync_watch(rbd_dev); - if (ret == -ERANGE) { - rc = rbd_dev_refresh(rbd_dev, NULL); - if (rc < 0) - return rc; - } - } while (ret == -ERANGE); - - return ret; -} - static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); /* @@ -3138,11 +3598,9 @@ static inline char *dup_token(const char **buf, size_t *lenp) size_t len; len = next_token(buf); - dup = kmalloc(len + 1, GFP_KERNEL); + dup = kmemdup(*buf, len + 1, GFP_KERNEL); if (!dup) return NULL; - - memcpy(dup, *buf, len); *(dup + len) = '\0'; *buf += len; @@ -3210,8 +3668,10 @@ static int rbd_add_parse_args(const char *buf, /* The first four tokens are required */ len = next_token(&buf); - if (!len) - return -EINVAL; /* Missing monitor address(es) */ + if (!len) { + rbd_warn(NULL, "no monitor address(es) provided"); + return -EINVAL; + } mon_addrs = buf; mon_addrs_size = len + 1; buf += len; @@ -3220,8 +3680,10 @@ static int rbd_add_parse_args(const char *buf, options = dup_token(&buf, NULL); if (!options) return -ENOMEM; - if (!*options) - goto out_err; /* Missing options */ + if (!*options) { + rbd_warn(NULL, "no options provided"); + goto out_err; + } spec = rbd_spec_alloc(); if (!spec) @@ -3230,14 +3692,18 @@ static int rbd_add_parse_args(const char *buf, spec->pool_name = dup_token(&buf, NULL); if (!spec->pool_name) goto out_mem; - if (!*spec->pool_name) - goto out_err; /* Missing pool name */ + if (!*spec->pool_name) { + rbd_warn(NULL, "no pool name provided"); + goto out_err; + } - spec->image_name = dup_token(&buf, &spec->image_name_len); + spec->image_name = dup_token(&buf, NULL); if (!spec->image_name) goto out_mem; - if (!*spec->image_name) - goto out_err; /* Missing image name */ + if (!*spec->image_name) { + rbd_warn(NULL, "no image name provided"); + goto out_err; + } /* * Snapshot name is optional; default is to use "-" @@ -3251,10 +3717,9 @@ static int rbd_add_parse_args(const char *buf, ret = -ENAMETOOLONG; goto out_err; } - spec->snap_name = kmalloc(len + 1, GFP_KERNEL); + spec->snap_name = kmemdup(buf, len + 1, GFP_KERNEL); if (!spec->snap_name) goto out_mem; - memcpy(spec->snap_name, buf, len); *(spec->snap_name + len) = '\0'; /* Initialize all rbd options to the defaults */ @@ -3323,7 +3788,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) * First, see if the format 2 image id file exists, and if * so, get the image's persistent id from it. */ - size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len; + size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name); object_name = kmalloc(size, GFP_NOIO); if (!object_name) return -ENOMEM; @@ -3339,21 +3804,18 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) goto out; } - ret = rbd_req_sync_exec(rbd_dev, object_name, + ret = rbd_obj_method_sync(rbd_dev, object_name, "rbd", "get_id", NULL, 0, - response, RBD_IMAGE_ID_LEN_MAX, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + response, RBD_IMAGE_ID_LEN_MAX, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; - ret = 0; /* rbd_req_sync_exec() can return positive */ p = response; rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, p + RBD_IMAGE_ID_LEN_MAX, - &rbd_dev->spec->image_id_len, - GFP_NOIO); + NULL, GFP_NOIO); if (IS_ERR(rbd_dev->spec->image_id)) { ret = PTR_ERR(rbd_dev->spec->image_id); rbd_dev->spec->image_id = NULL; @@ -3377,11 +3839,10 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); if (!rbd_dev->spec->image_id) return -ENOMEM; - rbd_dev->spec->image_id_len = 0; /* Record the header object name for this rbd image. */ - size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX); + size = strlen(rbd_dev->spec->image_name) + sizeof (RBD_SUFFIX); rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) { ret = -ENOMEM; @@ -3427,7 +3888,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) * Image id was filled in by the caller. Record the header * object name for this rbd image. */ - size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len; + size = sizeof (RBD_HEADER_PREFIX) + strlen(rbd_dev->spec->image_id); rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) return -ENOMEM; @@ -3542,7 +4003,7 @@ static int rbd_dev_probe_finish(struct rbd_device *rbd_dev) if (ret) goto err_out_bus; - ret = rbd_init_watch_dev(rbd_dev); + ret = rbd_dev_header_watch_sync(rbd_dev, 1); if (ret) goto err_out_bus; @@ -3638,6 +4099,13 @@ static ssize_t rbd_add(struct bus_type *bus, goto err_out_client; spec->pool_id = (u64) rc; + /* The ceph file layout needs to fit pool id in 32 bits */ + + if (WARN_ON(spec->pool_id > (u64) U32_MAX)) { + rc = -EIO; + goto err_out_client; + } + rbd_dev = rbd_dev_create(rbdc, spec); if (!rbd_dev) goto err_out_client; @@ -3691,15 +4159,8 @@ static void rbd_dev_release(struct device *dev) { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - if (rbd_dev->watch_request) { - struct ceph_client *client = rbd_dev->rbd_client->client; - - ceph_osdc_unregister_linger_request(&client->osdc, - rbd_dev->watch_request); - } if (rbd_dev->watch_event) - rbd_req_sync_unwatch(rbd_dev); - + rbd_dev_header_watch_sync(rbd_dev, 0); /* clean up and free blkdev */ rbd_free_disk(rbd_dev); @@ -3743,10 +4204,14 @@ static ssize_t rbd_remove(struct bus_type *bus, goto done; } - if (rbd_dev->open_count) { + spin_lock_irq(&rbd_dev->lock); + if (rbd_dev->open_count) ret = -EBUSY; + else + set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags); + spin_unlock_irq(&rbd_dev->lock); + if (ret < 0) goto done; - } rbd_remove_all_snaps(rbd_dev); rbd_bus_del_dev(rbd_dev); @@ -3782,10 +4247,15 @@ static void rbd_sysfs_cleanup(void) device_unregister(&rbd_root_dev); } -int __init rbd_init(void) +static int __init rbd_init(void) { int rc; + if (!libceph_compatible(NULL)) { + rbd_warn(NULL, "libceph incompatibility (quitting)"); + + return -EINVAL; + } rc = rbd_sysfs_init(); if (rc) return rc; @@ -3793,7 +4263,7 @@ int __init rbd_init(void) return 0; } -void __exit rbd_exit(void) +static void __exit rbd_exit(void) { rbd_sysfs_cleanup(); } diff --git a/drivers/block/rsxx/Makefile b/drivers/block/rsxx/Makefile new file mode 100644 index 000000000000..f35cd0b71f7b --- /dev/null +++ b/drivers/block/rsxx/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o +rsxx-y := config.o core.o cregs.o dev.o dma.o diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c new file mode 100644 index 000000000000..a295e7e9ee41 --- /dev/null +++ b/drivers/block/rsxx/config.c @@ -0,0 +1,213 @@ +/* +* Filename: config.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/types.h> +#include <linux/crc32.h> +#include <linux/swab.h> + +#include "rsxx_priv.h" +#include "rsxx_cfg.h" + +static void initialize_config(void *config) +{ + struct rsxx_card_cfg *cfg = config; + + cfg->hdr.version = RSXX_CFG_VERSION; + + cfg->data.block_size = RSXX_HW_BLK_SIZE; + cfg->data.stripe_size = RSXX_HW_BLK_SIZE; + cfg->data.vendor_id = RSXX_VENDOR_ID_TMS_IBM; + cfg->data.cache_order = (-1); + cfg->data.intr_coal.mode = RSXX_INTR_COAL_DISABLED; + cfg->data.intr_coal.count = 0; + cfg->data.intr_coal.latency = 0; +} + +static u32 config_data_crc32(struct rsxx_card_cfg *cfg) +{ + /* + * Return the compliment of the CRC to ensure compatibility + * (i.e. this is how early rsxx drivers did it.) + */ + + return ~crc32(~0, &cfg->data, sizeof(cfg->data)); +} + + +/*----------------- Config Byte Swap Functions -------------------*/ +static void config_hdr_be_to_cpu(struct card_cfg_hdr *hdr) +{ + hdr->version = be32_to_cpu((__force __be32) hdr->version); + hdr->crc = be32_to_cpu((__force __be32) hdr->crc); +} + +static void config_hdr_cpu_to_be(struct card_cfg_hdr *hdr) +{ + hdr->version = (__force u32) cpu_to_be32(hdr->version); + hdr->crc = (__force u32) cpu_to_be32(hdr->crc); +} + +static void config_data_swab(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = swab32(data[i]); +} + +static void config_data_le_to_cpu(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = le32_to_cpu((__force __le32) data[i]); +} + +static void config_data_cpu_to_le(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = (__force u32) cpu_to_le32(data[i]); +} + + +/*----------------- Config Operations ------------------*/ +static int rsxx_save_config(struct rsxx_cardinfo *card) +{ + struct rsxx_card_cfg cfg; + int st; + + memcpy(&cfg, &card->config, sizeof(cfg)); + + if (unlikely(cfg.hdr.version != RSXX_CFG_VERSION)) { + dev_err(CARD_TO_DEV(card), + "Cannot save config with invalid version %d\n", + cfg.hdr.version); + return -EINVAL; + } + + /* Convert data to little endian for the CRC calculation. */ + config_data_cpu_to_le(&cfg); + + cfg.hdr.crc = config_data_crc32(&cfg); + + /* + * Swap the data from little endian to big endian so it can be + * stored. + */ + config_data_swab(&cfg); + config_hdr_cpu_to_be(&cfg.hdr); + + st = rsxx_creg_write(card, CREG_ADD_CONFIG, sizeof(cfg), &cfg, 1); + if (st) + return st; + + return 0; +} + +int rsxx_load_config(struct rsxx_cardinfo *card) +{ + int st; + u32 crc; + + st = rsxx_creg_read(card, CREG_ADD_CONFIG, sizeof(card->config), + &card->config, 1); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed reading card config.\n"); + return st; + } + + config_hdr_be_to_cpu(&card->config.hdr); + + if (card->config.hdr.version == RSXX_CFG_VERSION) { + /* + * We calculate the CRC with the data in little endian, because + * early drivers did not take big endian CPUs into account. + * The data is always stored in big endian, so we need to byte + * swap it before calculating the CRC. + */ + + config_data_swab(&card->config); + + /* Check the CRC */ + crc = config_data_crc32(&card->config); + if (crc != card->config.hdr.crc) { + dev_err(CARD_TO_DEV(card), + "Config corruption detected!\n"); + dev_info(CARD_TO_DEV(card), + "CRC (sb x%08x is x%08x)\n", + card->config.hdr.crc, crc); + return -EIO; + } + + /* Convert the data to CPU byteorder */ + config_data_le_to_cpu(&card->config); + + } else if (card->config.hdr.version != 0) { + dev_err(CARD_TO_DEV(card), + "Invalid config version %d.\n", + card->config.hdr.version); + /* + * Config version changes require special handling from the + * user + */ + return -EINVAL; + } else { + dev_info(CARD_TO_DEV(card), + "Initializing card configuration.\n"); + initialize_config(card); + st = rsxx_save_config(card); + if (st) + return st; + } + + card->config_valid = 1; + + dev_dbg(CARD_TO_DEV(card), "version: x%08x\n", + card->config.hdr.version); + dev_dbg(CARD_TO_DEV(card), "crc: x%08x\n", + card->config.hdr.crc); + dev_dbg(CARD_TO_DEV(card), "block_size: x%08x\n", + card->config.data.block_size); + dev_dbg(CARD_TO_DEV(card), "stripe_size: x%08x\n", + card->config.data.stripe_size); + dev_dbg(CARD_TO_DEV(card), "vendor_id: x%08x\n", + card->config.data.vendor_id); + dev_dbg(CARD_TO_DEV(card), "cache_order: x%08x\n", + card->config.data.cache_order); + dev_dbg(CARD_TO_DEV(card), "mode: x%08x\n", + card->config.data.intr_coal.mode); + dev_dbg(CARD_TO_DEV(card), "count: x%08x\n", + card->config.data.intr_coal.count); + dev_dbg(CARD_TO_DEV(card), "latency: x%08x\n", + card->config.data.intr_coal.latency); + + return 0; +} + diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c new file mode 100644 index 000000000000..e5162487686a --- /dev/null +++ b/drivers/block/rsxx/core.c @@ -0,0 +1,649 @@ +/* +* Filename: core.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/reboot.h> +#include <linux/slab.h> +#include <linux/bitops.h> + +#include <linux/genhd.h> +#include <linux/idr.h> + +#include "rsxx_priv.h" +#include "rsxx_cfg.h" + +#define NO_LEGACY 0 + +MODULE_DESCRIPTION("IBM RamSan PCIe Flash SSD Device Driver"); +MODULE_AUTHOR("IBM <support@ramsan.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRIVER_VERSION); + +static unsigned int force_legacy = NO_LEGACY; +module_param(force_legacy, uint, 0444); +MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts"); + +static DEFINE_IDA(rsxx_disk_ida); +static DEFINE_SPINLOCK(rsxx_ida_lock); + +/*----------------- Interrupt Control & Handling -------------------*/ +static void __enable_intr(unsigned int *mask, unsigned int intr) +{ + *mask |= intr; +} + +static void __disable_intr(unsigned int *mask, unsigned int intr) +{ + *mask &= ~intr; +} + +/* + * NOTE: Disabling the IER will disable the hardware interrupt. + * Disabling the ISR will disable the software handling of the ISR bit. + * + * Enable/Disable interrupt functions assume the card->irq_lock + * is held by the caller. + */ +void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr) +{ + if (unlikely(card->halt)) + return; + + __enable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr) +{ + __disable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr) +{ + if (unlikely(card->halt)) + return; + + __enable_intr(&card->isr_mask, intr); + __enable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} +void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr) +{ + __disable_intr(&card->isr_mask, intr); + __disable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +static irqreturn_t rsxx_isr(int irq, void *pdata) +{ + struct rsxx_cardinfo *card = pdata; + unsigned int isr; + int handled = 0; + int reread_isr; + int i; + + spin_lock(&card->irq_lock); + + do { + reread_isr = 0; + + isr = ioread32(card->regmap + ISR); + if (isr == 0xffffffff) { + /* + * A few systems seem to have an intermittent issue + * where PCI reads return all Fs, but retrying the read + * a little later will return as expected. + */ + dev_info(CARD_TO_DEV(card), + "ISR = 0xFFFFFFFF, retrying later\n"); + break; + } + + isr &= card->isr_mask; + if (!isr) + break; + + for (i = 0; i < card->n_targets; i++) { + if (isr & CR_INTR_DMA(i)) { + if (card->ier_mask & CR_INTR_DMA(i)) { + rsxx_disable_ier(card, CR_INTR_DMA(i)); + reread_isr = 1; + } + queue_work(card->ctrl[i].done_wq, + &card->ctrl[i].dma_done_work); + handled++; + } + } + + if (isr & CR_INTR_CREG) { + schedule_work(&card->creg_ctrl.done_work); + handled++; + } + + if (isr & CR_INTR_EVENT) { + schedule_work(&card->event_work); + rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); + handled++; + } + } while (reread_isr); + + spin_unlock(&card->irq_lock); + + return handled ? IRQ_HANDLED : IRQ_NONE; +} + +/*----------------- Card Event Handler -------------------*/ +static char *rsxx_card_state_to_str(unsigned int state) +{ + static char *state_strings[] = { + "Unknown", "Shutdown", "Starting", "Formatting", + "Uninitialized", "Good", "Shutting Down", + "Fault", "Read Only Fault", "dStroying" + }; + + return state_strings[ffs(state)]; +} + +static void card_state_change(struct rsxx_cardinfo *card, + unsigned int new_state) +{ + int st; + + dev_info(CARD_TO_DEV(card), + "card state change detected.(%s -> %s)\n", + rsxx_card_state_to_str(card->state), + rsxx_card_state_to_str(new_state)); + + card->state = new_state; + + /* Don't attach DMA interfaces if the card has an invalid config */ + if (!card->config_valid) + return; + + switch (new_state) { + case CARD_STATE_RD_ONLY_FAULT: + dev_crit(CARD_TO_DEV(card), + "Hardware has entered read-only mode!\n"); + /* + * Fall through so the DMA devices can be attached and + * the user can attempt to pull off their data. + */ + case CARD_STATE_GOOD: + st = rsxx_get_card_size8(card, &card->size8); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed attaching DMA devices\n"); + + if (card->config_valid) + set_capacity(card->gendisk, card->size8 >> 9); + break; + + case CARD_STATE_FAULT: + dev_crit(CARD_TO_DEV(card), + "Hardware Fault reported!\n"); + /* Fall through. */ + + /* Everything else, detach DMA interface if it's attached. */ + case CARD_STATE_SHUTDOWN: + case CARD_STATE_STARTING: + case CARD_STATE_FORMATTING: + case CARD_STATE_UNINITIALIZED: + case CARD_STATE_SHUTTING_DOWN: + /* + * dStroy is a term coined by marketing to represent the low level + * secure erase. + */ + case CARD_STATE_DSTROYING: + set_capacity(card->gendisk, 0); + break; + } +} + +static void card_event_handler(struct work_struct *work) +{ + struct rsxx_cardinfo *card; + unsigned int state; + unsigned long flags; + int st; + + card = container_of(work, struct rsxx_cardinfo, event_work); + + if (unlikely(card->halt)) + return; + + /* + * Enable the interrupt now to avoid any weird race conditions where a + * state change might occur while rsxx_get_card_state() is + * processing a returned creg cmd. + */ + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + st = rsxx_get_card_state(card, &state); + if (st) { + dev_info(CARD_TO_DEV(card), + "Failed reading state after event.\n"); + return; + } + + if (card->state != state) + card_state_change(card, state); + + if (card->creg_ctrl.creg_stats.stat & CREG_STAT_LOG_PENDING) + rsxx_read_hw_log(card); +} + +/*----------------- Card Operations -------------------*/ +static int card_shutdown(struct rsxx_cardinfo *card) +{ + unsigned int state; + signed long start; + const int timeout = msecs_to_jiffies(120000); + int st; + + /* We can't issue a shutdown if the card is in a transition state */ + start = jiffies; + do { + st = rsxx_get_card_state(card, &state); + if (st) + return st; + } while (state == CARD_STATE_STARTING && + (jiffies - start < timeout)); + + if (state == CARD_STATE_STARTING) + return -ETIMEDOUT; + + /* Only issue a shutdown if we need to */ + if ((state != CARD_STATE_SHUTTING_DOWN) && + (state != CARD_STATE_SHUTDOWN)) { + st = rsxx_issue_card_cmd(card, CARD_CMD_SHUTDOWN); + if (st) + return st; + } + + start = jiffies; + do { + st = rsxx_get_card_state(card, &state); + if (st) + return st; + } while (state != CARD_STATE_SHUTDOWN && + (jiffies - start < timeout)); + + if (state != CARD_STATE_SHUTDOWN) + return -ETIMEDOUT; + + return 0; +} + +/*----------------- Driver Initialization & Setup -------------------*/ +/* Returns: 0 if the driver is compatible with the device + -1 if the driver is NOT compatible with the device */ +static int rsxx_compatibility_check(struct rsxx_cardinfo *card) +{ + unsigned char pci_rev; + + pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev); + + if (pci_rev > RS70_PCI_REV_SUPPORTED) + return -1; + return 0; +} + +static int rsxx_pci_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct rsxx_cardinfo *card; + int st; + + dev_info(&dev->dev, "PCI-Flash SSD discovered\n"); + + card = kzalloc(sizeof(*card), GFP_KERNEL); + if (!card) + return -ENOMEM; + + card->dev = dev; + pci_set_drvdata(dev, card); + + do { + if (!ida_pre_get(&rsxx_disk_ida, GFP_KERNEL)) { + st = -ENOMEM; + goto failed_ida_get; + } + + spin_lock(&rsxx_ida_lock); + st = ida_get_new(&rsxx_disk_ida, &card->disk_id); + spin_unlock(&rsxx_ida_lock); + } while (st == -EAGAIN); + + if (st) + goto failed_ida_get; + + st = pci_enable_device(dev); + if (st) + goto failed_enable; + + pci_set_master(dev); + pci_set_dma_max_seg_size(dev, RSXX_HW_BLK_SIZE); + + st = pci_set_dma_mask(dev, DMA_BIT_MASK(64)); + if (st) { + dev_err(CARD_TO_DEV(card), + "No usable DMA configuration,aborting\n"); + goto failed_dma_mask; + } + + st = pci_request_regions(dev, DRIVER_NAME); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed to request memory region\n"); + goto failed_request_regions; + } + + if (pci_resource_len(dev, 0) == 0) { + dev_err(CARD_TO_DEV(card), "BAR0 has length 0!\n"); + st = -ENOMEM; + goto failed_iomap; + } + + card->regmap = pci_iomap(dev, 0, 0); + if (!card->regmap) { + dev_err(CARD_TO_DEV(card), "Failed to map BAR0\n"); + st = -ENOMEM; + goto failed_iomap; + } + + spin_lock_init(&card->irq_lock); + card->halt = 0; + + spin_lock_irq(&card->irq_lock); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irq(&card->irq_lock); + + if (!force_legacy) { + st = pci_enable_msi(dev); + if (st) + dev_warn(CARD_TO_DEV(card), + "Failed to enable MSI\n"); + } + + st = request_irq(dev->irq, rsxx_isr, IRQF_DISABLED | IRQF_SHARED, + DRIVER_NAME, card); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed requesting IRQ%d\n", dev->irq); + goto failed_irq; + } + + /************* Setup Processor Command Interface *************/ + rsxx_creg_setup(card); + + spin_lock_irq(&card->irq_lock); + rsxx_enable_ier_and_isr(card, CR_INTR_CREG); + spin_unlock_irq(&card->irq_lock); + + st = rsxx_compatibility_check(card); + if (st) { + dev_warn(CARD_TO_DEV(card), + "Incompatible driver detected. Please update the driver.\n"); + st = -EINVAL; + goto failed_compatiblity_check; + } + + /************* Load Card Config *************/ + st = rsxx_load_config(card); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed loading card config\n"); + + /************* Setup DMA Engine *************/ + st = rsxx_get_num_targets(card, &card->n_targets); + if (st) + dev_info(CARD_TO_DEV(card), + "Failed reading the number of DMA targets\n"); + + card->ctrl = kzalloc(card->n_targets * sizeof(*card->ctrl), GFP_KERNEL); + if (!card->ctrl) { + st = -ENOMEM; + goto failed_dma_setup; + } + + st = rsxx_dma_setup(card); + if (st) { + dev_info(CARD_TO_DEV(card), + "Failed to setup DMA engine\n"); + goto failed_dma_setup; + } + + /************* Setup Card Event Handler *************/ + INIT_WORK(&card->event_work, card_event_handler); + + st = rsxx_setup_dev(card); + if (st) + goto failed_create_dev; + + rsxx_get_card_state(card, &card->state); + + dev_info(CARD_TO_DEV(card), + "card state: %s\n", + rsxx_card_state_to_str(card->state)); + + /* + * Now that the DMA Engine and devices have been setup, + * we can enable the event interrupt(it kicks off actions in + * those layers so we couldn't enable it right away.) + */ + spin_lock_irq(&card->irq_lock); + rsxx_enable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irq(&card->irq_lock); + + if (card->state == CARD_STATE_SHUTDOWN) { + st = rsxx_issue_card_cmd(card, CARD_CMD_STARTUP); + if (st) + dev_crit(CARD_TO_DEV(card), + "Failed issuing card startup\n"); + } else if (card->state == CARD_STATE_GOOD || + card->state == CARD_STATE_RD_ONLY_FAULT) { + st = rsxx_get_card_size8(card, &card->size8); + if (st) + card->size8 = 0; + } + + rsxx_attach_dev(card); + + return 0; + +failed_create_dev: + rsxx_dma_destroy(card); +failed_dma_setup: +failed_compatiblity_check: + spin_lock_irq(&card->irq_lock); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irq(&card->irq_lock); + free_irq(dev->irq, card); + if (!force_legacy) + pci_disable_msi(dev); +failed_irq: + pci_iounmap(dev, card->regmap); +failed_iomap: + pci_release_regions(dev); +failed_request_regions: +failed_dma_mask: + pci_disable_device(dev); +failed_enable: + spin_lock(&rsxx_ida_lock); + ida_remove(&rsxx_disk_ida, card->disk_id); + spin_unlock(&rsxx_ida_lock); +failed_ida_get: + kfree(card); + + return st; +} + +static void rsxx_pci_remove(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + unsigned long flags; + int st; + int i; + + if (!card) + return; + + dev_info(CARD_TO_DEV(card), + "Removing PCI-Flash SSD.\n"); + + rsxx_detach_dev(card); + + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + st = card_shutdown(card); + if (st) + dev_crit(CARD_TO_DEV(card), "Shutdown failed!\n"); + + /* Sync outstanding event handlers. */ + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + /* Prevent work_structs from re-queuing themselves. */ + card->halt = 1; + + cancel_work_sync(&card->event_work); + + rsxx_destroy_dev(card); + rsxx_dma_destroy(card); + + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irqrestore(&card->irq_lock, flags); + free_irq(dev->irq, card); + + if (!force_legacy) + pci_disable_msi(dev); + + rsxx_creg_destroy(card); + + pci_iounmap(dev, card->regmap); + + pci_disable_device(dev); + pci_release_regions(dev); + + kfree(card); +} + +static int rsxx_pci_suspend(struct pci_dev *dev, pm_message_t state) +{ + /* We don't support suspend at this time. */ + return -ENOSYS; +} + +static void rsxx_pci_shutdown(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + unsigned long flags; + int i; + + if (!card) + return; + + dev_info(CARD_TO_DEV(card), "Shutting down PCI-Flash SSD.\n"); + + rsxx_detach_dev(card); + + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + card_shutdown(card); +} + +static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = { + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70D_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS80_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS81_FLASH)}, + {0,}, +}; + +MODULE_DEVICE_TABLE(pci, rsxx_pci_ids); + +static struct pci_driver rsxx_pci_driver = { + .name = DRIVER_NAME, + .id_table = rsxx_pci_ids, + .probe = rsxx_pci_probe, + .remove = rsxx_pci_remove, + .suspend = rsxx_pci_suspend, + .shutdown = rsxx_pci_shutdown, +}; + +static int __init rsxx_core_init(void) +{ + int st; + + st = rsxx_dev_init(); + if (st) + return st; + + st = rsxx_dma_init(); + if (st) + goto dma_init_failed; + + st = rsxx_creg_init(); + if (st) + goto creg_init_failed; + + return pci_register_driver(&rsxx_pci_driver); + +creg_init_failed: + rsxx_dma_cleanup(); +dma_init_failed: + rsxx_dev_cleanup(); + + return st; +} + +static void __exit rsxx_core_cleanup(void) +{ + pci_unregister_driver(&rsxx_pci_driver); + rsxx_creg_cleanup(); + rsxx_dma_cleanup(); + rsxx_dev_cleanup(); +} + +module_init(rsxx_core_init); +module_exit(rsxx_core_cleanup); diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c new file mode 100644 index 000000000000..80bbe639fccd --- /dev/null +++ b/drivers/block/rsxx/cregs.c @@ -0,0 +1,758 @@ +/* +* Filename: cregs.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/completion.h> +#include <linux/slab.h> + +#include "rsxx_priv.h" + +#define CREG_TIMEOUT_MSEC 10000 + +typedef void (*creg_cmd_cb)(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st); + +struct creg_cmd { + struct list_head list; + creg_cmd_cb cb; + void *cb_private; + unsigned int op; + unsigned int addr; + int cnt8; + void *buf; + unsigned int stream; + unsigned int status; +}; + +static struct kmem_cache *creg_cmd_pool; + + +/*------------ Private Functions --------------*/ + +#if defined(__LITTLE_ENDIAN) +#define LITTLE_ENDIAN 1 +#elif defined(__BIG_ENDIAN) +#define LITTLE_ENDIAN 0 +#else +#error Unknown endianess!!! Aborting... +#endif + +static void copy_to_creg_data(struct rsxx_cardinfo *card, + int cnt8, + void *buf, + unsigned int stream) +{ + int i = 0; + u32 *data = buf; + + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { + /* + * Firmware implementation makes it necessary to byte swap on + * little endian processors. + */ + if (LITTLE_ENDIAN && stream) + iowrite32be(data[i], card->regmap + CREG_DATA(i)); + else + iowrite32(data[i], card->regmap + CREG_DATA(i)); + } +} + + +static void copy_from_creg_data(struct rsxx_cardinfo *card, + int cnt8, + void *buf, + unsigned int stream) +{ + int i = 0; + u32 *data = buf; + + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { + /* + * Firmware implementation makes it necessary to byte swap on + * little endian processors. + */ + if (LITTLE_ENDIAN && stream) + data[i] = ioread32be(card->regmap + CREG_DATA(i)); + else + data[i] = ioread32(card->regmap + CREG_DATA(i)); + } +} + +static struct creg_cmd *pop_active_cmd(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd; + + /* + * Spin lock is needed because this can be called in atomic/interrupt + * context. + */ + spin_lock_bh(&card->creg_ctrl.lock); + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + spin_unlock_bh(&card->creg_ctrl.lock); + + return cmd; +} + +static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd) +{ + iowrite32(cmd->addr, card->regmap + CREG_ADD); + iowrite32(cmd->cnt8, card->regmap + CREG_CNT); + + if (cmd->op == CREG_OP_WRITE) { + if (cmd->buf) + copy_to_creg_data(card, cmd->cnt8, + cmd->buf, cmd->stream); + } + + /* + * Data copy must complete before initiating the command. This is + * needed for weakly ordered processors (i.e. PowerPC), so that all + * neccessary registers are written before we kick the hardware. + */ + wmb(); + + /* Setting the valid bit will kick off the command. */ + iowrite32(cmd->op, card->regmap + CREG_CMD); +} + +static void creg_kick_queue(struct rsxx_cardinfo *card) +{ + if (card->creg_ctrl.active || list_empty(&card->creg_ctrl.queue)) + return; + + card->creg_ctrl.active = 1; + card->creg_ctrl.active_cmd = list_first_entry(&card->creg_ctrl.queue, + struct creg_cmd, list); + list_del(&card->creg_ctrl.active_cmd->list); + card->creg_ctrl.q_depth--; + + /* + * We have to set the timer before we push the new command. Otherwise, + * we could create a race condition that would occur if the timer + * was not canceled, and expired after the new command was pushed, + * but before the command was issued to hardware. + */ + mod_timer(&card->creg_ctrl.cmd_timer, + jiffies + msecs_to_jiffies(CREG_TIMEOUT_MSEC)); + + creg_issue_cmd(card, card->creg_ctrl.active_cmd); +} + +static int creg_queue_cmd(struct rsxx_cardinfo *card, + unsigned int op, + unsigned int addr, + unsigned int cnt8, + void *buf, + int stream, + creg_cmd_cb callback, + void *cb_private) +{ + struct creg_cmd *cmd; + + /* Don't queue stuff up if we're halted. */ + if (unlikely(card->halt)) + return -EINVAL; + + if (card->creg_ctrl.reset) + return -EAGAIN; + + if (cnt8 > MAX_CREG_DATA8) + return -EINVAL; + + cmd = kmem_cache_alloc(creg_cmd_pool, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + INIT_LIST_HEAD(&cmd->list); + + cmd->op = op; + cmd->addr = addr; + cmd->cnt8 = cnt8; + cmd->buf = buf; + cmd->stream = stream; + cmd->cb = callback; + cmd->cb_private = cb_private; + cmd->status = 0; + + spin_lock(&card->creg_ctrl.lock); + list_add_tail(&cmd->list, &card->creg_ctrl.queue); + card->creg_ctrl.q_depth++; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); + + return 0; +} + +static void creg_cmd_timed_out(unsigned long data) +{ + struct rsxx_cardinfo *card = (struct rsxx_cardinfo *) data; + struct creg_cmd *cmd; + + cmd = pop_active_cmd(card); + if (cmd == NULL) { + card->creg_ctrl.creg_stats.creg_timeout++; + dev_warn(CARD_TO_DEV(card), + "No active command associated with timeout!\n"); + return; + } + + if (cmd->cb) + cmd->cb(card, cmd, -ETIMEDOUT); + + kmem_cache_free(creg_cmd_pool, cmd); + + + spin_lock(&card->creg_ctrl.lock); + card->creg_ctrl.active = 0; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); +} + + +static void creg_cmd_done(struct work_struct *work) +{ + struct rsxx_cardinfo *card; + struct creg_cmd *cmd; + int st = 0; + + card = container_of(work, struct rsxx_cardinfo, + creg_ctrl.done_work); + + /* + * The timer could not be cancelled for some reason, + * race to pop the active command. + */ + if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0) + card->creg_ctrl.creg_stats.failed_cancel_timer++; + + cmd = pop_active_cmd(card); + if (cmd == NULL) { + dev_err(CARD_TO_DEV(card), + "Spurious creg interrupt!\n"); + return; + } + + card->creg_ctrl.creg_stats.stat = ioread32(card->regmap + CREG_STAT); + cmd->status = card->creg_ctrl.creg_stats.stat; + if ((cmd->status & CREG_STAT_STATUS_MASK) == 0) { + dev_err(CARD_TO_DEV(card), + "Invalid status on creg command\n"); + /* + * At this point we're probably reading garbage from HW. Don't + * do anything else that could mess up the system and let + * the sync function return an error. + */ + st = -EIO; + goto creg_done; + } else if (cmd->status & CREG_STAT_ERROR) { + st = -EIO; + } + + if ((cmd->op == CREG_OP_READ)) { + unsigned int cnt8 = ioread32(card->regmap + CREG_CNT); + + /* Paranoid Sanity Checks */ + if (!cmd->buf) { + dev_err(CARD_TO_DEV(card), + "Buffer not given for read.\n"); + st = -EIO; + goto creg_done; + } + if (cnt8 != cmd->cnt8) { + dev_err(CARD_TO_DEV(card), + "count mismatch\n"); + st = -EIO; + goto creg_done; + } + + copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream); + } + +creg_done: + if (cmd->cb) + cmd->cb(card, cmd, st); + + kmem_cache_free(creg_cmd_pool, cmd); + + spin_lock(&card->creg_ctrl.lock); + card->creg_ctrl.active = 0; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); +} + +static void creg_reset(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd = NULL; + struct creg_cmd *tmp; + unsigned long flags; + + /* + * mutex_trylock is used here because if reset_lock is taken then a + * reset is already happening. So, we can just go ahead and return. + */ + if (!mutex_trylock(&card->creg_ctrl.reset_lock)) + return; + + card->creg_ctrl.reset = 1; + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + dev_warn(CARD_TO_DEV(card), + "Resetting creg interface for recovery\n"); + + /* Cancel outstanding commands */ + spin_lock(&card->creg_ctrl.lock); + list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { + list_del(&cmd->list); + card->creg_ctrl.q_depth--; + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + } + + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + if (cmd) { + if (timer_pending(&card->creg_ctrl.cmd_timer)) + del_timer_sync(&card->creg_ctrl.cmd_timer); + + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + + card->creg_ctrl.active = 0; + } + spin_unlock(&card->creg_ctrl.lock); + + card->creg_ctrl.reset = 0; + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + mutex_unlock(&card->creg_ctrl.reset_lock); +} + +/* Used for synchronous accesses */ +struct creg_completion { + struct completion *cmd_done; + int st; + u32 creg_status; +}; + +static void creg_cmd_done_cb(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st) +{ + struct creg_completion *cmd_completion; + + cmd_completion = cmd->cb_private; + BUG_ON(!cmd_completion); + + cmd_completion->st = st; + cmd_completion->creg_status = cmd->status; + complete(cmd_completion->cmd_done); +} + +static int __issue_creg_rw(struct rsxx_cardinfo *card, + unsigned int op, + unsigned int addr, + unsigned int cnt8, + void *buf, + int stream, + unsigned int *hw_stat) +{ + DECLARE_COMPLETION_ONSTACK(cmd_done); + struct creg_completion completion; + unsigned long timeout; + int st; + + completion.cmd_done = &cmd_done; + completion.st = 0; + completion.creg_status = 0; + + st = creg_queue_cmd(card, op, addr, cnt8, buf, stream, creg_cmd_done_cb, + &completion); + if (st) + return st; + + /* + * This timeout is neccessary for unresponsive hardware. The additional + * 20 seconds to used to guarantee that each cregs requests has time to + * complete. + */ + timeout = msecs_to_jiffies((CREG_TIMEOUT_MSEC * + card->creg_ctrl.q_depth) + 20000); + + /* + * The creg interface is guaranteed to complete. It has a timeout + * mechanism that will kick in if hardware does not respond. + */ + st = wait_for_completion_timeout(completion.cmd_done, timeout); + if (st == 0) { + /* + * This is really bad, because the kernel timer did not + * expire and notify us of a timeout! + */ + dev_crit(CARD_TO_DEV(card), + "cregs timer failed\n"); + creg_reset(card); + return -EIO; + } + + *hw_stat = completion.creg_status; + + if (completion.st) { + dev_warn(CARD_TO_DEV(card), + "creg command failed(%d x%08x)\n", + completion.st, addr); + return completion.st; + } + + return 0; +} + +static int issue_creg_rw(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int stream, + int read) +{ + unsigned int hw_stat; + unsigned int xfer; + unsigned int op; + int st; + + op = read ? CREG_OP_READ : CREG_OP_WRITE; + + do { + xfer = min_t(unsigned int, size8, MAX_CREG_DATA8); + + st = __issue_creg_rw(card, op, addr, xfer, + data, stream, &hw_stat); + if (st) + return st; + + data = (char *)data + xfer; + addr += xfer; + size8 -= xfer; + } while (size8); + + return 0; +} + +/* ---------------------------- Public API ---------------------------------- */ +int rsxx_creg_write(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream) +{ + return issue_creg_rw(card, addr, size8, data, byte_stream, 0); +} + +int rsxx_creg_read(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream) +{ + return issue_creg_rw(card, addr, size8, data, byte_stream, 1); +} + +int rsxx_get_card_state(struct rsxx_cardinfo *card, unsigned int *state) +{ + return rsxx_creg_read(card, CREG_ADD_CARD_STATE, + sizeof(*state), state, 0); +} + +int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8) +{ + unsigned int size; + int st; + + st = rsxx_creg_read(card, CREG_ADD_CARD_SIZE, + sizeof(size), &size, 0); + if (st) + return st; + + *size8 = (u64)size * RSXX_HW_BLK_SIZE; + return 0; +} + +int rsxx_get_num_targets(struct rsxx_cardinfo *card, + unsigned int *n_targets) +{ + return rsxx_creg_read(card, CREG_ADD_NUM_TARGETS, + sizeof(*n_targets), n_targets, 0); +} + +int rsxx_get_card_capabilities(struct rsxx_cardinfo *card, + u32 *capabilities) +{ + return rsxx_creg_read(card, CREG_ADD_CAPABILITIES, + sizeof(*capabilities), capabilities, 0); +} + +int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd) +{ + return rsxx_creg_write(card, CREG_ADD_CARD_CMD, + sizeof(cmd), &cmd, 0); +} + + +/*----------------- HW Log Functions -------------------*/ +static void hw_log_msg(struct rsxx_cardinfo *card, const char *str, int len) +{ + static char level; + + /* + * New messages start with "<#>", where # is the log level. Messages + * that extend past the log buffer will use the previous level + */ + if ((len > 3) && (str[0] == '<') && (str[2] == '>')) { + level = str[1]; + str += 3; /* Skip past the log level. */ + len -= 3; + } + + switch (level) { + case '0': + dev_emerg(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '1': + dev_alert(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '2': + dev_crit(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '3': + dev_err(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '4': + dev_warn(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '5': + dev_notice(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '6': + dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '7': + dev_dbg(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + default: + dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + } +} + +/* + * The substrncpy function copies the src string (which includes the + * terminating '\0' character), up to the count into the dest pointer. + * Returns the number of bytes copied to dest. + */ +static int substrncpy(char *dest, const char *src, int count) +{ + int max_cnt = count; + + while (count) { + count--; + *dest = *src; + if (*dest == '\0') + break; + src++; + dest++; + } + return max_cnt - count; +} + + +static void read_hw_log_done(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st) +{ + char *buf; + char *log_str; + int cnt; + int len; + int off; + + buf = cmd->buf; + off = 0; + + /* Failed getting the log message */ + if (st) + return; + + while (off < cmd->cnt8) { + log_str = &card->log.buf[card->log.buf_len]; + cnt = min(cmd->cnt8 - off, LOG_BUF_SIZE8 - card->log.buf_len); + len = substrncpy(log_str, &buf[off], cnt); + + off += len; + card->log.buf_len += len; + + /* + * Flush the log if we've hit the end of a message or if we've + * run out of buffer space. + */ + if ((log_str[len - 1] == '\0') || + (card->log.buf_len == LOG_BUF_SIZE8)) { + if (card->log.buf_len != 1) /* Don't log blank lines. */ + hw_log_msg(card, card->log.buf, + card->log.buf_len); + card->log.buf_len = 0; + } + + } + + if (cmd->status & CREG_STAT_LOG_PENDING) + rsxx_read_hw_log(card); +} + +int rsxx_read_hw_log(struct rsxx_cardinfo *card) +{ + int st; + + st = creg_queue_cmd(card, CREG_OP_READ, CREG_ADD_LOG, + sizeof(card->log.tmp), card->log.tmp, + 1, read_hw_log_done, NULL); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed getting log text\n"); + + return st; +} + +/*-------------- IOCTL REG Access ------------------*/ +static int issue_reg_cmd(struct rsxx_cardinfo *card, + struct rsxx_reg_access *cmd, + int read) +{ + unsigned int op = read ? CREG_OP_READ : CREG_OP_WRITE; + + return __issue_creg_rw(card, op, cmd->addr, cmd->cnt, cmd->data, + cmd->stream, &cmd->stat); +} + +int rsxx_reg_access(struct rsxx_cardinfo *card, + struct rsxx_reg_access __user *ucmd, + int read) +{ + struct rsxx_reg_access cmd; + int st; + + st = copy_from_user(&cmd, ucmd, sizeof(cmd)); + if (st) + return -EFAULT; + + if (cmd.cnt > RSXX_MAX_REG_CNT) + return -EFAULT; + + st = issue_reg_cmd(card, &cmd, read); + if (st) + return st; + + st = put_user(cmd.stat, &ucmd->stat); + if (st) + return -EFAULT; + + if (read) { + st = copy_to_user(ucmd->data, cmd.data, cmd.cnt); + if (st) + return -EFAULT; + } + + return 0; +} + +/*------------ Initialization & Setup --------------*/ +int rsxx_creg_setup(struct rsxx_cardinfo *card) +{ + card->creg_ctrl.active_cmd = NULL; + + INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done); + mutex_init(&card->creg_ctrl.reset_lock); + INIT_LIST_HEAD(&card->creg_ctrl.queue); + spin_lock_init(&card->creg_ctrl.lock); + setup_timer(&card->creg_ctrl.cmd_timer, creg_cmd_timed_out, + (unsigned long) card); + + return 0; +} + +void rsxx_creg_destroy(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd; + struct creg_cmd *tmp; + int cnt = 0; + + /* Cancel outstanding commands */ + spin_lock(&card->creg_ctrl.lock); + list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { + list_del(&cmd->list); + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + cnt++; + } + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Canceled %d queue creg commands\n", cnt); + + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + if (cmd) { + if (timer_pending(&card->creg_ctrl.cmd_timer)) + del_timer_sync(&card->creg_ctrl.cmd_timer); + + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + dev_info(CARD_TO_DEV(card), + "Canceled active creg command\n"); + kmem_cache_free(creg_cmd_pool, cmd); + } + spin_unlock(&card->creg_ctrl.lock); + + cancel_work_sync(&card->creg_ctrl.done_work); +} + + +int rsxx_creg_init(void) +{ + creg_cmd_pool = KMEM_CACHE(creg_cmd, SLAB_HWCACHE_ALIGN); + if (!creg_cmd_pool) + return -ENOMEM; + + return 0; +} + +void rsxx_creg_cleanup(void) +{ + kmem_cache_destroy(creg_cmd_pool); +} diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c new file mode 100644 index 000000000000..4346d17d2949 --- /dev/null +++ b/drivers/block/rsxx/dev.c @@ -0,0 +1,367 @@ +/* +* Filename: dev.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/slab.h> + +#include <linux/hdreg.h> +#include <linux/genhd.h> +#include <linux/blkdev.h> +#include <linux/bio.h> + +#include <linux/fs.h> + +#include "rsxx_priv.h" + +static unsigned int blkdev_minors = 64; +module_param(blkdev_minors, uint, 0444); +MODULE_PARM_DESC(blkdev_minors, "Number of minors(partitions)"); + +/* + * For now I'm making this tweakable in case any applications hit this limit. + * If you see a "bio too big" error in the log you will need to raise this + * value. + */ +static unsigned int blkdev_max_hw_sectors = 1024; +module_param(blkdev_max_hw_sectors, uint, 0444); +MODULE_PARM_DESC(blkdev_max_hw_sectors, "Max hw sectors for a single BIO"); + +static unsigned int enable_blkdev = 1; +module_param(enable_blkdev , uint, 0444); +MODULE_PARM_DESC(enable_blkdev, "Enable block device interfaces"); + + +struct rsxx_bio_meta { + struct bio *bio; + atomic_t pending_dmas; + atomic_t error; + unsigned long start_time; +}; + +static struct kmem_cache *bio_meta_pool; + +/*----------------- Block Device Operations -----------------*/ +static int rsxx_blkdev_ioctl(struct block_device *bdev, + fmode_t mode, + unsigned int cmd, + unsigned long arg) +{ + struct rsxx_cardinfo *card = bdev->bd_disk->private_data; + + switch (cmd) { + case RSXX_GETREG: + return rsxx_reg_access(card, (void __user *)arg, 1); + case RSXX_SETREG: + return rsxx_reg_access(card, (void __user *)arg, 0); + } + + return -ENOTTY; +} + +static int rsxx_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct rsxx_cardinfo *card = bdev->bd_disk->private_data; + u64 blocks = card->size8 >> 9; + + /* + * get geometry: Fake it. I haven't found any drivers that set + * geo->start, so we won't either. + */ + if (card->size8) { + geo->heads = 64; + geo->sectors = 16; + do_div(blocks, (geo->heads * geo->sectors)); + geo->cylinders = blocks; + } else { + geo->heads = 0; + geo->sectors = 0; + geo->cylinders = 0; + } + return 0; +} + +static const struct block_device_operations rsxx_fops = { + .owner = THIS_MODULE, + .getgeo = rsxx_getgeo, + .ioctl = rsxx_blkdev_ioctl, +}; + +static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio) +{ + struct hd_struct *part0 = &card->gendisk->part0; + int rw = bio_data_dir(bio); + int cpu; + + cpu = part_stat_lock(); + + part_round_stats(cpu, part0); + part_inc_in_flight(part0, rw); + + part_stat_unlock(); +} + +static void disk_stats_complete(struct rsxx_cardinfo *card, + struct bio *bio, + unsigned long start_time) +{ + struct hd_struct *part0 = &card->gendisk->part0; + unsigned long duration = jiffies - start_time; + int rw = bio_data_dir(bio); + int cpu; + + cpu = part_stat_lock(); + + part_stat_add(cpu, part0, sectors[rw], bio_sectors(bio)); + part_stat_inc(cpu, part0, ios[rw]); + part_stat_add(cpu, part0, ticks[rw], duration); + + part_round_stats(cpu, part0); + part_dec_in_flight(part0, rw); + + part_stat_unlock(); +} + +static void bio_dma_done_cb(struct rsxx_cardinfo *card, + void *cb_data, + unsigned int error) +{ + struct rsxx_bio_meta *meta = cb_data; + + if (error) + atomic_set(&meta->error, 1); + + if (atomic_dec_and_test(&meta->pending_dmas)) { + disk_stats_complete(card, meta->bio, meta->start_time); + + bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0); + kmem_cache_free(bio_meta_pool, meta); + } +} + +static void rsxx_make_request(struct request_queue *q, struct bio *bio) +{ + struct rsxx_cardinfo *card = q->queuedata; + struct rsxx_bio_meta *bio_meta; + int st = -EINVAL; + + might_sleep(); + + if (unlikely(card->halt)) { + st = -EFAULT; + goto req_err; + } + + if (unlikely(card->dma_fault)) { + st = (-EFAULT); + goto req_err; + } + + if (bio->bi_size == 0) { + dev_err(CARD_TO_DEV(card), "size zero BIO!\n"); + goto req_err; + } + + bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL); + if (!bio_meta) { + st = -ENOMEM; + goto req_err; + } + + bio_meta->bio = bio; + atomic_set(&bio_meta->error, 0); + atomic_set(&bio_meta->pending_dmas, 0); + bio_meta->start_time = jiffies; + + disk_stats_start(card, bio); + + dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n", + bio_data_dir(bio) ? 'W' : 'R', bio_meta, + (u64)bio->bi_sector << 9, bio->bi_size); + + st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas, + bio_dma_done_cb, bio_meta); + if (st) + goto queue_err; + + return; + +queue_err: + kmem_cache_free(bio_meta_pool, bio_meta); +req_err: + bio_endio(bio, st); +} + +/*----------------- Device Setup -------------------*/ +static bool rsxx_discard_supported(struct rsxx_cardinfo *card) +{ + unsigned char pci_rev; + + pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev); + + return (pci_rev >= RSXX_DISCARD_SUPPORT); +} + +static unsigned short rsxx_get_logical_block_size( + struct rsxx_cardinfo *card) +{ + u32 capabilities = 0; + int st; + + st = rsxx_get_card_capabilities(card, &capabilities); + if (st) + dev_warn(CARD_TO_DEV(card), + "Failed reading card capabilities register\n"); + + /* Earlier firmware did not have support for 512 byte accesses */ + if (capabilities & CARD_CAP_SUBPAGE_WRITES) + return 512; + else + return RSXX_HW_BLK_SIZE; +} + +int rsxx_attach_dev(struct rsxx_cardinfo *card) +{ + mutex_lock(&card->dev_lock); + + /* The block device requires the stripe size from the config. */ + if (enable_blkdev) { + if (card->config_valid) + set_capacity(card->gendisk, card->size8 >> 9); + else + set_capacity(card->gendisk, 0); + add_disk(card->gendisk); + + card->bdev_attached = 1; + } + + mutex_unlock(&card->dev_lock); + + return 0; +} + +void rsxx_detach_dev(struct rsxx_cardinfo *card) +{ + mutex_lock(&card->dev_lock); + + if (card->bdev_attached) { + del_gendisk(card->gendisk); + card->bdev_attached = 0; + } + + mutex_unlock(&card->dev_lock); +} + +int rsxx_setup_dev(struct rsxx_cardinfo *card) +{ + unsigned short blk_size; + + mutex_init(&card->dev_lock); + + if (!enable_blkdev) + return 0; + + card->major = register_blkdev(0, DRIVER_NAME); + if (card->major < 0) { + dev_err(CARD_TO_DEV(card), "Failed to get major number\n"); + return -ENOMEM; + } + + card->queue = blk_alloc_queue(GFP_KERNEL); + if (!card->queue) { + dev_err(CARD_TO_DEV(card), "Failed queue alloc\n"); + unregister_blkdev(card->major, DRIVER_NAME); + return -ENOMEM; + } + + card->gendisk = alloc_disk(blkdev_minors); + if (!card->gendisk) { + dev_err(CARD_TO_DEV(card), "Failed disk alloc\n"); + blk_cleanup_queue(card->queue); + unregister_blkdev(card->major, DRIVER_NAME); + return -ENOMEM; + } + + blk_size = rsxx_get_logical_block_size(card); + + blk_queue_make_request(card->queue, rsxx_make_request); + blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY); + blk_queue_dma_alignment(card->queue, blk_size - 1); + blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); + blk_queue_logical_block_size(card->queue, blk_size); + blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); + + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue); + if (rsxx_discard_supported(card)) { + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue); + blk_queue_max_discard_sectors(card->queue, + RSXX_HW_BLK_SIZE >> 9); + card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE; + card->queue->limits.discard_alignment = RSXX_HW_BLK_SIZE; + card->queue->limits.discard_zeroes_data = 1; + } + + card->queue->queuedata = card; + + snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name), + "rsxx%d", card->disk_id); + card->gendisk->driverfs_dev = &card->dev->dev; + card->gendisk->major = card->major; + card->gendisk->first_minor = 0; + card->gendisk->fops = &rsxx_fops; + card->gendisk->private_data = card; + card->gendisk->queue = card->queue; + + return 0; +} + +void rsxx_destroy_dev(struct rsxx_cardinfo *card) +{ + if (!enable_blkdev) + return; + + put_disk(card->gendisk); + card->gendisk = NULL; + + blk_cleanup_queue(card->queue); + unregister_blkdev(card->major, DRIVER_NAME); +} + +int rsxx_dev_init(void) +{ + bio_meta_pool = KMEM_CACHE(rsxx_bio_meta, SLAB_HWCACHE_ALIGN); + if (!bio_meta_pool) + return -ENOMEM; + + return 0; +} + +void rsxx_dev_cleanup(void) +{ + kmem_cache_destroy(bio_meta_pool); +} + + diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c new file mode 100644 index 000000000000..63176e67662f --- /dev/null +++ b/drivers/block/rsxx/dma.c @@ -0,0 +1,998 @@ +/* +* Filename: dma.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/slab.h> +#include "rsxx_priv.h" + +struct rsxx_dma { + struct list_head list; + u8 cmd; + unsigned int laddr; /* Logical address on the ramsan */ + struct { + u32 off; + u32 cnt; + } sub_page; + dma_addr_t dma_addr; + struct page *page; + unsigned int pg_off; /* Page Offset */ + rsxx_dma_cb cb; + void *cb_data; +}; + +/* This timeout is used to detect a stalled DMA channel */ +#define DMA_ACTIVITY_TIMEOUT msecs_to_jiffies(10000) + +struct hw_status { + u8 status; + u8 tag; + __le16 count; + __le32 _rsvd2; + __le64 _rsvd3; +} __packed; + +enum rsxx_dma_status { + DMA_SW_ERR = 0x1, + DMA_HW_FAULT = 0x2, + DMA_CANCELLED = 0x4, +}; + +struct hw_cmd { + u8 command; + u8 tag; + u8 _rsvd; + u8 sub_page; /* Bit[0:2]: 512byte offset */ + /* Bit[4:6]: 512byte count */ + __le32 device_addr; + __le64 host_addr; +} __packed; + +enum rsxx_hw_cmd { + HW_CMD_BLK_DISCARD = 0x70, + HW_CMD_BLK_WRITE = 0x80, + HW_CMD_BLK_READ = 0xC0, + HW_CMD_BLK_RECON_READ = 0xE0, +}; + +enum rsxx_hw_status { + HW_STATUS_CRC = 0x01, + HW_STATUS_HARD_ERR = 0x02, + HW_STATUS_SOFT_ERR = 0x04, + HW_STATUS_FAULT = 0x08, +}; + +#define STATUS_BUFFER_SIZE8 4096 +#define COMMAND_BUFFER_SIZE8 4096 + +static struct kmem_cache *rsxx_dma_pool; + +struct dma_tracker { + int next_tag; + struct rsxx_dma *dma; +}; + +#define DMA_TRACKER_LIST_SIZE8 (sizeof(struct dma_tracker_list) + \ + (sizeof(struct dma_tracker) * RSXX_MAX_OUTSTANDING_CMDS)) + +struct dma_tracker_list { + spinlock_t lock; + int head; + struct dma_tracker list[0]; +}; + + +/*----------------- Misc Utility Functions -------------------*/ +static unsigned int rsxx_addr8_to_laddr(u64 addr8, struct rsxx_cardinfo *card) +{ + unsigned long long tgt_addr8; + + tgt_addr8 = ((addr8 >> card->_stripe.upper_shift) & + card->_stripe.upper_mask) | + ((addr8) & card->_stripe.lower_mask); + do_div(tgt_addr8, RSXX_HW_BLK_SIZE); + return tgt_addr8; +} + +static unsigned int rsxx_get_dma_tgt(struct rsxx_cardinfo *card, u64 addr8) +{ + unsigned int tgt; + + tgt = (addr8 >> card->_stripe.target_shift) & card->_stripe.target_mask; + + return tgt; +} + +static void rsxx_dma_queue_reset(struct rsxx_cardinfo *card) +{ + /* Reset all DMA Command/Status Queues */ + iowrite32(DMA_QUEUE_RESET, card->regmap + RESET); +} + +static unsigned int get_dma_size(struct rsxx_dma *dma) +{ + if (dma->sub_page.cnt) + return dma->sub_page.cnt << 9; + else + return RSXX_HW_BLK_SIZE; +} + + +/*----------------- DMA Tracker -------------------*/ +static void set_tracker_dma(struct dma_tracker_list *trackers, + int tag, + struct rsxx_dma *dma) +{ + trackers->list[tag].dma = dma; +} + +static struct rsxx_dma *get_tracker_dma(struct dma_tracker_list *trackers, + int tag) +{ + return trackers->list[tag].dma; +} + +static int pop_tracker(struct dma_tracker_list *trackers) +{ + int tag; + + spin_lock(&trackers->lock); + tag = trackers->head; + if (tag != -1) { + trackers->head = trackers->list[tag].next_tag; + trackers->list[tag].next_tag = -1; + } + spin_unlock(&trackers->lock); + + return tag; +} + +static void push_tracker(struct dma_tracker_list *trackers, int tag) +{ + spin_lock(&trackers->lock); + trackers->list[tag].next_tag = trackers->head; + trackers->head = tag; + trackers->list[tag].dma = NULL; + spin_unlock(&trackers->lock); +} + + +/*----------------- Interrupt Coalescing -------------*/ +/* + * Interrupt Coalescing Register Format: + * Interrupt Timer (64ns units) [15:0] + * Interrupt Count [24:16] + * Reserved [31:25] +*/ +#define INTR_COAL_LATENCY_MASK (0x0000ffff) + +#define INTR_COAL_COUNT_SHIFT 16 +#define INTR_COAL_COUNT_BITS 9 +#define INTR_COAL_COUNT_MASK (((1 << INTR_COAL_COUNT_BITS) - 1) << \ + INTR_COAL_COUNT_SHIFT) +#define INTR_COAL_LATENCY_UNITS_NS 64 + + +static u32 dma_intr_coal_val(u32 mode, u32 count, u32 latency) +{ + u32 latency_units = latency / INTR_COAL_LATENCY_UNITS_NS; + + if (mode == RSXX_INTR_COAL_DISABLED) + return 0; + + return ((count << INTR_COAL_COUNT_SHIFT) & INTR_COAL_COUNT_MASK) | + (latency_units & INTR_COAL_LATENCY_MASK); + +} + +static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card) +{ + int i; + u32 q_depth = 0; + u32 intr_coal; + + if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE) + return; + + for (i = 0; i < card->n_targets; i++) + q_depth += atomic_read(&card->ctrl[i].stats.hw_q_depth); + + intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode, + q_depth / 2, + card->config.data.intr_coal.latency); + iowrite32(intr_coal, card->regmap + INTR_COAL); +} + +/*----------------- RSXX DMA Handling -------------------*/ +static void rsxx_complete_dma(struct rsxx_cardinfo *card, + struct rsxx_dma *dma, + unsigned int status) +{ + if (status & DMA_SW_ERR) + printk_ratelimited(KERN_ERR + "SW Error in DMA(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + if (status & DMA_HW_FAULT) + printk_ratelimited(KERN_ERR + "HW Fault in DMA(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + if (status & DMA_CANCELLED) + printk_ratelimited(KERN_ERR + "DMA Cancelled(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + + if (dma->dma_addr) + pci_unmap_page(card->dev, dma->dma_addr, get_dma_size(dma), + dma->cmd == HW_CMD_BLK_WRITE ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + + if (dma->cb) + dma->cb(card, dma->cb_data, status ? 1 : 0); + + kmem_cache_free(rsxx_dma_pool, dma); +} + +static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl, + struct rsxx_dma *dma) +{ + /* + * Requeued DMAs go to the front of the queue so they are issued + * first. + */ + spin_lock(&ctrl->queue_lock); + list_add(&dma->list, &ctrl->queue); + spin_unlock(&ctrl->queue_lock); +} + +static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl, + struct rsxx_dma *dma, + u8 hw_st) +{ + unsigned int status = 0; + int requeue_cmd = 0; + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Handling DMA error(cmd x%02x, laddr x%08x st:x%02x)\n", + dma->cmd, dma->laddr, hw_st); + + if (hw_st & HW_STATUS_CRC) + ctrl->stats.crc_errors++; + if (hw_st & HW_STATUS_HARD_ERR) + ctrl->stats.hard_errors++; + if (hw_st & HW_STATUS_SOFT_ERR) + ctrl->stats.soft_errors++; + + switch (dma->cmd) { + case HW_CMD_BLK_READ: + if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) { + if (ctrl->card->scrub_hard) { + dma->cmd = HW_CMD_BLK_RECON_READ; + requeue_cmd = 1; + ctrl->stats.reads_retried++; + } else { + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + } else if (hw_st & HW_STATUS_FAULT) { + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + + break; + case HW_CMD_BLK_RECON_READ: + if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) { + /* Data could not be reconstructed. */ + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + + break; + case HW_CMD_BLK_WRITE: + status |= DMA_HW_FAULT; + ctrl->stats.writes_failed++; + + break; + case HW_CMD_BLK_DISCARD: + status |= DMA_HW_FAULT; + ctrl->stats.discards_failed++; + + break; + default: + dev_err(CARD_TO_DEV(ctrl->card), + "Unknown command in DMA!(cmd: x%02x " + "laddr x%08x st: x%02x\n", + dma->cmd, dma->laddr, hw_st); + status |= DMA_SW_ERR; + + break; + } + + if (requeue_cmd) + rsxx_requeue_dma(ctrl, dma); + else + rsxx_complete_dma(ctrl->card, dma, status); +} + +static void dma_engine_stalled(unsigned long data) +{ + struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data; + + if (atomic_read(&ctrl->stats.hw_q_depth) == 0) + return; + + if (ctrl->cmd.idx != ioread32(ctrl->regmap + SW_CMD_IDX)) { + /* + * The dma engine was stalled because the SW_CMD_IDX write + * was lost. Issue it again to recover. + */ + dev_warn(CARD_TO_DEV(ctrl->card), + "SW_CMD_IDX write was lost, re-writing...\n"); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + } else { + dev_warn(CARD_TO_DEV(ctrl->card), + "DMA channel %d has stalled, faulting interface.\n", + ctrl->id); + ctrl->card->dma_fault = 1; + } +} + +static void rsxx_issue_dmas(struct work_struct *work) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + int tag; + int cmds_pending = 0; + struct hw_cmd *hw_cmd_buf; + + ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work); + hw_cmd_buf = ctrl->cmd.buf; + + if (unlikely(ctrl->card->halt)) + return; + + while (1) { + spin_lock(&ctrl->queue_lock); + if (list_empty(&ctrl->queue)) { + spin_unlock(&ctrl->queue_lock); + break; + } + spin_unlock(&ctrl->queue_lock); + + tag = pop_tracker(ctrl->trackers); + if (tag == -1) + break; + + spin_lock(&ctrl->queue_lock); + dma = list_entry(ctrl->queue.next, struct rsxx_dma, list); + list_del(&dma->list); + ctrl->stats.sw_q_depth--; + spin_unlock(&ctrl->queue_lock); + + /* + * This will catch any DMAs that slipped in right before the + * fault, but was queued after all the other DMAs were + * cancelled. + */ + if (unlikely(ctrl->card->dma_fault)) { + push_tracker(ctrl->trackers, tag); + rsxx_complete_dma(ctrl->card, dma, DMA_CANCELLED); + continue; + } + + set_tracker_dma(ctrl->trackers, tag, dma); + hw_cmd_buf[ctrl->cmd.idx].command = dma->cmd; + hw_cmd_buf[ctrl->cmd.idx].tag = tag; + hw_cmd_buf[ctrl->cmd.idx]._rsvd = 0; + hw_cmd_buf[ctrl->cmd.idx].sub_page = + ((dma->sub_page.cnt & 0x7) << 4) | + (dma->sub_page.off & 0x7); + + hw_cmd_buf[ctrl->cmd.idx].device_addr = + cpu_to_le32(dma->laddr); + + hw_cmd_buf[ctrl->cmd.idx].host_addr = + cpu_to_le64(dma->dma_addr); + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Issue DMA%d(laddr %d tag %d) to idx %d\n", + ctrl->id, dma->laddr, tag, ctrl->cmd.idx); + + ctrl->cmd.idx = (ctrl->cmd.idx + 1) & RSXX_CS_IDX_MASK; + cmds_pending++; + + if (dma->cmd == HW_CMD_BLK_WRITE) + ctrl->stats.writes_issued++; + else if (dma->cmd == HW_CMD_BLK_DISCARD) + ctrl->stats.discards_issued++; + else + ctrl->stats.reads_issued++; + } + + /* Let HW know we've queued commands. */ + if (cmds_pending) { + /* + * We must guarantee that the CPU writes to 'ctrl->cmd.buf' + * (which is in PCI-consistent system-memory) from the loop + * above make it into the coherency domain before the + * following PIO "trigger" updating the cmd.idx. A WMB is + * sufficient. We need not explicitly CPU cache-flush since + * the memory is a PCI-consistent (ie; coherent) mapping. + */ + wmb(); + + atomic_add(cmds_pending, &ctrl->stats.hw_q_depth); + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + } +} + +static void rsxx_dma_done(struct work_struct *work) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + unsigned long flags; + u16 count; + u8 status; + u8 tag; + struct hw_status *hw_st_buf; + + ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work); + hw_st_buf = ctrl->status.buf; + + if (unlikely(ctrl->card->halt) || + unlikely(ctrl->card->dma_fault)) + return; + + count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); + + while (count == ctrl->e_cnt) { + /* + * The read memory-barrier is necessary to keep aggressive + * processors/optimizers (such as the PPC Apple G5) from + * reordering the following status-buffer tag & status read + * *before* the count read on subsequent iterations of the + * loop! + */ + rmb(); + + status = hw_st_buf[ctrl->status.idx].status; + tag = hw_st_buf[ctrl->status.idx].tag; + + dma = get_tracker_dma(ctrl->trackers, tag); + if (dma == NULL) { + spin_lock_irqsave(&ctrl->card->irq_lock, flags); + rsxx_disable_ier(ctrl->card, CR_INTR_DMA_ALL); + spin_unlock_irqrestore(&ctrl->card->irq_lock, flags); + + dev_err(CARD_TO_DEV(ctrl->card), + "No tracker for tag %d " + "(idx %d id %d)\n", + tag, ctrl->status.idx, ctrl->id); + return; + } + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Completing DMA%d" + "(laddr x%x tag %d st: x%x cnt: x%04x) from idx %d.\n", + ctrl->id, dma->laddr, tag, status, count, + ctrl->status.idx); + + atomic_dec(&ctrl->stats.hw_q_depth); + + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + + if (status) + rsxx_handle_dma_error(ctrl, dma, status); + else + rsxx_complete_dma(ctrl->card, dma, 0); + + push_tracker(ctrl->trackers, tag); + + ctrl->status.idx = (ctrl->status.idx + 1) & + RSXX_CS_IDX_MASK; + ctrl->e_cnt++; + + count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); + } + + dma_intr_coal_auto_tune(ctrl->card); + + if (atomic_read(&ctrl->stats.hw_q_depth) == 0) + del_timer_sync(&ctrl->activity_timer); + + spin_lock_irqsave(&ctrl->card->irq_lock, flags); + rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id)); + spin_unlock_irqrestore(&ctrl->card->irq_lock, flags); + + spin_lock(&ctrl->queue_lock); + if (ctrl->stats.sw_q_depth) + queue_work(ctrl->issue_wq, &ctrl->issue_dma_work); + spin_unlock(&ctrl->queue_lock); +} + +static int rsxx_cleanup_dma_queue(struct rsxx_cardinfo *card, + struct list_head *q) +{ + struct rsxx_dma *dma; + struct rsxx_dma *tmp; + int cnt = 0; + + list_for_each_entry_safe(dma, tmp, q, list) { + list_del(&dma->list); + + if (dma->dma_addr) + pci_unmap_page(card->dev, dma->dma_addr, + get_dma_size(dma), + (dma->cmd == HW_CMD_BLK_WRITE) ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + kmem_cache_free(rsxx_dma_pool, dma); + cnt++; + } + + return cnt; +} + +static int rsxx_queue_discard(struct rsxx_cardinfo *card, + struct list_head *q, + unsigned int laddr, + rsxx_dma_cb cb, + void *cb_data) +{ + struct rsxx_dma *dma; + + dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); + if (!dma) + return -ENOMEM; + + dma->cmd = HW_CMD_BLK_DISCARD; + dma->laddr = laddr; + dma->dma_addr = 0; + dma->sub_page.off = 0; + dma->sub_page.cnt = 0; + dma->page = NULL; + dma->pg_off = 0; + dma->cb = cb; + dma->cb_data = cb_data; + + dev_dbg(CARD_TO_DEV(card), "Queuing[D] laddr %x\n", dma->laddr); + + list_add_tail(&dma->list, q); + + return 0; +} + +static int rsxx_queue_dma(struct rsxx_cardinfo *card, + struct list_head *q, + int dir, + unsigned int dma_off, + unsigned int dma_len, + unsigned int laddr, + struct page *page, + unsigned int pg_off, + rsxx_dma_cb cb, + void *cb_data) +{ + struct rsxx_dma *dma; + + dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); + if (!dma) + return -ENOMEM; + + dma->dma_addr = pci_map_page(card->dev, page, pg_off, dma_len, + dir ? PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + if (!dma->dma_addr) { + kmem_cache_free(rsxx_dma_pool, dma); + return -ENOMEM; + } + + dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ; + dma->laddr = laddr; + dma->sub_page.off = (dma_off >> 9); + dma->sub_page.cnt = (dma_len >> 9); + dma->page = page; + dma->pg_off = pg_off; + dma->cb = cb; + dma->cb_data = cb_data; + + dev_dbg(CARD_TO_DEV(card), + "Queuing[%c] laddr %x off %d cnt %d page %p pg_off %d\n", + dir ? 'W' : 'R', dma->laddr, dma->sub_page.off, + dma->sub_page.cnt, dma->page, dma->pg_off); + + /* Queue the DMA */ + list_add_tail(&dma->list, q); + + return 0; +} + +int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, + struct bio *bio, + atomic_t *n_dmas, + rsxx_dma_cb cb, + void *cb_data) +{ + struct list_head dma_list[RSXX_MAX_TARGETS]; + struct bio_vec *bvec; + unsigned long long addr8; + unsigned int laddr; + unsigned int bv_len; + unsigned int bv_off; + unsigned int dma_off; + unsigned int dma_len; + int dma_cnt[RSXX_MAX_TARGETS]; + int tgt; + int st; + int i; + + addr8 = bio->bi_sector << 9; /* sectors are 512 bytes */ + atomic_set(n_dmas, 0); + + for (i = 0; i < card->n_targets; i++) { + INIT_LIST_HEAD(&dma_list[i]); + dma_cnt[i] = 0; + } + + if (bio->bi_rw & REQ_DISCARD) { + bv_len = bio->bi_size; + + while (bv_len > 0) { + tgt = rsxx_get_dma_tgt(card, addr8); + laddr = rsxx_addr8_to_laddr(addr8, card); + + st = rsxx_queue_discard(card, &dma_list[tgt], laddr, + cb, cb_data); + if (st) + goto bvec_err; + + dma_cnt[tgt]++; + atomic_inc(n_dmas); + addr8 += RSXX_HW_BLK_SIZE; + bv_len -= RSXX_HW_BLK_SIZE; + } + } else { + bio_for_each_segment(bvec, bio, i) { + bv_len = bvec->bv_len; + bv_off = bvec->bv_offset; + + while (bv_len > 0) { + tgt = rsxx_get_dma_tgt(card, addr8); + laddr = rsxx_addr8_to_laddr(addr8, card); + dma_off = addr8 & RSXX_HW_BLK_MASK; + dma_len = min(bv_len, + RSXX_HW_BLK_SIZE - dma_off); + + st = rsxx_queue_dma(card, &dma_list[tgt], + bio_data_dir(bio), + dma_off, dma_len, + laddr, bvec->bv_page, + bv_off, cb, cb_data); + if (st) + goto bvec_err; + + dma_cnt[tgt]++; + atomic_inc(n_dmas); + addr8 += dma_len; + bv_off += dma_len; + bv_len -= dma_len; + } + } + } + + for (i = 0; i < card->n_targets; i++) { + if (!list_empty(&dma_list[i])) { + spin_lock(&card->ctrl[i].queue_lock); + card->ctrl[i].stats.sw_q_depth += dma_cnt[i]; + list_splice_tail(&dma_list[i], &card->ctrl[i].queue); + spin_unlock(&card->ctrl[i].queue_lock); + + queue_work(card->ctrl[i].issue_wq, + &card->ctrl[i].issue_dma_work); + } + } + + return 0; + +bvec_err: + for (i = 0; i < card->n_targets; i++) + rsxx_cleanup_dma_queue(card, &dma_list[i]); + + return st; +} + + +/*----------------- DMA Engine Initialization & Setup -------------------*/ +static int rsxx_dma_ctrl_init(struct pci_dev *dev, + struct rsxx_dma_ctrl *ctrl) +{ + int i; + + memset(&ctrl->stats, 0, sizeof(ctrl->stats)); + + ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, + &ctrl->status.dma_addr); + ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, + &ctrl->cmd.dma_addr); + if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) + return -ENOMEM; + + ctrl->trackers = vmalloc(DMA_TRACKER_LIST_SIZE8); + if (!ctrl->trackers) + return -ENOMEM; + + ctrl->trackers->head = 0; + for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) { + ctrl->trackers->list[i].next_tag = i + 1; + ctrl->trackers->list[i].dma = NULL; + } + ctrl->trackers->list[RSXX_MAX_OUTSTANDING_CMDS-1].next_tag = -1; + spin_lock_init(&ctrl->trackers->lock); + + spin_lock_init(&ctrl->queue_lock); + INIT_LIST_HEAD(&ctrl->queue); + + setup_timer(&ctrl->activity_timer, dma_engine_stalled, + (unsigned long)ctrl); + + ctrl->issue_wq = alloc_ordered_workqueue(DRIVER_NAME"_issue", 0); + if (!ctrl->issue_wq) + return -ENOMEM; + + ctrl->done_wq = alloc_ordered_workqueue(DRIVER_NAME"_done", 0); + if (!ctrl->done_wq) + return -ENOMEM; + + INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas); + INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done); + + memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_LO); + iowrite32(upper_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_HI); + + memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO); + iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI); + + ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT); + if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading status cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT); + iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT); + + ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX); + if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + + wmb(); + + return 0; +} + +static int rsxx_dma_stripe_setup(struct rsxx_cardinfo *card, + unsigned int stripe_size8) +{ + if (!is_power_of_2(stripe_size8)) { + dev_err(CARD_TO_DEV(card), + "stripe_size is NOT a power of 2!\n"); + return -EINVAL; + } + + card->_stripe.lower_mask = stripe_size8 - 1; + + card->_stripe.upper_mask = ~(card->_stripe.lower_mask); + card->_stripe.upper_shift = ffs(card->n_targets) - 1; + + card->_stripe.target_mask = card->n_targets - 1; + card->_stripe.target_shift = ffs(stripe_size8) - 1; + + dev_dbg(CARD_TO_DEV(card), "_stripe.lower_mask = x%016llx\n", + card->_stripe.lower_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.upper_shift = x%016llx\n", + card->_stripe.upper_shift); + dev_dbg(CARD_TO_DEV(card), "_stripe.upper_mask = x%016llx\n", + card->_stripe.upper_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.target_mask = x%016llx\n", + card->_stripe.target_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.target_shift = x%016llx\n", + card->_stripe.target_shift); + + return 0; +} + +static int rsxx_dma_configure(struct rsxx_cardinfo *card) +{ + u32 intr_coal; + + intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode, + card->config.data.intr_coal.count, + card->config.data.intr_coal.latency); + iowrite32(intr_coal, card->regmap + INTR_COAL); + + return rsxx_dma_stripe_setup(card, card->config.data.stripe_size); +} + +int rsxx_dma_setup(struct rsxx_cardinfo *card) +{ + unsigned long flags; + int st; + int i; + + dev_info(CARD_TO_DEV(card), + "Initializing %d DMA targets\n", + card->n_targets); + + /* Regmap is divided up into 4K chunks. One for each DMA channel */ + for (i = 0; i < card->n_targets; i++) + card->ctrl[i].regmap = card->regmap + (i * 4096); + + card->dma_fault = 0; + + /* Reset the DMA queues */ + rsxx_dma_queue_reset(card); + + /************* Setup DMA Control *************/ + for (i = 0; i < card->n_targets; i++) { + st = rsxx_dma_ctrl_init(card->dev, &card->ctrl[i]); + if (st) + goto failed_dma_setup; + + card->ctrl[i].card = card; + card->ctrl[i].id = i; + } + + card->scrub_hard = 1; + + if (card->config_valid) + rsxx_dma_configure(card); + + /* Enable the interrupts after all setup has completed. */ + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + return 0; + +failed_dma_setup: + for (i = 0; i < card->n_targets; i++) { + struct rsxx_dma_ctrl *ctrl = &card->ctrl[i]; + + if (ctrl->issue_wq) { + destroy_workqueue(ctrl->issue_wq); + ctrl->issue_wq = NULL; + } + + if (ctrl->done_wq) { + destroy_workqueue(ctrl->done_wq); + ctrl->done_wq = NULL; + } + + if (ctrl->trackers) + vfree(ctrl->trackers); + + if (ctrl->status.buf) + pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, + ctrl->status.buf, + ctrl->status.dma_addr); + if (ctrl->cmd.buf) + pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, + ctrl->cmd.buf, ctrl->cmd.dma_addr); + } + + return st; +} + + +void rsxx_dma_destroy(struct rsxx_cardinfo *card) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + int i, j; + int cnt = 0; + + for (i = 0; i < card->n_targets; i++) { + ctrl = &card->ctrl[i]; + + if (ctrl->issue_wq) { + destroy_workqueue(ctrl->issue_wq); + ctrl->issue_wq = NULL; + } + + if (ctrl->done_wq) { + destroy_workqueue(ctrl->done_wq); + ctrl->done_wq = NULL; + } + + if (timer_pending(&ctrl->activity_timer)) + del_timer_sync(&ctrl->activity_timer); + + /* Clean up the DMA queue */ + spin_lock(&ctrl->queue_lock); + cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue); + spin_unlock(&ctrl->queue_lock); + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Freed %d queued DMAs on channel %d\n", + cnt, i); + + /* Clean up issued DMAs */ + for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) { + dma = get_tracker_dma(ctrl->trackers, j); + if (dma) { + pci_unmap_page(card->dev, dma->dma_addr, + get_dma_size(dma), + (dma->cmd == HW_CMD_BLK_WRITE) ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + kmem_cache_free(rsxx_dma_pool, dma); + cnt++; + } + } + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Freed %d pending DMAs on channel %d\n", + cnt, i); + + vfree(ctrl->trackers); + + pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, + ctrl->status.buf, ctrl->status.dma_addr); + pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, + ctrl->cmd.buf, ctrl->cmd.dma_addr); + } +} + + +int rsxx_dma_init(void) +{ + rsxx_dma_pool = KMEM_CACHE(rsxx_dma, SLAB_HWCACHE_ALIGN); + if (!rsxx_dma_pool) + return -ENOMEM; + + return 0; +} + + +void rsxx_dma_cleanup(void) +{ + kmem_cache_destroy(rsxx_dma_pool); +} + diff --git a/drivers/block/rsxx/rsxx.h b/drivers/block/rsxx/rsxx.h new file mode 100644 index 000000000000..2e50b65902b7 --- /dev/null +++ b/drivers/block/rsxx/rsxx.h @@ -0,0 +1,45 @@ +/* +* Filename: rsxx.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_H__ +#define __RSXX_H__ + +/*----------------- IOCTL Definitions -------------------*/ + +struct rsxx_reg_access { + __u32 addr; + __u32 cnt; + __u32 stat; + __u32 stream; + __u32 data[8]; +}; + +#define RSXX_MAX_REG_CNT (8 * (sizeof(__u32))) + +#define RSXX_IOC_MAGIC 'r' + +#define RSXX_GETREG _IOWR(RSXX_IOC_MAGIC, 0x20, struct rsxx_reg_access) +#define RSXX_SETREG _IOWR(RSXX_IOC_MAGIC, 0x21, struct rsxx_reg_access) + +#endif /* __RSXX_H_ */ diff --git a/drivers/block/rsxx/rsxx_cfg.h b/drivers/block/rsxx/rsxx_cfg.h new file mode 100644 index 000000000000..c025fe5fdb70 --- /dev/null +++ b/drivers/block/rsxx/rsxx_cfg.h @@ -0,0 +1,72 @@ +/* +* Filename: rsXX_cfg.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_CFG_H__ +#define __RSXX_CFG_H__ + +/* NOTE: Config values will be saved in network byte order (i.e. Big endian) */ +#include <linux/types.h> + +/* + * The card config version must match the driver's expected version. If it does + * not, the DMA interfaces will not be attached and the user will need to + * initialize/upgrade the card configuration using the card config utility. + */ +#define RSXX_CFG_VERSION 4 + +struct card_cfg_hdr { + __u32 version; + __u32 crc; +}; + +struct card_cfg_data { + __u32 block_size; + __u32 stripe_size; + __u32 vendor_id; + __u32 cache_order; + struct { + __u32 mode; /* Disabled, manual, auto-tune... */ + __u32 count; /* Number of intr to coalesce */ + __u32 latency;/* Max wait time (in ns) */ + } intr_coal; +}; + +struct rsxx_card_cfg { + struct card_cfg_hdr hdr; + struct card_cfg_data data; +}; + +/* Vendor ID Values */ +#define RSXX_VENDOR_ID_TMS_IBM 0 +#define RSXX_VENDOR_ID_DSI 1 +#define RSXX_VENDOR_COUNT 2 + +/* Interrupt Coalescing Values */ +#define RSXX_INTR_COAL_DISABLED 0 +#define RSXX_INTR_COAL_EXPLICIT 1 +#define RSXX_INTR_COAL_AUTO_TUNE 2 + + +#endif /* __RSXX_CFG_H__ */ + diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h new file mode 100644 index 000000000000..a1ac907d8f4c --- /dev/null +++ b/drivers/block/rsxx/rsxx_priv.h @@ -0,0 +1,399 @@ +/* +* Filename: rsxx_priv.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_PRIV_H__ +#define __RSXX_PRIV_H__ + +#include <linux/version.h> +#include <linux/semaphore.h> + +#include <linux/fs.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/sysfs.h> +#include <linux/workqueue.h> +#include <linux/bio.h> +#include <linux/vmalloc.h> +#include <linux/timer.h> +#include <linux/ioctl.h> + +#include "rsxx.h" +#include "rsxx_cfg.h" + +struct proc_cmd; + +#define PCI_VENDOR_ID_TMS_IBM 0x15B6 +#define PCI_DEVICE_ID_RS70_FLASH 0x0019 +#define PCI_DEVICE_ID_RS70D_FLASH 0x001A +#define PCI_DEVICE_ID_RS80_FLASH 0x001C +#define PCI_DEVICE_ID_RS81_FLASH 0x001E + +#define RS70_PCI_REV_SUPPORTED 4 + +#define DRIVER_NAME "rsxx" +#define DRIVER_VERSION "3.7" + +/* Block size is 4096 */ +#define RSXX_HW_BLK_SHIFT 12 +#define RSXX_HW_BLK_SIZE (1 << RSXX_HW_BLK_SHIFT) +#define RSXX_HW_BLK_MASK (RSXX_HW_BLK_SIZE - 1) + +#define MAX_CREG_DATA8 32 +#define LOG_BUF_SIZE8 128 + +#define RSXX_MAX_OUTSTANDING_CMDS 255 +#define RSXX_CS_IDX_MASK 0xff + +#define RSXX_MAX_TARGETS 8 + +struct dma_tracker_list; + +/* DMA Command/Status Buffer structure */ +struct rsxx_cs_buffer { + dma_addr_t dma_addr; + void *buf; + u32 idx; +}; + +struct rsxx_dma_stats { + u32 crc_errors; + u32 hard_errors; + u32 soft_errors; + u32 writes_issued; + u32 writes_failed; + u32 reads_issued; + u32 reads_failed; + u32 reads_retried; + u32 discards_issued; + u32 discards_failed; + u32 done_rescheduled; + u32 issue_rescheduled; + u32 sw_q_depth; /* Number of DMAs on the SW queue. */ + atomic_t hw_q_depth; /* Number of DMAs queued to HW. */ +}; + +struct rsxx_dma_ctrl { + struct rsxx_cardinfo *card; + int id; + void __iomem *regmap; + struct rsxx_cs_buffer status; + struct rsxx_cs_buffer cmd; + u16 e_cnt; + spinlock_t queue_lock; + struct list_head queue; + struct workqueue_struct *issue_wq; + struct work_struct issue_dma_work; + struct workqueue_struct *done_wq; + struct work_struct dma_done_work; + struct timer_list activity_timer; + struct dma_tracker_list *trackers; + struct rsxx_dma_stats stats; +}; + +struct rsxx_cardinfo { + struct pci_dev *dev; + unsigned int halt; + + void __iomem *regmap; + spinlock_t irq_lock; + unsigned int isr_mask; + unsigned int ier_mask; + + struct rsxx_card_cfg config; + int config_valid; + + /* Embedded CPU Communication */ + struct { + spinlock_t lock; + bool active; + struct creg_cmd *active_cmd; + struct work_struct done_work; + struct list_head queue; + unsigned int q_depth; + /* Cache the creg status to prevent ioreads */ + struct { + u32 stat; + u32 failed_cancel_timer; + u32 creg_timeout; + } creg_stats; + struct timer_list cmd_timer; + struct mutex reset_lock; + int reset; + } creg_ctrl; + + struct { + char tmp[MAX_CREG_DATA8]; + char buf[LOG_BUF_SIZE8]; /* terminated */ + int buf_len; + } log; + + struct work_struct event_work; + unsigned int state; + u64 size8; + + /* Lock the device attach/detach function */ + struct mutex dev_lock; + + /* Block Device Variables */ + bool bdev_attached; + int disk_id; + int major; + struct request_queue *queue; + struct gendisk *gendisk; + struct { + /* Used to convert a byte address to a device address. */ + u64 lower_mask; + u64 upper_shift; + u64 upper_mask; + u64 target_mask; + u64 target_shift; + } _stripe; + unsigned int dma_fault; + + int scrub_hard; + + int n_targets; + struct rsxx_dma_ctrl *ctrl; +}; + +enum rsxx_pci_regmap { + HWID = 0x00, /* Hardware Identification Register */ + SCRATCH = 0x04, /* Scratch/Debug Register */ + RESET = 0x08, /* Reset Register */ + ISR = 0x10, /* Interrupt Status Register */ + IER = 0x14, /* Interrupt Enable Register */ + IPR = 0x18, /* Interrupt Poll Register */ + CB_ADD_LO = 0x20, /* Command Host Buffer Address [31:0] */ + CB_ADD_HI = 0x24, /* Command Host Buffer Address [63:32]*/ + HW_CMD_IDX = 0x28, /* Hardware Processed Command Index */ + SW_CMD_IDX = 0x2C, /* Software Processed Command Index */ + SB_ADD_LO = 0x30, /* Status Host Buffer Address [31:0] */ + SB_ADD_HI = 0x34, /* Status Host Buffer Address [63:32] */ + HW_STATUS_CNT = 0x38, /* Hardware Status Counter */ + SW_STATUS_CNT = 0x3C, /* Deprecated */ + CREG_CMD = 0x40, /* CPU Command Register */ + CREG_ADD = 0x44, /* CPU Address Register */ + CREG_CNT = 0x48, /* CPU Count Register */ + CREG_STAT = 0x4C, /* CPU Status Register */ + CREG_DATA0 = 0x50, /* CPU Data Registers */ + CREG_DATA1 = 0x54, + CREG_DATA2 = 0x58, + CREG_DATA3 = 0x5C, + CREG_DATA4 = 0x60, + CREG_DATA5 = 0x64, + CREG_DATA6 = 0x68, + CREG_DATA7 = 0x6c, + INTR_COAL = 0x70, /* Interrupt Coalescing Register */ + HW_ERROR = 0x74, /* Card Error Register */ + PCI_DEBUG0 = 0x78, /* PCI Debug Registers */ + PCI_DEBUG1 = 0x7C, + PCI_DEBUG2 = 0x80, + PCI_DEBUG3 = 0x84, + PCI_DEBUG4 = 0x88, + PCI_DEBUG5 = 0x8C, + PCI_DEBUG6 = 0x90, + PCI_DEBUG7 = 0x94, + PCI_POWER_THROTTLE = 0x98, + PERF_CTRL = 0x9c, + PERF_TIMER_LO = 0xa0, + PERF_TIMER_HI = 0xa4, + PERF_RD512_LO = 0xa8, + PERF_RD512_HI = 0xac, + PERF_WR512_LO = 0xb0, + PERF_WR512_HI = 0xb4, +}; + +enum rsxx_intr { + CR_INTR_DMA0 = 0x00000001, + CR_INTR_CREG = 0x00000002, + CR_INTR_DMA1 = 0x00000004, + CR_INTR_EVENT = 0x00000008, + CR_INTR_DMA2 = 0x00000010, + CR_INTR_DMA3 = 0x00000020, + CR_INTR_DMA4 = 0x00000040, + CR_INTR_DMA5 = 0x00000080, + CR_INTR_DMA6 = 0x00000100, + CR_INTR_DMA7 = 0x00000200, + CR_INTR_DMA_ALL = 0x000003f5, + CR_INTR_ALL = 0xffffffff, +}; + +static inline int CR_INTR_DMA(int N) +{ + static const unsigned int _CR_INTR_DMA[] = { + CR_INTR_DMA0, CR_INTR_DMA1, CR_INTR_DMA2, CR_INTR_DMA3, + CR_INTR_DMA4, CR_INTR_DMA5, CR_INTR_DMA6, CR_INTR_DMA7 + }; + return _CR_INTR_DMA[N]; +} +enum rsxx_pci_reset { + DMA_QUEUE_RESET = 0x00000001, +}; + +enum rsxx_pci_revision { + RSXX_DISCARD_SUPPORT = 2, +}; + +enum rsxx_creg_cmd { + CREG_CMD_TAG_MASK = 0x0000FF00, + CREG_OP_WRITE = 0x000000C0, + CREG_OP_READ = 0x000000E0, +}; + +enum rsxx_creg_addr { + CREG_ADD_CARD_CMD = 0x80001000, + CREG_ADD_CARD_STATE = 0x80001004, + CREG_ADD_CARD_SIZE = 0x8000100c, + CREG_ADD_CAPABILITIES = 0x80001050, + CREG_ADD_LOG = 0x80002000, + CREG_ADD_NUM_TARGETS = 0x80003000, + CREG_ADD_CONFIG = 0xB0000000, +}; + +enum rsxx_creg_card_cmd { + CARD_CMD_STARTUP = 1, + CARD_CMD_SHUTDOWN = 2, + CARD_CMD_LOW_LEVEL_FORMAT = 3, + CARD_CMD_FPGA_RECONFIG_BR = 4, + CARD_CMD_FPGA_RECONFIG_MAIN = 5, + CARD_CMD_BACKUP = 6, + CARD_CMD_RESET = 7, + CARD_CMD_deprecated = 8, + CARD_CMD_UNINITIALIZE = 9, + CARD_CMD_DSTROY_EMERGENCY = 10, + CARD_CMD_DSTROY_NORMAL = 11, + CARD_CMD_DSTROY_EXTENDED = 12, + CARD_CMD_DSTROY_ABORT = 13, +}; + +enum rsxx_card_state { + CARD_STATE_SHUTDOWN = 0x00000001, + CARD_STATE_STARTING = 0x00000002, + CARD_STATE_FORMATTING = 0x00000004, + CARD_STATE_UNINITIALIZED = 0x00000008, + CARD_STATE_GOOD = 0x00000010, + CARD_STATE_SHUTTING_DOWN = 0x00000020, + CARD_STATE_FAULT = 0x00000040, + CARD_STATE_RD_ONLY_FAULT = 0x00000080, + CARD_STATE_DSTROYING = 0x00000100, +}; + +enum rsxx_led { + LED_DEFAULT = 0x0, + LED_IDENTIFY = 0x1, + LED_SOAK = 0x2, +}; + +enum rsxx_creg_flash_lock { + CREG_FLASH_LOCK = 1, + CREG_FLASH_UNLOCK = 2, +}; + +enum rsxx_card_capabilities { + CARD_CAP_SUBPAGE_WRITES = 0x00000080, +}; + +enum rsxx_creg_stat { + CREG_STAT_STATUS_MASK = 0x00000003, + CREG_STAT_SUCCESS = 0x1, + CREG_STAT_ERROR = 0x2, + CREG_STAT_CHAR_PENDING = 0x00000004, /* Character I/O pending bit */ + CREG_STAT_LOG_PENDING = 0x00000008, /* HW log message pending bit */ + CREG_STAT_TAG_MASK = 0x0000ff00, +}; + +static inline unsigned int CREG_DATA(int N) +{ + return CREG_DATA0 + (N << 2); +} + +/*----------------- Convenient Log Wrappers -------------------*/ +#define CARD_TO_DEV(__CARD) (&(__CARD)->dev->dev) + +/***** config.c *****/ +int rsxx_load_config(struct rsxx_cardinfo *card); + +/***** core.c *****/ +void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr); +void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr); +void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr); +void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr); + +/***** dev.c *****/ +int rsxx_attach_dev(struct rsxx_cardinfo *card); +void rsxx_detach_dev(struct rsxx_cardinfo *card); +int rsxx_setup_dev(struct rsxx_cardinfo *card); +void rsxx_destroy_dev(struct rsxx_cardinfo *card); +int rsxx_dev_init(void); +void rsxx_dev_cleanup(void); + +/***** dma.c ****/ +typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card, + void *cb_data, + unsigned int status); +int rsxx_dma_setup(struct rsxx_cardinfo *card); +void rsxx_dma_destroy(struct rsxx_cardinfo *card); +int rsxx_dma_init(void); +void rsxx_dma_cleanup(void); +int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, + struct bio *bio, + atomic_t *n_dmas, + rsxx_dma_cb cb, + void *cb_data); + +/***** cregs.c *****/ +int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr, + unsigned int size8, + void *data, + int byte_stream); +int rsxx_creg_read(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream); +int rsxx_read_hw_log(struct rsxx_cardinfo *card); +int rsxx_get_card_state(struct rsxx_cardinfo *card, + unsigned int *state); +int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8); +int rsxx_get_num_targets(struct rsxx_cardinfo *card, + unsigned int *n_targets); +int rsxx_get_card_capabilities(struct rsxx_cardinfo *card, + u32 *capabilities); +int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd); +int rsxx_creg_setup(struct rsxx_cardinfo *card); +void rsxx_creg_destroy(struct rsxx_cardinfo *card); +int rsxx_creg_init(void); +void rsxx_creg_cleanup(void); + +int rsxx_reg_access(struct rsxx_cardinfo *card, + struct rsxx_reg_access __user *ucmd, + int read); + + + +#endif /* __DRIVERS_BLOCK_RSXX_H__ */ diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 57763c54363a..758f2ac878cf 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1090,10 +1090,13 @@ static const struct block_device_operations floppy_fops = { static void swim3_mb_event(struct macio_dev* mdev, int mb_state) { struct floppy_state *fs = macio_get_drvdata(mdev); - struct swim3 __iomem *sw = fs->swim3; + struct swim3 __iomem *sw; if (!fs) return; + + sw = fs->swim3; + if (mb_state != MB_FD) return; diff --git a/drivers/block/xd.c b/drivers/block/xd.c deleted file mode 100644 index ff540520bada..000000000000 --- a/drivers/block/xd.c +++ /dev/null @@ -1,1123 +0,0 @@ -/* - * This file contains the driver for an XT hard disk controller - * (at least the DTC 5150X) for Linux. - * - * Author: Pat Mackinlay, pat@it.com.au - * Date: 29/09/92 - * - * Revised: 01/01/93, ... - * - * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler, - * kevinf@agora.rain.com) - * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and - * Wim Van Dorst. - * - * Revised: 04/04/94 by Risto Kankkunen - * Moved the detection code from xd_init() to xd_geninit() as it needed - * interrupts enabled and Linus didn't want to enable them in that first - * phase. xd_geninit() is the place to do these kinds of things anyway, - * he says. - * - * Modularized: 04/10/96 by Todd Fries, tfries@umr.edu - * - * Revised: 13/12/97 by Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl - * Fixed some problems with disk initialization and module initiation. - * Added support for manual geometry setting (except Seagate controllers) - * in form: - * xd_geo=<cyl_xda>,<head_xda>,<sec_xda>[,<cyl_xdb>,<head_xdb>,<sec_xdb>] - * Recovered DMA access. Abridged messages. Added support for DTC5051CX, - * WD1002-27X & XEBEC controllers. Driver uses now some jumper settings. - * Extended ioctl() support. - * - * Bugfix: 15/02/01, Paul G. - inform queue layer of tiny xd_maxsect. - * - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/genhd.h> -#include <linux/hdreg.h> -#include <linux/ioport.h> -#include <linux/init.h> -#include <linux/wait.h> -#include <linux/blkdev.h> -#include <linux/mutex.h> -#include <linux/blkpg.h> -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/gfp.h> - -#include <asm/uaccess.h> -#include <asm/dma.h> - -#include "xd.h" - -static DEFINE_MUTEX(xd_mutex); -static void __init do_xd_setup (int *integers); -#ifdef MODULE -static int xd[5] = { -1,-1,-1,-1, }; -#endif - -#define XD_DONT_USE_DMA 0 /* Initial value. may be overriden using - "nodma" module option */ -#define XD_INIT_DISK_DELAY (30) /* 30 ms delay during disk initialization */ - -/* Above may need to be increased if a problem with the 2nd drive detection - (ST11M controller) or resetting a controller (WD) appears */ - -static XD_INFO xd_info[XD_MAXDRIVES]; - -/* If you try this driver and find that your card is not detected by the driver at bootup, you need to add your BIOS - signature and details to the following list of signatures. A BIOS signature is a string embedded into the first - few bytes of your controller's on-board ROM BIOS. To find out what yours is, use something like MS-DOS's DEBUG - command. Run DEBUG, and then you can examine your BIOS signature with: - - d xxxx:0000 - - where xxxx is the segment of your controller (like C800 or D000 or something). On the ASCII dump at the right, you should - be able to see a string mentioning the manufacturer's copyright etc. Add this string into the table below. The parameters - in the table are, in order: - - offset ; this is the offset (in bytes) from the start of your ROM where the signature starts - signature ; this is the actual text of the signature - xd_?_init_controller ; this is the controller init routine used by your controller - xd_?_init_drive ; this is the drive init routine used by your controller - - The controllers directly supported at the moment are: DTC 5150x, WD 1004A27X, ST11M/R and override. If your controller is - made by the same manufacturer as one of these, try using the same init routines as they do. If that doesn't work, your - best bet is to use the "override" routines. These routines use a "portable" method of getting the disk's geometry, and - may work with your card. If none of these seem to work, try sending me some email and I'll see what I can do <grin>. - - NOTE: You can now specify your XT controller's parameters from the command line in the form xd=TYPE,IRQ,IO,DMA. The driver - should be able to detect your drive's geometry from this info. (eg: xd=0,5,0x320,3 is the "standard"). */ - -#include <asm/page.h> -#define xd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,get_order(size)) -#define xd_dma_mem_free(addr, size) free_pages(addr, get_order(size)) -static char *xd_dma_buffer; - -static XD_SIGNATURE xd_sigs[] __initdata = { - { 0x0000,"Override geometry handler",NULL,xd_override_init_drive,"n unknown" }, /* Pat Mackinlay, pat@it.com.au */ - { 0x0008,"[BXD06 (C) DTC 17-MAY-1985]",xd_dtc_init_controller,xd_dtc5150cx_init_drive," DTC 5150CX" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x000B,"CRD18A Not an IBM rom. (C) Copyright Data Technology Corp. 05/31/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Todd Fries, tfries@umr.edu */ - { 0x000B,"CXD23A Not an IBM ROM (C)Copyright Data Technology Corp 12/03/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Pat Mackinlay, pat@it.com.au */ - { 0x0008,"07/15/86(C) Copyright 1986 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. 1002-27X" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x0008,"06/24/88(C) Copyright 1988 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. WDXT-GEN2" }, /* Dan Newcombe, newcombe@aa.csc.peachnet.edu */ - { 0x0015,"SEAGATE ST11 BIOS REVISION",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Salvador Abreu, spa@fct.unl.pt */ - { 0x0010,"ST11R BIOS",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Risto Kankkunen, risto.kankkunen@cs.helsinki.fi */ - { 0x0010,"ST11 BIOS v1.7",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11R" }, /* Alan Hourihane, alanh@fairlite.demon.co.uk */ - { 0x1000,"(c)Copyright 1987 SMS",xd_omti_init_controller,xd_omti_init_drive,"n OMTI 5520" }, /* Dirk Melchers, dirk@merlin.nbg.sub.org */ - { 0x0006,"COPYRIGHT XEBEC (C) 1984",xd_xebec_init_controller,xd_xebec_init_drive," XEBEC" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x0008,"(C) Copyright 1984 Western Digital Corp", xd_wd_init_controller, xd_wd_init_drive," Western Dig. 1002s-wx2" }, - { 0x0008,"(C) Copyright 1986 Western Digital Corporation", xd_wd_init_controller, xd_wd_init_drive," 1986 Western Digital" }, /* jfree@sovereign.org */ -}; - -static unsigned int xd_bases[] __initdata = -{ - 0xC8000, 0xCA000, 0xCC000, - 0xCE000, 0xD0000, 0xD2000, - 0xD4000, 0xD6000, 0xD8000, - 0xDA000, 0xDC000, 0xDE000, - 0xE0000 -}; - -static DEFINE_SPINLOCK(xd_lock); - -static struct gendisk *xd_gendisk[2]; - -static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo); - -static const struct block_device_operations xd_fops = { - .owner = THIS_MODULE, - .ioctl = xd_ioctl, - .getgeo = xd_getgeo, -}; -static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int); -static u_char xd_drives, xd_irq = 5, xd_dma = 3, xd_maxsectors; -static u_char xd_override __initdata = 0, xd_type __initdata = 0; -static u_short xd_iobase = 0x320; -static int xd_geo[XD_MAXDRIVES*3] __initdata = { 0, }; - -static volatile int xdc_busy; -static struct timer_list xd_watchdog_int; - -static volatile u_char xd_error; -static bool nodma = XD_DONT_USE_DMA; - -static struct request_queue *xd_queue; - -/* xd_init: register the block device number and set up pointer tables */ -static int __init xd_init(void) -{ - u_char i,controller; - unsigned int address; - int err; - -#ifdef MODULE - { - u_char count = 0; - for (i = 4; i > 0; i--) - if (((xd[i] = xd[i-1]) >= 0) && !count) - count = i; - if ((xd[0] = count)) - do_xd_setup(xd); - } -#endif - - init_timer (&xd_watchdog_int); xd_watchdog_int.function = xd_watchdog; - - err = -EBUSY; - if (register_blkdev(XT_DISK_MAJOR, "xd")) - goto out1; - - err = -ENOMEM; - xd_queue = blk_init_queue(do_xd_request, &xd_lock); - if (!xd_queue) - goto out1a; - - if (xd_detect(&controller,&address)) { - - printk("Detected a%s controller (type %d) at address %06x\n", - xd_sigs[controller].name,controller,address); - if (!request_region(xd_iobase,4,"xd")) { - printk("xd: Ports at 0x%x are not available\n", - xd_iobase); - goto out2; - } - if (controller) - xd_sigs[controller].init_controller(address); - xd_drives = xd_initdrives(xd_sigs[controller].init_drive); - - printk("Detected %d hard drive%s (using IRQ%d & DMA%d)\n", - xd_drives,xd_drives == 1 ? "" : "s",xd_irq,xd_dma); - } - - /* - * With the drive detected, xd_maxsectors should now be known. - * If xd_maxsectors is 0, nothing was detected and we fall through - * to return -ENODEV - */ - if (!xd_dma_buffer && xd_maxsectors) { - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - if (!xd_dma_buffer) { - printk(KERN_ERR "xd: Out of memory.\n"); - goto out3; - } - } - - err = -ENODEV; - if (!xd_drives) - goto out3; - - for (i = 0; i < xd_drives; i++) { - XD_INFO *p = &xd_info[i]; - struct gendisk *disk = alloc_disk(64); - if (!disk) - goto Enomem; - p->unit = i; - disk->major = XT_DISK_MAJOR; - disk->first_minor = i<<6; - sprintf(disk->disk_name, "xd%c", i+'a'); - disk->fops = &xd_fops; - disk->private_data = p; - disk->queue = xd_queue; - set_capacity(disk, p->heads * p->cylinders * p->sectors); - printk(" %s: CHS=%d/%d/%d\n", disk->disk_name, - p->cylinders, p->heads, p->sectors); - xd_gendisk[i] = disk; - } - - err = -EBUSY; - if (request_irq(xd_irq,xd_interrupt_handler, 0, "XT hard disk", NULL)) { - printk("xd: unable to get IRQ%d\n",xd_irq); - goto out4; - } - - if (request_dma(xd_dma,"xd")) { - printk("xd: unable to get DMA%d\n",xd_dma); - goto out5; - } - - /* xd_maxsectors depends on controller - so set after detection */ - blk_queue_max_hw_sectors(xd_queue, xd_maxsectors); - - for (i = 0; i < xd_drives; i++) - add_disk(xd_gendisk[i]); - - return 0; - -out5: - free_irq(xd_irq, NULL); -out4: - for (i = 0; i < xd_drives; i++) - put_disk(xd_gendisk[i]); -out3: - if (xd_maxsectors) - release_region(xd_iobase,4); - - if (xd_dma_buffer) - xd_dma_mem_free((unsigned long)xd_dma_buffer, - xd_maxsectors * 0x200); -out2: - blk_cleanup_queue(xd_queue); -out1a: - unregister_blkdev(XT_DISK_MAJOR, "xd"); -out1: - return err; -Enomem: - err = -ENOMEM; - while (i--) - put_disk(xd_gendisk[i]); - goto out3; -} - -/* xd_detect: scan the possible BIOS ROM locations for the signature strings */ -static u_char __init xd_detect (u_char *controller, unsigned int *address) -{ - int i, j; - - if (xd_override) - { - *controller = xd_type; - *address = 0; - return(1); - } - - for (i = 0; i < ARRAY_SIZE(xd_bases); i++) { - void __iomem *p = ioremap(xd_bases[i], 0x2000); - if (!p) - continue; - for (j = 1; j < ARRAY_SIZE(xd_sigs); j++) { - const char *s = xd_sigs[j].string; - if (check_signature(p + xd_sigs[j].offset, s, strlen(s))) { - *controller = j; - xd_type = j; - *address = xd_bases[i]; - iounmap(p); - return 1; - } - } - iounmap(p); - } - return 0; -} - -/* do_xd_request: handle an incoming request */ -static void do_xd_request (struct request_queue * q) -{ - struct request *req; - - if (xdc_busy) - return; - - req = blk_fetch_request(q); - while (req) { - unsigned block = blk_rq_pos(req); - unsigned count = blk_rq_cur_sectors(req); - XD_INFO *disk = req->rq_disk->private_data; - int res = -EIO; - int retry; - - if (req->cmd_type != REQ_TYPE_FS) - goto done; - if (block + count > get_capacity(req->rq_disk)) - goto done; - for (retry = 0; (retry < XD_RETRIES) && !res; retry++) - res = xd_readwrite(rq_data_dir(req), disk, req->buffer, - block, count); - done: - /* wrap up, 0 = success, -errno = fail */ - if (!__blk_end_request_cur(req, res)) - req = blk_fetch_request(q); - } -} - -static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - XD_INFO *p = bdev->bd_disk->private_data; - - geo->heads = p->heads; - geo->sectors = p->sectors; - geo->cylinders = p->cylinders; - return 0; -} - -/* xd_ioctl: handle device ioctl's */ -static int xd_locked_ioctl(struct block_device *bdev, fmode_t mode, u_int cmd, u_long arg) -{ - switch (cmd) { - case HDIO_SET_DMA: - if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (xdc_busy) return -EBUSY; - nodma = !arg; - if (nodma && xd_dma_buffer) { - xd_dma_mem_free((unsigned long)xd_dma_buffer, - xd_maxsectors * 0x200); - xd_dma_buffer = NULL; - } else if (!nodma && !xd_dma_buffer) { - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - if (!xd_dma_buffer) { - nodma = XD_DONT_USE_DMA; - return -ENOMEM; - } - } - return 0; - case HDIO_GET_DMA: - return put_user(!nodma, (long __user *) arg); - case HDIO_GET_MULTCOUNT: - return put_user(xd_maxsectors, (long __user *) arg); - default: - return -EINVAL; - } -} - -static int xd_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long param) -{ - int ret; - - mutex_lock(&xd_mutex); - ret = xd_locked_ioctl(bdev, mode, cmd, param); - mutex_unlock(&xd_mutex); - - return ret; -} - -/* xd_readwrite: handle a read/write request */ -static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_int count) -{ - int drive = p->unit; - u_char cmdblk[6],sense[4]; - u_short track,cylinder; - u_char head,sector,control,mode = PIO_MODE,temp; - char **real_buffer; - register int i; - -#ifdef DEBUG_READWRITE - printk("xd_readwrite: operation = %s, drive = %d, buffer = 0x%X, block = %d, count = %d\n",operation == READ ? "read" : "write",drive,buffer,block,count); -#endif /* DEBUG_READWRITE */ - - spin_unlock_irq(&xd_lock); - - control = p->control; - if (!xd_dma_buffer) - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - while (count) { - temp = count < xd_maxsectors ? count : xd_maxsectors; - - track = block / p->sectors; - head = track % p->heads; - cylinder = track / p->heads; - sector = block % p->sectors; - -#ifdef DEBUG_READWRITE - printk("xd_readwrite: drive = %d, head = %d, cylinder = %d, sector = %d, count = %d\n",drive,head,cylinder,sector,temp); -#endif /* DEBUG_READWRITE */ - - if (xd_dma_buffer) { - mode = xd_setup_dma(operation == READ ? DMA_MODE_READ : DMA_MODE_WRITE,(u_char *)(xd_dma_buffer),temp * 0x200); - real_buffer = &xd_dma_buffer; - for (i=0; i < (temp * 0x200); i++) - xd_dma_buffer[i] = buffer[i]; - } - else - real_buffer = &buffer; - - xd_build(cmdblk,operation == READ ? CMD_READ : CMD_WRITE,drive,head,cylinder,sector,temp & 0xFF,control); - - switch (xd_command(cmdblk,mode,(u_char *)(*real_buffer),(u_char *)(*real_buffer),sense,XD_TIMEOUT)) { - case 1: - printk("xd%c: %s timeout, recalibrating drive\n",'a'+drive,(operation == READ ? "read" : "write")); - xd_recalibrate(drive); - spin_lock_irq(&xd_lock); - return -EIO; - case 2: - if (sense[0] & 0x30) { - printk("xd%c: %s - ",'a'+drive,(operation == READ ? "reading" : "writing")); - switch ((sense[0] & 0x30) >> 4) { - case 0: printk("drive error, code = 0x%X",sense[0] & 0x0F); - break; - case 1: printk("controller error, code = 0x%X",sense[0] & 0x0F); - break; - case 2: printk("command error, code = 0x%X",sense[0] & 0x0F); - break; - case 3: printk("miscellaneous error, code = 0x%X",sense[0] & 0x0F); - break; - } - } - if (sense[0] & 0x80) - printk(" - CHS = %d/%d/%d\n",((sense[2] & 0xC0) << 2) | sense[3],sense[1] & 0x1F,sense[2] & 0x3F); - /* reported drive number = (sense[1] & 0xE0) >> 5 */ - else - printk(" - no valid disk address\n"); - spin_lock_irq(&xd_lock); - return -EIO; - } - if (xd_dma_buffer) - for (i=0; i < (temp * 0x200); i++) - buffer[i] = xd_dma_buffer[i]; - - count -= temp, buffer += temp * 0x200, block += temp; - } - spin_lock_irq(&xd_lock); - return 0; -} - -/* xd_recalibrate: recalibrate a given drive and reset controller if necessary */ -static void xd_recalibrate (u_char drive) -{ - u_char cmdblk[6]; - - xd_build(cmdblk,CMD_RECALIBRATE,drive,0,0,0,0,0); - if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 8)) - printk("xd%c: warning! error recalibrating, controller may be unstable\n", 'a'+drive); -} - -/* xd_interrupt_handler: interrupt service routine */ -static irqreturn_t xd_interrupt_handler(int irq, void *dev_id) -{ - if (inb(XD_STATUS) & STAT_INTERRUPT) { /* check if it was our device */ -#ifdef DEBUG_OTHER - printk("xd_interrupt_handler: interrupt detected\n"); -#endif /* DEBUG_OTHER */ - outb(0,XD_CONTROL); /* acknowledge interrupt */ - wake_up(&xd_wait_int); /* and wake up sleeping processes */ - return IRQ_HANDLED; - } - else - printk("xd: unexpected interrupt\n"); - return IRQ_NONE; -} - -/* xd_setup_dma: set up the DMA controller for a data transfer */ -static u_char xd_setup_dma (u_char mode,u_char *buffer,u_int count) -{ - unsigned long f; - - if (nodma) - return (PIO_MODE); - if (((unsigned long) buffer & 0xFFFF0000) != (((unsigned long) buffer + count) & 0xFFFF0000)) { -#ifdef DEBUG_OTHER - printk("xd_setup_dma: using PIO, transfer overlaps 64k boundary\n"); -#endif /* DEBUG_OTHER */ - return (PIO_MODE); - } - - f=claim_dma_lock(); - disable_dma(xd_dma); - clear_dma_ff(xd_dma); - set_dma_mode(xd_dma,mode); - set_dma_addr(xd_dma, (unsigned long) buffer); - set_dma_count(xd_dma,count); - - release_dma_lock(f); - - return (DMA_MODE); /* use DMA and INT */ -} - -/* xd_build: put stuff into an array in a format suitable for the controller */ -static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control) -{ - cmdblk[0] = command; - cmdblk[1] = ((drive & 0x07) << 5) | (head & 0x1F); - cmdblk[2] = ((cylinder & 0x300) >> 2) | (sector & 0x3F); - cmdblk[3] = cylinder & 0xFF; - cmdblk[4] = count; - cmdblk[5] = control; - - return (cmdblk); -} - -static void xd_watchdog (unsigned long unused) -{ - xd_error = 1; - wake_up(&xd_wait_int); -} - -/* xd_waitport: waits until port & mask == flags or a timeout occurs. return 1 for a timeout */ -static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout) -{ - u_long expiry = jiffies + timeout; - int success; - - xdc_busy = 1; - while ((success = ((inb(port) & mask) != flags)) && time_before(jiffies, expiry)) - schedule_timeout_uninterruptible(1); - xdc_busy = 0; - return (success); -} - -static inline u_int xd_wait_for_IRQ (void) -{ - unsigned long flags; - xd_watchdog_int.expires = jiffies + 8 * HZ; - add_timer(&xd_watchdog_int); - - flags=claim_dma_lock(); - enable_dma(xd_dma); - release_dma_lock(flags); - - sleep_on(&xd_wait_int); - del_timer(&xd_watchdog_int); - xdc_busy = 0; - - flags=claim_dma_lock(); - disable_dma(xd_dma); - release_dma_lock(flags); - - if (xd_error) { - printk("xd: missed IRQ - command aborted\n"); - xd_error = 0; - return (1); - } - return (0); -} - -/* xd_command: handle all data transfers necessary for a single command */ -static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout) -{ - u_char cmdblk[6],csb,complete = 0; - -#ifdef DEBUG_COMMAND - printk("xd_command: command = 0x%X, mode = 0x%X, indata = 0x%X, outdata = 0x%X, sense = 0x%X\n",command,mode,indata,outdata,sense); -#endif /* DEBUG_COMMAND */ - - outb(0,XD_SELECT); - outb(mode,XD_CONTROL); - - if (xd_waitport(XD_STATUS,STAT_SELECT,STAT_SELECT,timeout)) - return (1); - - while (!complete) { - if (xd_waitport(XD_STATUS,STAT_READY,STAT_READY,timeout)) - return (1); - - switch (inb(XD_STATUS) & (STAT_COMMAND | STAT_INPUT)) { - case 0: - if (mode == DMA_MODE) { - if (xd_wait_for_IRQ()) - return (1); - } else - outb(outdata ? *outdata++ : 0,XD_DATA); - break; - case STAT_INPUT: - if (mode == DMA_MODE) { - if (xd_wait_for_IRQ()) - return (1); - } else - if (indata) - *indata++ = inb(XD_DATA); - else - inb(XD_DATA); - break; - case STAT_COMMAND: - outb(command ? *command++ : 0,XD_DATA); - break; - case STAT_COMMAND | STAT_INPUT: - complete = 1; - break; - } - } - csb = inb(XD_DATA); - - if (xd_waitport(XD_STATUS,0,STAT_SELECT,timeout)) /* wait until deselected */ - return (1); - - if (csb & CSB_ERROR) { /* read sense data if error */ - xd_build(cmdblk,CMD_SENSE,(csb & CSB_LUN) >> 5,0,0,0,0,0); - if (xd_command(cmdblk,0,sense,NULL,NULL,XD_TIMEOUT)) - printk("xd: warning! sense command failed!\n"); - } - -#ifdef DEBUG_COMMAND - printk("xd_command: completed with csb = 0x%X\n",csb); -#endif /* DEBUG_COMMAND */ - - return (csb & CSB_ERROR); -} - -static u_char __init xd_initdrives (void (*init_drive)(u_char drive)) -{ - u_char cmdblk[6],i,count = 0; - - for (i = 0; i < XD_MAXDRIVES; i++) { - xd_build(cmdblk,CMD_TESTREADY,i,0,0,0,0,0); - if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT*8)) { - msleep_interruptible(XD_INIT_DISK_DELAY); - - init_drive(count); - count++; - - msleep_interruptible(XD_INIT_DISK_DELAY); - } - } - return (count); -} - -static void __init xd_manual_geo_set (u_char drive) -{ - xd_info[drive].heads = (u_char)(xd_geo[3 * drive + 1]); - xd_info[drive].cylinders = (u_short)(xd_geo[3 * drive]); - xd_info[drive].sectors = (u_char)(xd_geo[3 * drive + 2]); -} - -static void __init xd_dtc_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xCA000: xd_iobase = 0x324; - case 0xD0000: /*5150CX*/ - case 0xD8000: break; /*5150CX & 5150XL*/ - default: printk("xd_dtc_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x01; /* my card seems to have trouble doing multi-block transfers? */ - - outb(0,XD_RESET); /* reset the controller */ -} - - -static void __init xd_dtc5150cx_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS chip may be removed */ - static u_short geometry_table[][4] = { - {0x200,8,0x200,0x100}, - {0x267,2,0x267,0x267}, - {0x264,4,0x264,0x80}, - {0x132,4,0x132,0x0}, - {0x132,2,0x80, 0x132}, - {0x177,8,0x177,0x0}, - {0x132,8,0x84, 0x0}, - {}, /* not used */ - {0x132,6,0x80, 0x100}, - {0x200,6,0x100,0x100}, - {0x264,2,0x264,0x80}, - {0x280,4,0x280,0x100}, - {0x2B9,3,0x2B9,0x2B9}, - {0x2B9,5,0x2B9,0x2B9}, - {0x280,6,0x280,0x100}, - {0x132,4,0x132,0x0}}; - u_char n; - - n = inb(XD_JUMPER); - n = (drive ? n : (n >> 2)) & 0x33; - n = (n | (n >> 2)) & 0x0F; - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else - if (n != 7) { - xd_info[drive].heads = (u_char)(geometry_table[n][1]); /* heads */ - xd_info[drive].cylinders = geometry_table[n][0]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; /* reduced write */ - xd_info[drive].precomp = geometry_table[n][3] /* write precomp */ - xd_info[drive].ecc = 0x0B; /* ecc length */ -#endif /* 0 */ - } - else { - printk("xd%c: undetermined drive geometry\n",'a'+drive); - return; - } - xd_info[drive].control = 5; /* control byte */ - xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B); - xd_recalibrate(drive); -} - -static void __init xd_dtc_init_drive (u_char drive) -{ - u_char cmdblk[6],buf[64]; - - xd_build(cmdblk,CMD_DTCGETGEOM,drive,0,0,0,0,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x0A]; /* heads */ - xd_info[drive].cylinders = ((u_short *) (buf))[0x04]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); -#if 0 - xd_info[drive].rwrite = ((u_short *) (buf + 1))[0x05]; /* reduced write */ - xd_info[drive].precomp = ((u_short *) (buf + 1))[0x06]; /* write precomp */ - xd_info[drive].ecc = buf[0x0F]; /* ecc length */ -#endif /* 0 */ - xd_info[drive].control = 0; /* control byte */ - - xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,((u_short *) (buf + 1))[0x05],((u_short *) (buf + 1))[0x06],buf[0x0F]); - xd_build(cmdblk,CMD_DTCSETSTEP,drive,0,0,0,0,7); - if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2)) - printk("xd_dtc_init_drive: error setting step rate for xd%c\n", 'a'+drive); - } - else - printk("xd_dtc_init_drive: error reading geometry for xd%c\n", 'a'+drive); -} - -static void __init xd_wd_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xCA000: xd_iobase = 0x324; break; - case 0xCC000: xd_iobase = 0x328; break; - case 0xCE000: xd_iobase = 0x32C; break; - case 0xD0000: xd_iobase = 0x328; break; /* ? */ - case 0xD8000: xd_iobase = 0x32C; break; /* ? */ - default: printk("xd_wd_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x01; /* this one doesn't wrap properly either... */ - - outb(0,XD_RESET); /* reset the controller */ - - msleep(XD_INIT_DISK_DELAY); -} - -static void __init xd_wd_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS may be disabled */ - static u_short geometry_table[][4] = { - {0x264,4,0x1C2,0x1C2}, /* common part */ - {0x132,4,0x099,0x0}, - {0x267,2,0x1C2,0x1C2}, - {0x267,4,0x1C2,0x1C2}, - - {0x334,6,0x335,0x335}, /* 1004 series RLL */ - {0x30E,4,0x30F,0x3DC}, - {0x30E,2,0x30F,0x30F}, - {0x267,4,0x268,0x268}, - - {0x3D5,5,0x3D6,0x3D6}, /* 1002 series RLL */ - {0x3DB,7,0x3DC,0x3DC}, - {0x264,4,0x265,0x265}, - {0x267,4,0x268,0x268}}; - - u_char cmdblk[6],buf[0x200]; - u_char n = 0,rll,jumper_state,use_jumper_geo; - u_char wd_1002 = (xd_sigs[xd_type].string[7] == '6'); - - jumper_state = ~(inb(0x322)); - if (jumper_state & 0x40) - xd_irq = 9; - rll = (jumper_state & 0x30) ? (0x04 << wd_1002) : 0; - xd_build(cmdblk,CMD_READ,drive,0,0,0,1,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x1AF]; /* heads */ - xd_info[drive].cylinders = ((u_short *) (buf + 1))[0xD6]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); -#if 0 - xd_info[drive].rwrite = ((u_short *) (buf))[0xD8]; /* reduced write */ - xd_info[drive].wprecomp = ((u_short *) (buf))[0xDA]; /* write precomp */ - xd_info[drive].ecc = buf[0x1B4]; /* ecc length */ -#endif /* 0 */ - xd_info[drive].control = buf[0x1B5]; /* control byte */ - use_jumper_geo = !(xd_info[drive].heads) || !(xd_info[drive].cylinders); - if (xd_geo[3*drive]) { - xd_manual_geo_set(drive); - xd_info[drive].control = rll ? 7 : 5; - } - else if (use_jumper_geo) { - n = (((jumper_state & 0x0F) >> (drive << 1)) & 0x03) | rll; - xd_info[drive].cylinders = geometry_table[n][0]; - xd_info[drive].heads = (u_char)(geometry_table[n][1]); - xd_info[drive].control = rll ? 7 : 5; -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; - xd_info[drive].wprecomp = geometry_table[n][3]; - xd_info[drive].ecc = 0x0B; -#endif /* 0 */ - } - if (!wd_1002) { - if (use_jumper_geo) - xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders, - geometry_table[n][2],geometry_table[n][3],0x0B); - else - xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders, - ((u_short *) (buf))[0xD8],((u_short *) (buf))[0xDA],buf[0x1B4]); - } - /* 1002 based RLL controller requests converted addressing, but reports physical - (physical 26 sec., logical 17 sec.) - 1004 based ???? */ - if (rll & wd_1002) { - if ((xd_info[drive].cylinders *= 26, - xd_info[drive].cylinders /= 17) > 1023) - xd_info[drive].cylinders = 1023; /* 1024 ? */ -#if 0 - xd_info[drive].rwrite *= 26; - xd_info[drive].rwrite /= 17; - xd_info[drive].wprecomp *= 26 - xd_info[drive].wprecomp /= 17; -#endif /* 0 */ - } - } - else - printk("xd_wd_init_drive: error reading geometry for xd%c\n",'a'+drive); - -} - -static void __init xd_seagate_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xD0000: xd_iobase = 0x324; break; - case 0xD8000: xd_iobase = 0x328; break; - case 0xE0000: xd_iobase = 0x32C; break; - default: printk("xd_seagate_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x40; - - outb(0,XD_RESET); /* reset the controller */ -} - -static void __init xd_seagate_init_drive (u_char drive) -{ - u_char cmdblk[6],buf[0x200]; - - xd_build(cmdblk,CMD_ST11GETGEOM,drive,0,0,0,1,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x04]; /* heads */ - xd_info[drive].cylinders = (buf[0x02] << 8) | buf[0x03]; /* cylinders */ - xd_info[drive].sectors = buf[0x05]; /* sectors */ - xd_info[drive].control = 0; /* control byte */ - } - else - printk("xd_seagate_init_drive: error reading geometry from xd%c\n", 'a'+drive); -} - -/* Omti support courtesy Dirk Melchers */ -static void __init xd_omti_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xD0000: xd_iobase = 0x324; break; - case 0xD8000: xd_iobase = 0x328; break; - case 0xE0000: xd_iobase = 0x32C; break; - default: printk("xd_omti_init_controller: unsupported BIOS address %06x\n",address); - break; - } - - xd_maxsectors = 0x40; - - outb(0,XD_RESET); /* reset the controller */ -} - -static void __init xd_omti_init_drive (u_char drive) -{ - /* gets infos from drive */ - xd_override_init_drive(drive); - - /* set other parameters, Hardcoded, not that nice :-) */ - xd_info[drive].control = 2; -} - -/* Xebec support (AK) */ -static void __init xd_xebec_init_controller (unsigned int address) -{ -/* iobase may be set manually in range 0x300 - 0x33C - irq may be set manually to 2(9),3,4,5,6,7 - dma may be set manually to 1,2,3 - (How to detect them ???) -BIOS address may be set manually in range 0x0 - 0xF8000 -If you need non-standard settings use the xd=... command */ - - switch (address) { - case 0x00000: - case 0xC8000: /* initially: xd_iobase==0x320 */ - case 0xD0000: - case 0xD2000: - case 0xD4000: - case 0xD6000: - case 0xD8000: - case 0xDA000: - case 0xDC000: - case 0xDE000: - case 0xE0000: break; - default: printk("xd_xebec_init_controller: unsupported BIOS address %06x\n",address); - break; - } - - xd_maxsectors = 0x01; - outb(0,XD_RESET); /* reset the controller */ - - msleep(XD_INIT_DISK_DELAY); -} - -static void __init xd_xebec_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS chip may be removed */ - static u_short geometry_table[][5] = { - {0x132,4,0x080,0x080,0x7}, - {0x132,4,0x080,0x080,0x17}, - {0x264,2,0x100,0x100,0x7}, - {0x264,2,0x100,0x100,0x17}, - {0x132,8,0x080,0x080,0x7}, - {0x132,8,0x080,0x080,0x17}, - {0x264,4,0x100,0x100,0x6}, - {0x264,4,0x100,0x100,0x17}, - {0x2BC,5,0x2BC,0x12C,0x6}, - {0x3A5,4,0x3A5,0x3A5,0x7}, - {0x26C,6,0x26C,0x26C,0x7}, - {0x200,8,0x200,0x100,0x17}, - {0x400,5,0x400,0x400,0x7}, - {0x400,6,0x400,0x400,0x7}, - {0x264,8,0x264,0x200,0x17}, - {0x33E,7,0x33E,0x200,0x7}}; - u_char n; - - n = inb(XD_JUMPER) & 0x0F; /* BIOS's drive number: same geometry - is assumed for BOTH drives */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else { - xd_info[drive].heads = (u_char)(geometry_table[n][1]); /* heads */ - xd_info[drive].cylinders = geometry_table[n][0]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; /* reduced write */ - xd_info[drive].precomp = geometry_table[n][3] /* write precomp */ - xd_info[drive].ecc = 0x0B; /* ecc length */ -#endif /* 0 */ - } - xd_info[drive].control = geometry_table[n][4]; /* control byte */ - xd_setparam(CMD_XBSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B); - xd_recalibrate(drive); -} - -/* xd_override_init_drive: this finds disk geometry in a "binary search" style, narrowing in on the "correct" number of heads - etc. by trying values until it gets the highest successful value. Idea courtesy Salvador Abreu (spa@fct.unl.pt). */ -static void __init xd_override_init_drive (u_char drive) -{ - u_short min[] = { 0,0,0 },max[] = { 16,1024,64 },test[] = { 0,0,0 }; - u_char cmdblk[6],i; - - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else { - for (i = 0; i < 3; i++) { - while (min[i] != max[i] - 1) { - test[i] = (min[i] + max[i]) / 2; - xd_build(cmdblk,CMD_SEEK,drive,(u_char) test[0],(u_short) test[1],(u_char) test[2],0,0); - if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2)) - min[i] = test[i]; - else - max[i] = test[i]; - } - test[i] = min[i]; - } - xd_info[drive].heads = (u_char) min[0] + 1; - xd_info[drive].cylinders = (u_short) min[1] + 1; - xd_info[drive].sectors = (u_char) min[2] + 1; - } - xd_info[drive].control = 0; -} - -/* xd_setup: initialise controller from command line parameters */ -static void __init do_xd_setup (int *integers) -{ - switch (integers[0]) { - case 4: if (integers[4] < 0) - nodma = 1; - else if (integers[4] < 8) - xd_dma = integers[4]; - case 3: if ((integers[3] > 0) && (integers[3] <= 0x3FC)) - xd_iobase = integers[3]; - case 2: if ((integers[2] > 0) && (integers[2] < 16)) - xd_irq = integers[2]; - case 1: xd_override = 1; - if ((integers[1] >= 0) && (integers[1] < ARRAY_SIZE(xd_sigs))) - xd_type = integers[1]; - case 0: break; - default:printk("xd: too many parameters for xd\n"); - } - xd_maxsectors = 0x01; -} - -/* xd_setparam: set the drive characteristics */ -static void __init xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc) -{ - u_char cmdblk[14]; - - xd_build(cmdblk,command,drive,0,0,0,0,0); - cmdblk[6] = (u_char) (cylinders >> 8) & 0x03; - cmdblk[7] = (u_char) (cylinders & 0xFF); - cmdblk[8] = heads & 0x1F; - cmdblk[9] = (u_char) (rwrite >> 8) & 0x03; - cmdblk[10] = (u_char) (rwrite & 0xFF); - cmdblk[11] = (u_char) (wprecomp >> 8) & 0x03; - cmdblk[12] = (u_char) (wprecomp & 0xFF); - cmdblk[13] = ecc; - - /* Some controllers require geometry info as data, not command */ - - if (xd_command(cmdblk,PIO_MODE,NULL,&cmdblk[6],NULL,XD_TIMEOUT * 2)) - printk("xd: error setting characteristics for xd%c\n", 'a'+drive); -} - - -#ifdef MODULE - -module_param_array(xd, int, NULL, 0); -module_param_array(xd_geo, int, NULL, 0); -module_param(nodma, bool, 0); - -MODULE_LICENSE("GPL"); - -void cleanup_module(void) -{ - int i; - unregister_blkdev(XT_DISK_MAJOR, "xd"); - for (i = 0; i < xd_drives; i++) { - del_gendisk(xd_gendisk[i]); - put_disk(xd_gendisk[i]); - } - blk_cleanup_queue(xd_queue); - release_region(xd_iobase,4); - if (xd_drives) { - free_irq(xd_irq, NULL); - free_dma(xd_dma); - if (xd_dma_buffer) - xd_dma_mem_free((unsigned long)xd_dma_buffer, xd_maxsectors * 0x200); - } -} -#else - -static int __init xd_setup (char *str) -{ - int ints[5]; - get_options (str, ARRAY_SIZE (ints), ints); - do_xd_setup (ints); - return 1; -} - -/* xd_manual_geo_init: initialise drive geometry from command line parameters - (used only for WD drives) */ -static int __init xd_manual_geo_init (char *str) -{ - int i, integers[1 + 3*XD_MAXDRIVES]; - - get_options (str, ARRAY_SIZE (integers), integers); - if (integers[0]%3 != 0) { - printk("xd: incorrect number of parameters for xd_geo\n"); - return 1; - } - for (i = 0; (i < integers[0]) && (i < 3*XD_MAXDRIVES); i++) - xd_geo[i] = integers[i+1]; - return 1; -} - -__setup ("xd=", xd_setup); -__setup ("xd_geo=", xd_manual_geo_init); - -#endif /* MODULE */ - -module_init(xd_init); -MODULE_ALIAS_BLOCKDEV_MAJOR(XT_DISK_MAJOR); diff --git a/drivers/block/xd.h b/drivers/block/xd.h deleted file mode 100644 index 37cacef16e93..000000000000 --- a/drivers/block/xd.h +++ /dev/null @@ -1,134 +0,0 @@ -#ifndef _LINUX_XD_H -#define _LINUX_XD_H - -/* - * This file contains the definitions for the IO ports and errors etc. for XT hard disk controllers (at least the DTC 5150X). - * - * Author: Pat Mackinlay, pat@it.com.au - * Date: 29/09/92 - * - * Revised: 01/01/93, ... - * - * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler, kevinf@agora.rain.com) - * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and Wim Van Dorst. - */ - -#include <linux/interrupt.h> - -/* XT hard disk controller registers */ -#define XD_DATA (xd_iobase + 0x00) /* data RW register */ -#define XD_RESET (xd_iobase + 0x01) /* reset WO register */ -#define XD_STATUS (xd_iobase + 0x01) /* status RO register */ -#define XD_SELECT (xd_iobase + 0x02) /* select WO register */ -#define XD_JUMPER (xd_iobase + 0x02) /* jumper RO register */ -#define XD_CONTROL (xd_iobase + 0x03) /* DMAE/INTE WO register */ -#define XD_RESERVED (xd_iobase + 0x03) /* reserved */ - -/* XT hard disk controller commands (incomplete list) */ -#define CMD_TESTREADY 0x00 /* test drive ready */ -#define CMD_RECALIBRATE 0x01 /* recalibrate drive */ -#define CMD_SENSE 0x03 /* request sense */ -#define CMD_FORMATDRV 0x04 /* format drive */ -#define CMD_VERIFY 0x05 /* read verify */ -#define CMD_FORMATTRK 0x06 /* format track */ -#define CMD_FORMATBAD 0x07 /* format bad track */ -#define CMD_READ 0x08 /* read */ -#define CMD_WRITE 0x0A /* write */ -#define CMD_SEEK 0x0B /* seek */ - -/* Controller specific commands */ -#define CMD_DTCSETPARAM 0x0C /* set drive parameters (DTC 5150X & CX only?) */ -#define CMD_DTCGETECC 0x0D /* get ecc error length (DTC 5150X only?) */ -#define CMD_DTCREADBUF 0x0E /* read sector buffer (DTC 5150X only?) */ -#define CMD_DTCWRITEBUF 0x0F /* write sector buffer (DTC 5150X only?) */ -#define CMD_DTCREMAPTRK 0x11 /* assign alternate track (DTC 5150X only?) */ -#define CMD_DTCGETPARAM 0xFB /* get drive parameters (DTC 5150X only?) */ -#define CMD_DTCSETSTEP 0xFC /* set step rate (DTC 5150X only?) */ -#define CMD_DTCSETGEOM 0xFE /* set geometry data (DTC 5150X only?) */ -#define CMD_DTCGETGEOM 0xFF /* get geometry data (DTC 5150X only?) */ -#define CMD_ST11GETGEOM 0xF8 /* get geometry data (Seagate ST11R/M only?) */ -#define CMD_WDSETPARAM 0x0C /* set drive parameters (WD 1004A27X only?) */ -#define CMD_XBSETPARAM 0x0C /* set drive parameters (XEBEC only?) */ - -/* Bits for command status byte */ -#define CSB_ERROR 0x02 /* error */ -#define CSB_LUN 0x20 /* logical Unit Number */ - -/* XT hard disk controller status bits */ -#define STAT_READY 0x01 /* controller is ready */ -#define STAT_INPUT 0x02 /* data flowing from controller to host */ -#define STAT_COMMAND 0x04 /* controller in command phase */ -#define STAT_SELECT 0x08 /* controller is selected */ -#define STAT_REQUEST 0x10 /* controller requesting data */ -#define STAT_INTERRUPT 0x20 /* controller requesting interrupt */ - -/* XT hard disk controller control bits */ -#define PIO_MODE 0x00 /* control bits to set for PIO */ -#define DMA_MODE 0x03 /* control bits to set for DMA & interrupt */ - -#define XD_MAXDRIVES 2 /* maximum 2 drives */ -#define XD_TIMEOUT HZ /* 1 second timeout */ -#define XD_RETRIES 4 /* maximum 4 retries */ - -#undef DEBUG /* define for debugging output */ - -#ifdef DEBUG - #define DEBUG_STARTUP /* debug driver initialisation */ - #define DEBUG_OVERRIDE /* debug override geometry detection */ - #define DEBUG_READWRITE /* debug each read/write command */ - #define DEBUG_OTHER /* debug misc. interrupt/DMA stuff */ - #define DEBUG_COMMAND /* debug each controller command */ -#endif /* DEBUG */ - -/* this structure defines the XT drives and their types */ -typedef struct { - u_char heads; - u_short cylinders; - u_char sectors; - u_char control; - int unit; -} XD_INFO; - -/* this structure defines a ROM BIOS signature */ -typedef struct { - unsigned int offset; - const char *string; - void (*init_controller)(unsigned int address); - void (*init_drive)(u_char drive); - const char *name; -} XD_SIGNATURE; - -#ifndef MODULE -static int xd_manual_geo_init (char *command); -#endif /* MODULE */ -static u_char xd_detect (u_char *controller, unsigned int *address); -static u_char xd_initdrives (void (*init_drive)(u_char drive)); - -static void do_xd_request (struct request_queue * q); -static int xd_ioctl (struct block_device *bdev,fmode_t mode,unsigned int cmd,unsigned long arg); -static int xd_readwrite (u_char operation,XD_INFO *disk,char *buffer,u_int block,u_int count); -static void xd_recalibrate (u_char drive); - -static irqreturn_t xd_interrupt_handler(int irq, void *dev_id); -static u_char xd_setup_dma (u_char opcode,u_char *buffer,u_int count); -static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control); -static void xd_watchdog (unsigned long unused); -static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout); -static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout); - -/* card specific setup and geometry gathering code */ -static void xd_dtc_init_controller (unsigned int address); -static void xd_dtc5150cx_init_drive (u_char drive); -static void xd_dtc_init_drive (u_char drive); -static void xd_wd_init_controller (unsigned int address); -static void xd_wd_init_drive (u_char drive); -static void xd_seagate_init_controller (unsigned int address); -static void xd_seagate_init_drive (u_char drive); -static void xd_omti_init_controller (unsigned int address); -static void xd_omti_init_drive (u_char drive); -static void xd_xebec_init_controller (unsigned int address); -static void xd_xebec_init_drive (u_char drive); -static void xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc); -static void xd_override_init_drive (u_char drive); - -#endif /* _LINUX_XD_H */ diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 5ac841ff6cc7..de1f319f7bd7 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -46,6 +46,7 @@ #include <xen/xen.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> +#include <xen/balloon.h> #include "common.h" /* @@ -239,6 +240,7 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num) ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap); BUG_ON(ret); + free_xenballooned_pages(segs_to_unmap, pages); segs_to_unmap = 0; } @@ -527,8 +529,8 @@ static int xen_blkbk_map(struct blkif_request *req, GFP_KERNEL); if (!persistent_gnt) return -ENOMEM; - persistent_gnt->page = alloc_page(GFP_KERNEL); - if (!persistent_gnt->page) { + if (alloc_xenballooned_pages(1, &persistent_gnt->page, + false)) { kfree(persistent_gnt); return -ENOMEM; } @@ -879,7 +881,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, goto fail_response; } - preq.dev = req->u.rw.handle; preq.sector_number = req->u.rw.sector_number; preq.nr_sects = 0; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 63980722db41..5e237f630c47 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -367,6 +367,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev) be->blkif = NULL; } + kfree(be->mode); kfree(be); dev_set_drvdata(&dev->dev, NULL); return 0; @@ -502,6 +503,7 @@ static void backend_changed(struct xenbus_watch *watch, = container_of(watch, struct backend_info, backend_watch); struct xenbus_device *dev = be->dev; int cdrom = 0; + unsigned long handle; char *device_type; DPRINTK(""); @@ -521,10 +523,10 @@ static void backend_changed(struct xenbus_watch *watch, return; } - if ((be->major || be->minor) && - ((be->major != major) || (be->minor != minor))) { - pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", - be->major, be->minor, major, minor); + if (be->major | be->minor) { + if (be->major != major || be->minor != minor) + pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", + be->major, be->minor, major, minor); return; } @@ -542,36 +544,33 @@ static void backend_changed(struct xenbus_watch *watch, kfree(device_type); } - if (be->major == 0 && be->minor == 0) { - /* Front end dir is a number, which is used as the handle. */ - - char *p = strrchr(dev->otherend, '/') + 1; - long handle; - err = strict_strtoul(p, 0, &handle); - if (err) - return; + /* Front end dir is a number, which is used as the handle. */ + err = strict_strtoul(strrchr(dev->otherend, '/') + 1, 0, &handle); + if (err) + return; - be->major = major; - be->minor = minor; + be->major = major; + be->minor = minor; - err = xen_vbd_create(be->blkif, handle, major, minor, - (NULL == strchr(be->mode, 'w')), cdrom); - if (err) { - be->major = 0; - be->minor = 0; - xenbus_dev_fatal(dev, err, "creating vbd structure"); - return; - } + err = xen_vbd_create(be->blkif, handle, major, minor, + !strchr(be->mode, 'w'), cdrom); + if (err) + xenbus_dev_fatal(dev, err, "creating vbd structure"); + else { err = xenvbd_sysfs_addif(dev); if (err) { xen_vbd_free(&be->blkif->vbd); - be->major = 0; - be->minor = 0; xenbus_dev_fatal(dev, err, "creating sysfs entries"); - return; } + } + if (err) { + kfree(be->mode); + be->mode = NULL; + be->major = 0; + be->minor = 0; + } else { /* We're potentially connected now */ xen_update_blkif_status(be->blkif); } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 11043c18ac5a..c3dae2e0f290 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -791,7 +791,7 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { struct llist_node *all_gnts; - struct grant *persistent_gnt; + struct grant *persistent_gnt, *tmp; struct llist_node *n; /* Prevent new requests being issued until we fix things up. */ @@ -805,10 +805,17 @@ static void blkif_free(struct blkfront_info *info, int suspend) /* Remove all persistent grants */ if (info->persistent_gnts_c) { all_gnts = llist_del_all(&info->persistent_gnts); - llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) { + persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node); + while (persistent_gnt) { gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); __free_page(pfn_to_page(persistent_gnt->pfn)); - kfree(persistent_gnt); + tmp = persistent_gnt; + n = persistent_gnt->node.next; + if (n) + persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node); + else + persistent_gnt = NULL; + kfree(tmp); } info->persistent_gnts_c = 0; } diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index a4605fd7e303..47a673070d70 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -27,8 +27,10 @@ #include <linux/of_address.h> #include <linux/irq.h> #include <linux/module.h> -#include <asm/sched_clock.h> +#include <asm/sched_clock.h> +#include <asm/localtimer.h> +#include <linux/percpu.h> /* * Timer block registers. */ @@ -49,6 +51,7 @@ #define TIMER1_RELOAD_OFF 0x0018 #define TIMER1_VAL_OFF 0x001c +#define LCL_TIMER_EVENTS_STATUS 0x0028 /* Global timers are connected to the coherency fabric clock, and the below divider reduces their incrementing frequency. */ #define TIMER_DIVIDER_SHIFT 5 @@ -57,14 +60,17 @@ /* * SoC-specific data. */ -static void __iomem *timer_base; -static int timer_irq; +static void __iomem *timer_base, *local_base; +static unsigned int timer_clk; +static bool timer25Mhz = true; /* * Number of timer ticks per jiffy. */ static u32 ticks_per_jiffy; +static struct clock_event_device __percpu **percpu_armada_370_xp_evt; + static u32 notrace armada_370_xp_read_sched_clock(void) { return ~readl(timer_base + TIMER0_VAL_OFF); @@ -78,24 +84,23 @@ armada_370_xp_clkevt_next_event(unsigned long delta, struct clock_event_device *dev) { u32 u; - /* * Clear clockevent timer interrupt. */ - writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS); + writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS); /* * Setup new clockevent timer value. */ - writel(delta, timer_base + TIMER1_VAL_OFF); + writel(delta, local_base + TIMER0_VAL_OFF); /* * Enable the timer. */ - u = readl(timer_base + TIMER_CTRL_OFF); - u = ((u & ~TIMER1_RELOAD_EN) | TIMER1_EN | - TIMER1_DIV(TIMER_DIVIDER_SHIFT)); - writel(u, timer_base + TIMER_CTRL_OFF); + u = readl(local_base + TIMER_CTRL_OFF); + u = ((u & ~TIMER0_RELOAD_EN) | TIMER0_EN | + TIMER0_DIV(TIMER_DIVIDER_SHIFT)); + writel(u, local_base + TIMER_CTRL_OFF); return 0; } @@ -107,37 +112,38 @@ armada_370_xp_clkevt_mode(enum clock_event_mode mode, u32 u; if (mode == CLOCK_EVT_MODE_PERIODIC) { + /* * Setup timer to fire at 1/HZ intervals. */ - writel(ticks_per_jiffy - 1, timer_base + TIMER1_RELOAD_OFF); - writel(ticks_per_jiffy - 1, timer_base + TIMER1_VAL_OFF); + writel(ticks_per_jiffy - 1, local_base + TIMER0_RELOAD_OFF); + writel(ticks_per_jiffy - 1, local_base + TIMER0_VAL_OFF); /* * Enable timer. */ - u = readl(timer_base + TIMER_CTRL_OFF); - writel((u | TIMER1_EN | TIMER1_RELOAD_EN | - TIMER1_DIV(TIMER_DIVIDER_SHIFT)), - timer_base + TIMER_CTRL_OFF); + u = readl(local_base + TIMER_CTRL_OFF); + + writel((u | TIMER0_EN | TIMER0_RELOAD_EN | + TIMER0_DIV(TIMER_DIVIDER_SHIFT)), + local_base + TIMER_CTRL_OFF); } else { /* * Disable timer. */ - u = readl(timer_base + TIMER_CTRL_OFF); - writel(u & ~TIMER1_EN, timer_base + TIMER_CTRL_OFF); + u = readl(local_base + TIMER_CTRL_OFF); + writel(u & ~TIMER0_EN, local_base + TIMER_CTRL_OFF); /* * ACK pending timer interrupt. */ - writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS); - + writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS); } } static struct clock_event_device armada_370_xp_clkevt = { - .name = "armada_370_xp_tick", + .name = "armada_370_xp_per_cpu_tick", .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, .shift = 32, .rating = 300, @@ -150,32 +156,78 @@ static irqreturn_t armada_370_xp_timer_interrupt(int irq, void *dev_id) /* * ACK timer interrupt and call event handler. */ + struct clock_event_device *evt = *(struct clock_event_device **)dev_id; - writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS); - armada_370_xp_clkevt.event_handler(&armada_370_xp_clkevt); + writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS); + evt->event_handler(evt); return IRQ_HANDLED; } -static struct irqaction armada_370_xp_timer_irq = { - .name = "armada_370_xp_tick", - .flags = IRQF_DISABLED | IRQF_TIMER, - .handler = armada_370_xp_timer_interrupt +/* + * Setup the local clock events for a CPU. + */ +static int __cpuinit armada_370_xp_timer_setup(struct clock_event_device *evt) +{ + u32 u; + int cpu = smp_processor_id(); + + /* Use existing clock_event for cpu 0 */ + if (!smp_processor_id()) + return 0; + + u = readl(local_base + TIMER_CTRL_OFF); + if (timer25Mhz) + writel(u | TIMER0_25MHZ, local_base + TIMER_CTRL_OFF); + else + writel(u & ~TIMER0_25MHZ, local_base + TIMER_CTRL_OFF); + + evt->name = armada_370_xp_clkevt.name; + evt->irq = armada_370_xp_clkevt.irq; + evt->features = armada_370_xp_clkevt.features; + evt->shift = armada_370_xp_clkevt.shift; + evt->rating = armada_370_xp_clkevt.rating, + evt->set_next_event = armada_370_xp_clkevt_next_event, + evt->set_mode = armada_370_xp_clkevt_mode, + evt->cpumask = cpumask_of(cpu); + + *__this_cpu_ptr(percpu_armada_370_xp_evt) = evt; + + clockevents_config_and_register(evt, timer_clk, 1, 0xfffffffe); + enable_percpu_irq(evt->irq, 0); + + return 0; +} + +static void armada_370_xp_timer_stop(struct clock_event_device *evt) +{ + evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt); + disable_percpu_irq(evt->irq); +} + +static struct local_timer_ops armada_370_xp_local_timer_ops __cpuinitdata = { + .setup = armada_370_xp_timer_setup, + .stop = armada_370_xp_timer_stop, }; void __init armada_370_xp_timer_init(void) { u32 u; struct device_node *np; - unsigned int timer_clk; + int res; + np = of_find_compatible_node(NULL, NULL, "marvell,armada-370-xp-timer"); timer_base = of_iomap(np, 0); WARN_ON(!timer_base); + local_base = of_iomap(np, 1); if (of_find_property(np, "marvell,timer-25Mhz", NULL)) { /* The fixed 25MHz timer is available so let's use it */ + u = readl(local_base + TIMER_CTRL_OFF); + writel(u | TIMER0_25MHZ, + local_base + TIMER_CTRL_OFF); u = readl(timer_base + TIMER_CTRL_OFF); - writel(u | TIMER0_25MHZ | TIMER1_25MHZ, + writel(u | TIMER0_25MHZ, timer_base + TIMER_CTRL_OFF); timer_clk = 25000000; } else { @@ -183,15 +235,23 @@ void __init armada_370_xp_timer_init(void) struct clk *clk = of_clk_get(np, 0); WARN_ON(IS_ERR(clk)); rate = clk_get_rate(clk); + u = readl(local_base + TIMER_CTRL_OFF); + writel(u & ~(TIMER0_25MHZ), + local_base + TIMER_CTRL_OFF); + u = readl(timer_base + TIMER_CTRL_OFF); - writel(u & ~(TIMER0_25MHZ | TIMER1_25MHZ), + writel(u & ~(TIMER0_25MHZ), timer_base + TIMER_CTRL_OFF); + timer_clk = rate / TIMER_DIVIDER; + timer25Mhz = false; } - /* We use timer 0 as clocksource, and timer 1 for - clockevents */ - timer_irq = irq_of_parse_and_map(np, 1); + /* + * We use timer 0 as clocksource, and private(local) timer 0 + * for clockevents + */ + armada_370_xp_clkevt.irq = irq_of_parse_and_map(np, 4); ticks_per_jiffy = (timer_clk + HZ / 2) / HZ; @@ -216,12 +276,26 @@ void __init armada_370_xp_timer_init(void) "armada_370_xp_clocksource", timer_clk, 300, 32, clocksource_mmio_readl_down); - /* - * Setup clockevent timer (interrupt-driven). - */ - setup_irq(timer_irq, &armada_370_xp_timer_irq); + /* Register the clockevent on the private timer of CPU 0 */ armada_370_xp_clkevt.cpumask = cpumask_of(0); clockevents_config_and_register(&armada_370_xp_clkevt, timer_clk, 1, 0xfffffffe); -} + percpu_armada_370_xp_evt = alloc_percpu(struct clock_event_device *); + + + /* + * Setup clockevent timer (interrupt-driven). + */ + *__this_cpu_ptr(percpu_armada_370_xp_evt) = &armada_370_xp_clkevt; + res = request_percpu_irq(armada_370_xp_clkevt.irq, + armada_370_xp_timer_interrupt, + armada_370_xp_clkevt.name, + percpu_armada_370_xp_evt); + if (!res) { + enable_percpu_irq(armada_370_xp_clkevt.irq, 0); +#ifdef CONFIG_LOCAL_TIMERS + local_timer_register(&armada_370_xp_local_timer_ops); +#endif + } +} diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index acb709bfac0f..e443f2c1dfd1 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -80,6 +80,29 @@ config EDAC_MM_EDAC occurred so that a particular failing memory module can be replaced. If unsure, select 'Y'. +config EDAC_GHES + bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" + depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y) + default y + help + Not all machines support hardware-driven error report. Some of those + provide a BIOS-driven error report mechanism via ACPI, using the + APEI/GHES driver. By enabling this option, the error reports provided + by GHES are sent to userspace via the EDAC API. + + When this option is enabled, it will disable the hardware-driven + mechanisms, if a GHES BIOS is detected, entering into the + "Firmware First" mode. + + It should be noticed that keeping both GHES and a hardware-driven + error mechanism won't work well, as BIOS will race with OS, while + reading the error registers. So, if you want to not use "Firmware + first" GHES error mechanism, you should disable GHES either at + compilation time or by passing "ghes.disable=1" Kernel parameter + at boot time. + + In doubt, say 'Y'. + config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64) K8, F10h" depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 5608a9ba61b7..4154ed6a02c6 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -16,6 +16,7 @@ ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o endif +obj-$(CONFIG_EDAC_GHES) += ghes_edac.o obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o edac_mce_amd-y := mce_amd.o diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index 23bb99fa44f1..3c2625e7980d 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -453,6 +453,11 @@ extern struct mem_ctl_info *find_mci_by_dev(struct device *dev); extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev); extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page); + +void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + struct edac_raw_error_desc *e); + void edac_mc_handle_error(const enum hw_event_mc_err_type type, struct mem_ctl_info *mci, const u16 error_count, diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index d1e9eb191f2b..cdb81aa73ab7 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -42,6 +42,12 @@ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); +/* + * Used to lock EDAC MC to just one module, avoiding two drivers e. g. + * apei/ghes and i7core_edac to be used at the same time. + */ +static void const *edac_mc_owner; + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { @@ -441,13 +447,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, mci->op_state = OP_ALLOC; - /* at this point, the root kobj is valid, and in order to - * 'free' the object, then the function: - * edac_mc_unregister_sysfs_main_kobj() must be called - * which will perform kobj unregistration and the actual free - * will occur during the kobject callback operation - */ - return mci; error: @@ -666,9 +665,9 @@ fail1: return 1; } -static void del_mc_from_global_list(struct mem_ctl_info *mci) +static int del_mc_from_global_list(struct mem_ctl_info *mci) { - atomic_dec(&edac_handlers); + int handlers = atomic_dec_return(&edac_handlers); list_del_rcu(&mci->link); /* these are for safe removal of devices from global list while @@ -676,6 +675,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) */ synchronize_rcu(); INIT_LIST_HEAD(&mci->link); + + return handlers; } /** @@ -719,6 +720,7 @@ EXPORT_SYMBOL(edac_mc_find); /* FIXME - should a warning be printed if no error detection? correction? */ int edac_mc_add_mc(struct mem_ctl_info *mci) { + int ret = -EINVAL; edac_dbg(0, "\n"); #ifdef CONFIG_EDAC_DEBUG @@ -749,6 +751,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) #endif mutex_lock(&mem_ctls_mutex); + if (edac_mc_owner && edac_mc_owner != mci->mod_name) { + ret = -EPERM; + goto fail0; + } + if (add_mc_to_global_list(mci)) goto fail0; @@ -775,6 +782,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); + edac_mc_owner = mci->mod_name; + mutex_unlock(&mem_ctls_mutex); return 0; @@ -783,7 +792,7 @@ fail1: fail0: mutex_unlock(&mem_ctls_mutex); - return 1; + return ret; } EXPORT_SYMBOL_GPL(edac_mc_add_mc); @@ -809,7 +818,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) return NULL; } - del_mc_from_global_list(mci); + if (!del_mc_from_global_list(mci)) + edac_mc_owner = NULL; mutex_unlock(&mem_ctls_mutex); /* flush workq processes */ @@ -907,6 +917,7 @@ const char *edac_layer_name[] = { [EDAC_MC_LAYER_CHANNEL] = "channel", [EDAC_MC_LAYER_SLOT] = "slot", [EDAC_MC_LAYER_CHIP_SELECT] = "csrow", + [EDAC_MC_LAYER_ALL_MEM] = "memory", }; EXPORT_SYMBOL_GPL(edac_layer_name); @@ -1054,7 +1065,46 @@ static void edac_ue_error(struct mem_ctl_info *mci, edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); } -#define OTHER_LABEL " or " +/** + * edac_raw_mc_handle_error - reports a memory event to userspace without doing + * anything to discover the error location + * + * @type: severity of the error (CE/UE/Fatal) + * @mci: a struct mem_ctl_info pointer + * @e: error description + * + * This raw function is used internally by edac_mc_handle_error(). It should + * only be called directly when the hardware error come directly from BIOS, + * like in the case of APEI GHES driver. + */ +void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + struct edac_raw_error_desc *e) +{ + char detail[80]; + int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer }; + + /* Memory type dependent details about the error */ + if (type == HW_EVENT_ERR_CORRECTED) { + snprintf(detail, sizeof(detail), + "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", + e->page_frame_number, e->offset_in_page, + e->grain, e->syndrome); + edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label, + detail, e->other_detail, e->enable_per_layer_report, + e->page_frame_number, e->offset_in_page, e->grain); + } else { + snprintf(detail, sizeof(detail), + "page:0x%lx offset:0x%lx grain:%ld", + e->page_frame_number, e->offset_in_page, e->grain); + + edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label, + detail, e->other_detail, e->enable_per_layer_report); + } + + +} +EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error); /** * edac_mc_handle_error - reports a memory event to userspace @@ -1086,19 +1136,27 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, const char *msg, const char *other_detail) { - /* FIXME: too much for stack: move it to some pre-alocated area */ - char detail[80], location[80]; - char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; char *p; int row = -1, chan = -1; int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; - int i; - long grain; - bool enable_per_layer_report = false; + int i, n_labels = 0; u8 grain_bits; + struct edac_raw_error_desc *e = &mci->error_desc; edac_dbg(3, "MC%d\n", mci->mc_idx); + /* Fills the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = error_count; + e->top_layer = top_layer; + e->mid_layer = mid_layer; + e->low_layer = low_layer; + e->page_frame_number = page_frame_number; + e->offset_in_page = offset_in_page; + e->syndrome = syndrome; + e->msg = msg; + e->other_detail = other_detail; + /* * Check if the event report is consistent and if the memory * location is known. If it is known, enable_per_layer_report will be @@ -1121,7 +1179,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, pos[i] = -1; } if (pos[i] >= 0) - enable_per_layer_report = true; + e->enable_per_layer_report = true; } /* @@ -1135,8 +1193,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * where each memory belongs to a separate channel within the same * branch. */ - grain = 0; - p = label; + p = e->label; *p = '\0'; for (i = 0; i < mci->tot_dimms; i++) { @@ -1150,8 +1207,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, continue; /* get the max grain, over the error match range */ - if (dimm->grain > grain) - grain = dimm->grain; + if (dimm->grain > e->grain) + e->grain = dimm->grain; /* * If the error is memory-controller wide, there's no need to @@ -1159,8 +1216,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * channel/memory controller/... may be affected. * Also, don't show errors for empty DIMM slots. */ - if (enable_per_layer_report && dimm->nr_pages) { - if (p != label) { + if (e->enable_per_layer_report && dimm->nr_pages) { + if (n_labels >= EDAC_MAX_LABELS) { + e->enable_per_layer_report = false; + break; + } + n_labels++; + if (p != e->label) { strcpy(p, OTHER_LABEL); p += strlen(OTHER_LABEL); } @@ -1187,12 +1249,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } } - if (!enable_per_layer_report) { - strcpy(label, "any memory"); + if (!e->enable_per_layer_report) { + strcpy(e->label, "any memory"); } else { edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); - if (p == label) - strcpy(label, "unknown memory"); + if (p == e->label) + strcpy(e->label, "unknown memory"); if (type == HW_EVENT_ERR_CORRECTED) { if (row >= 0) { mci->csrows[row]->ce_count += error_count; @@ -1205,7 +1267,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } /* Fill the RAM location data */ - p = location; + p = e->location; for (i = 0; i < mci->n_layers; i++) { if (pos[i] < 0) @@ -1215,32 +1277,16 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, edac_layer_name[mci->layers[i].type], pos[i]); } - if (p > location) + if (p > e->location) *(p - 1) = '\0'; /* Report the error via the trace interface */ - grain_bits = fls_long(grain) + 1; - trace_mc_event(type, msg, label, error_count, - mci->mc_idx, top_layer, mid_layer, low_layer, - PAGES_TO_MiB(page_frame_number) | offset_in_page, - grain_bits, syndrome, other_detail); + grain_bits = fls_long(e->grain) + 1; + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, e->other_detail); - /* Memory type dependent details about the error */ - if (type == HW_EVENT_ERR_CORRECTED) { - snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", - page_frame_number, offset_in_page, - grain, syndrome); - edac_ce_error(mci, error_count, pos, msg, location, label, - detail, other_detail, enable_per_layer_report, - page_frame_number, offset_in_page, grain); - } else { - snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%ld", - page_frame_number, offset_in_page, grain); - - edac_ue_error(mci, error_count, pos, msg, location, label, - detail, other_detail, enable_per_layer_report); - } + edac_raw_mc_handle_error(type, mci, e); } EXPORT_SYMBOL_GPL(edac_mc_handle_error); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 0ca1ca71157f..4f4b6137d74e 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -7,7 +7,7 @@ * * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com * - * (c) 2012 - Mauro Carvalho Chehab <mchehab@redhat.com> + * (c) 2012-2013 - Mauro Carvalho Chehab <mchehab@redhat.com> * The entire API were re-written, and ported to use struct device * */ @@ -429,8 +429,12 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci) if (!nr_pages_per_csrow(csrow)) continue; err = edac_create_csrow_object(mci, mci->csrows[i], i); - if (err < 0) + if (err < 0) { + edac_dbg(1, + "failure: create csrow objects for csrow %d\n", + i); goto error; + } } return 0; @@ -677,9 +681,6 @@ static ssize_t mci_sdram_scrub_rate_store(struct device *dev, unsigned long bandwidth = 0; int new_bw = 0; - if (!mci->set_sdram_scrub_rate) - return -ENODEV; - if (strict_strtoul(data, 10, &bandwidth) < 0) return -EINVAL; @@ -703,9 +704,6 @@ static ssize_t mci_sdram_scrub_rate_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); int bandwidth = 0; - if (!mci->get_sdram_scrub_rate) - return -ENODEV; - bandwidth = mci->get_sdram_scrub_rate(mci); if (bandwidth < 0) { edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n"); @@ -866,8 +864,7 @@ DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL); DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL); /* memory scrubber attribute file */ -DEVICE_ATTR(sdram_scrub_rate, S_IRUGO | S_IWUSR, mci_sdram_scrub_rate_show, - mci_sdram_scrub_rate_store); +DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL); static struct attribute *mci_attrs[] = { &dev_attr_reset_counters.attr, @@ -878,7 +875,6 @@ static struct attribute *mci_attrs[] = { &dev_attr_ce_noinfo_count.attr, &dev_attr_ue_count.attr, &dev_attr_ce_count.attr, - &dev_attr_sdram_scrub_rate.attr, &dev_attr_max_location.attr, NULL }; @@ -1007,11 +1003,28 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) edac_dbg(0, "creating device %s\n", dev_name(&mci->dev)); err = device_add(&mci->dev); if (err < 0) { + edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); bus_unregister(&mci->bus); kfree(mci->bus.name); return err; } + if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) { + if (mci->get_sdram_scrub_rate) { + dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO; + dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show; + } + if (mci->set_sdram_scrub_rate) { + dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR; + dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store; + } + err = device_create_file(&mci->dev, + &dev_attr_sdram_scrub_rate); + if (err) { + edac_dbg(1, "failure: create sdram_scrub_rate\n"); + goto fail2; + } + } /* * Create the dimm/rank devices */ @@ -1056,6 +1069,7 @@ fail: continue; device_unregister(&dimm->dev); } +fail2: device_unregister(&mci->dev); bus_unregister(&mci->bus); kfree(mci->bus.name); diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index 12c951a2c33d..a66941fea5a4 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c @@ -146,7 +146,7 @@ static void __exit edac_exit(void) /* * Inform the kernel of our entry and exit points */ -module_init(edac_init); +subsys_initcall(edac_init); module_exit(edac_exit); MODULE_LICENSE("GPL"); diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 0056c4dae9d5..e8658e451762 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -429,8 +429,8 @@ static void edac_pci_main_kobj_teardown(void) if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) { edac_dbg(0, "called kobject_put on main kobj\n"); kobject_put(edac_pci_top_main_kobj); + edac_put_sysfs_subsys(); } - edac_put_sysfs_subsys(); } /* diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c new file mode 100644 index 000000000000..bb534670ec02 --- /dev/null +++ b/drivers/edac/ghes_edac.c @@ -0,0 +1,537 @@ +/* + * GHES/EDAC Linux driver + * + * This file may be distributed under the terms of the GNU General Public + * License version 2. + * + * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com> + * + * Red Hat Inc. http://www.redhat.com + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <acpi/ghes.h> +#include <linux/edac.h> +#include <linux/dmi.h> +#include "edac_core.h" +#include <ras/ras_event.h> + +#define GHES_EDAC_REVISION " Ver: 1.0.0" + +struct ghes_edac_pvt { + struct list_head list; + struct ghes *ghes; + struct mem_ctl_info *mci; + + /* Buffers for the error handling routine */ + char detail_location[240]; + char other_detail[160]; + char msg[80]; +}; + +static LIST_HEAD(ghes_reglist); +static DEFINE_MUTEX(ghes_edac_lock); +static int ghes_edac_mc_num; + + +/* Memory Device - Type 17 of SMBIOS spec */ +struct memdev_dmi_entry { + u8 type; + u8 length; + u16 handle; + u16 phys_mem_array_handle; + u16 mem_err_info_handle; + u16 total_width; + u16 data_width; + u16 size; + u8 form_factor; + u8 device_set; + u8 device_locator; + u8 bank_locator; + u8 memory_type; + u16 type_detail; + u16 speed; + u8 manufacturer; + u8 serial_number; + u8 asset_tag; + u8 part_number; + u8 attributes; + u32 extended_size; + u16 conf_mem_clk_speed; +} __attribute__((__packed__)); + +struct ghes_edac_dimm_fill { + struct mem_ctl_info *mci; + unsigned count; +}; + +char *memory_type[] = { + [MEM_EMPTY] = "EMPTY", + [MEM_RESERVED] = "RESERVED", + [MEM_UNKNOWN] = "UNKNOWN", + [MEM_FPM] = "FPM", + [MEM_EDO] = "EDO", + [MEM_BEDO] = "BEDO", + [MEM_SDR] = "SDR", + [MEM_RDR] = "RDR", + [MEM_DDR] = "DDR", + [MEM_RDDR] = "RDDR", + [MEM_RMBS] = "RMBS", + [MEM_DDR2] = "DDR2", + [MEM_FB_DDR2] = "FB_DDR2", + [MEM_RDDR2] = "RDDR2", + [MEM_XDR] = "XDR", + [MEM_DDR3] = "DDR3", + [MEM_RDDR3] = "RDDR3", +}; + +static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) +{ + int *num_dimm = arg; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) + (*num_dimm)++; +} + +static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) +{ + struct ghes_edac_dimm_fill *dimm_fill = arg; + struct mem_ctl_info *mci = dimm_fill->mci; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) { + struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, + dimm_fill->count, 0, 0); + + if (entry->size == 0xffff) { + pr_info("Can't get DIMM%i size\n", + dimm_fill->count); + dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */ + } else if (entry->size == 0x7fff) { + dimm->nr_pages = MiB_TO_PAGES(entry->extended_size); + } else { + if (entry->size & 1 << 15) + dimm->nr_pages = MiB_TO_PAGES((entry->size & + 0x7fff) << 10); + else + dimm->nr_pages = MiB_TO_PAGES(entry->size); + } + + switch (entry->memory_type) { + case 0x12: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR; + else + dimm->mtype = MEM_DDR; + break; + case 0x13: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR2; + else + dimm->mtype = MEM_DDR2; + break; + case 0x14: + dimm->mtype = MEM_FB_DDR2; + break; + case 0x18: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR3; + else + dimm->mtype = MEM_DDR3; + break; + default: + if (entry->type_detail & 1 << 6) + dimm->mtype = MEM_RMBS; + else if ((entry->type_detail & ((1 << 7) | (1 << 13))) + == ((1 << 7) | (1 << 13))) + dimm->mtype = MEM_RDR; + else if (entry->type_detail & 1 << 7) + dimm->mtype = MEM_SDR; + else if (entry->type_detail & 1 << 9) + dimm->mtype = MEM_EDO; + else + dimm->mtype = MEM_UNKNOWN; + } + + /* + * Actually, we can only detect if the memory has bits for + * checksum or not + */ + if (entry->total_width == entry->data_width) + dimm->edac_mode = EDAC_NONE; + else + dimm->edac_mode = EDAC_SECDED; + + dimm->dtype = DEV_UNKNOWN; + dimm->grain = 128; /* Likely, worse case */ + + /* + * FIXME: It shouldn't be hard to also fill the DIMM labels + */ + + if (dimm->nr_pages) { + edac_dbg(1, "DIMM%i: %s size = %d MB%s\n", + dimm_fill->count, memory_type[dimm->mtype], + PAGES_TO_MiB(dimm->nr_pages), + (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : ""); + edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n", + entry->memory_type, entry->type_detail, + entry->total_width, entry->data_width); + } + + dimm_fill->count++; + } +} + +void ghes_edac_report_mem_error(struct ghes *ghes, int sev, + struct cper_sec_mem_err *mem_err) +{ + enum hw_event_mc_err_type type; + struct edac_raw_error_desc *e; + struct mem_ctl_info *mci; + struct ghes_edac_pvt *pvt = NULL; + char *p; + u8 grain_bits; + + list_for_each_entry(pvt, &ghes_reglist, list) { + if (ghes == pvt->ghes) + break; + } + if (!pvt) { + pr_err("Internal error: Can't find EDAC structure\n"); + return; + } + mci = pvt->mci; + e = &mci->error_desc; + + /* Cleans the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = 1; + strcpy(e->label, "unknown label"); + e->msg = pvt->msg; + e->other_detail = pvt->other_detail; + e->top_layer = -1; + e->mid_layer = -1; + e->low_layer = -1; + *pvt->other_detail = '\0'; + *pvt->msg = '\0'; + + switch (sev) { + case GHES_SEV_CORRECTED: + type = HW_EVENT_ERR_CORRECTED; + break; + case GHES_SEV_RECOVERABLE: + type = HW_EVENT_ERR_UNCORRECTED; + break; + case GHES_SEV_PANIC: + type = HW_EVENT_ERR_FATAL; + break; + default: + case GHES_SEV_NO: + type = HW_EVENT_ERR_INFO; + } + + edac_dbg(1, "error validation_bits: 0x%08llx\n", + (long long)mem_err->validation_bits); + + /* Error type, mapped on e->msg */ + if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { + p = pvt->msg; + switch (mem_err->error_type) { + case 0: + p += sprintf(p, "Unknown"); + break; + case 1: + p += sprintf(p, "No error"); + break; + case 2: + p += sprintf(p, "Single-bit ECC"); + break; + case 3: + p += sprintf(p, "Multi-bit ECC"); + break; + case 4: + p += sprintf(p, "Single-symbol ChipKill ECC"); + break; + case 5: + p += sprintf(p, "Multi-symbol ChipKill ECC"); + break; + case 6: + p += sprintf(p, "Master abort"); + break; + case 7: + p += sprintf(p, "Target abort"); + break; + case 8: + p += sprintf(p, "Parity Error"); + break; + case 9: + p += sprintf(p, "Watchdog timeout"); + break; + case 10: + p += sprintf(p, "Invalid address"); + break; + case 11: + p += sprintf(p, "Mirror Broken"); + break; + case 12: + p += sprintf(p, "Memory Sparing"); + break; + case 13: + p += sprintf(p, "Scrub corrected error"); + break; + case 14: + p += sprintf(p, "Scrub uncorrected error"); + break; + case 15: + p += sprintf(p, "Physical Memory Map-out event"); + break; + default: + p += sprintf(p, "reserved error (%d)", + mem_err->error_type); + } + } else { + strcpy(pvt->msg, "unknown error"); + } + + /* Error address */ + if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { + e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; + e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; + } + + /* Error grain */ + if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) { + e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); + } + + /* Memory error location, mapped on e->location */ + p = e->location; + if (mem_err->validation_bits & CPER_MEM_VALID_NODE) + p += sprintf(p, "node:%d ", mem_err->node); + if (mem_err->validation_bits & CPER_MEM_VALID_CARD) + p += sprintf(p, "card:%d ", mem_err->card); + if (mem_err->validation_bits & CPER_MEM_VALID_MODULE) + p += sprintf(p, "module:%d ", mem_err->module); + if (mem_err->validation_bits & CPER_MEM_VALID_BANK) + p += sprintf(p, "bank:%d ", mem_err->bank); + if (mem_err->validation_bits & CPER_MEM_VALID_ROW) + p += sprintf(p, "row:%d ", mem_err->row); + if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN) + p += sprintf(p, "col:%d ", mem_err->column); + if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION) + p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); + if (p > e->location) + *(p - 1) = '\0'; + + /* All other fields are mapped on e->other_detail */ + p = pvt->other_detail; + if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) { + u64 status = mem_err->error_status; + + p += sprintf(p, "status(0x%016llx): ", (long long)status); + switch ((status >> 8) & 0xff) { + case 1: + p += sprintf(p, "Error detected internal to the component "); + break; + case 16: + p += sprintf(p, "Error detected in the bus "); + break; + case 4: + p += sprintf(p, "Storage error in DRAM memory "); + break; + case 5: + p += sprintf(p, "Storage error in TLB "); + break; + case 6: + p += sprintf(p, "Storage error in cache "); + break; + case 7: + p += sprintf(p, "Error in one or more functional units "); + break; + case 8: + p += sprintf(p, "component failed self test "); + break; + case 9: + p += sprintf(p, "Overflow or undervalue of internal queue "); + break; + case 17: + p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR "); + break; + case 18: + p += sprintf(p, "Improper access error "); + break; + case 19: + p += sprintf(p, "Access to a memory address which is not mapped to any component "); + break; + case 20: + p += sprintf(p, "Loss of Lockstep "); + break; + case 21: + p += sprintf(p, "Response not associated with a request "); + break; + case 22: + p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits "); + break; + case 23: + p += sprintf(p, "Detection of a PATH_ERROR "); + break; + case 25: + p += sprintf(p, "Bus operation timeout "); + break; + case 26: + p += sprintf(p, "A read was issued to data that has been poisoned "); + break; + default: + p += sprintf(p, "reserved "); + break; + } + } + if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) + p += sprintf(p, "requestorID: 0x%016llx ", + (long long)mem_err->requestor_id); + if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID) + p += sprintf(p, "responderID: 0x%016llx ", + (long long)mem_err->responder_id); + if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID) + p += sprintf(p, "targetID: 0x%016llx ", + (long long)mem_err->responder_id); + if (p > pvt->other_detail) + *(p - 1) = '\0'; + + /* Generate the trace event */ + grain_bits = fls_long(e->grain); + sprintf(pvt->detail_location, "APEI location: %s %s", + e->location, e->other_detail); + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, pvt->detail_location); + + /* Report the error via EDAC API */ + edac_raw_mc_handle_error(type, mci, e); +} +EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error); + +int ghes_edac_register(struct ghes *ghes, struct device *dev) +{ + bool fake = false; + int rc, num_dimm = 0; + struct mem_ctl_info *mci; + struct edac_mc_layer layers[1]; + struct ghes_edac_pvt *pvt; + struct ghes_edac_dimm_fill dimm_fill; + + /* Get the number of DIMMs */ + dmi_walk(ghes_edac_count_dimms, &num_dimm); + + /* Check if we've got a bogus BIOS */ + if (num_dimm == 0) { + fake = true; + num_dimm = 1; + } + + layers[0].type = EDAC_MC_LAYER_ALL_MEM; + layers[0].size = num_dimm; + layers[0].is_virt_csrow = true; + + /* + * We need to serialize edac_mc_alloc() and edac_mc_add_mc(), + * to avoid duplicated memory controller numbers + */ + mutex_lock(&ghes_edac_lock); + mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers, + sizeof(*pvt)); + if (!mci) { + pr_info("Can't allocate memory for EDAC data\n"); + mutex_unlock(&ghes_edac_lock); + return -ENOMEM; + } + + pvt = mci->pvt_info; + memset(pvt, 0, sizeof(*pvt)); + list_add_tail(&pvt->list, &ghes_reglist); + pvt->ghes = ghes; + pvt->mci = mci; + mci->pdev = dev; + + mci->mtype_cap = MEM_FLAG_EMPTY; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "ghes_edac.c"; + mci->mod_ver = GHES_EDAC_REVISION; + mci->ctl_name = "ghes_edac"; + mci->dev_name = "ghes"; + + if (!ghes_edac_mc_num) { + if (!fake) { + pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n"); + pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n"); + pr_info("So, the end result of using this driver varies from vendor to vendor.\n"); + pr_info("If you find incorrect reports, please contact your hardware vendor\n"); + pr_info("to correct its BIOS.\n"); + pr_info("This system has %d DIMM sockets.\n", + num_dimm); + } else { + pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n"); + pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n"); + pr_info("work on such system. Use this driver with caution\n"); + } + } + + if (!fake) { + /* + * Fill DIMM info from DMI for the memory controller #0 + * + * Keep it in blank for the other memory controllers, as + * there's no reliable way to properly credit each DIMM to + * the memory controller, as different BIOSes fill the + * DMI bank location fields on different ways + */ + if (!ghes_edac_mc_num) { + dimm_fill.count = 0; + dimm_fill.mci = mci; + dmi_walk(ghes_edac_dmidecode, &dimm_fill); + } + } else { + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, 0, 0, 0); + + dimm->nr_pages = 1; + dimm->grain = 128; + dimm->mtype = MEM_UNKNOWN; + dimm->dtype = DEV_UNKNOWN; + dimm->edac_mode = EDAC_SECDED; + } + + rc = edac_mc_add_mc(mci); + if (rc < 0) { + pr_info("Can't register at EDAC core\n"); + edac_mc_free(mci); + mutex_unlock(&ghes_edac_lock); + return -ENODEV; + } + + ghes_edac_mc_num++; + mutex_unlock(&ghes_edac_lock); + return 0; +} +EXPORT_SYMBOL_GPL(ghes_edac_register); + +void ghes_edac_unregister(struct ghes *ghes) +{ + struct mem_ctl_info *mci; + struct ghes_edac_pvt *pvt, *tmp; + + list_for_each_entry_safe(pvt, tmp, &ghes_reglist, list) { + if (ghes == pvt->ghes) { + mci = pvt->mci; + edac_mc_del_mc(mci->pdev); + edac_mc_free(mci); + list_del(&pvt->list); + } + } +} +EXPORT_SYMBOL_GPL(ghes_edac_unregister); diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index 4e8337602e78..aa44c1718f50 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -106,16 +106,26 @@ static int nr_channels; static int how_many_channels(struct pci_dev *pdev) { + int n_channels; + unsigned char capid0_8b; /* 8th byte of CAPID0 */ pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b); + if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */ edac_dbg(0, "In single channel mode\n"); - return 1; + n_channels = 1; } else { edac_dbg(0, "In dual channel mode\n"); - return 2; + n_channels = 2; } + + if (capid0_8b & 0x10) /* check if both channels are filled */ + edac_dbg(0, "2 DIMMS per channel disabled\n"); + else + edac_dbg(0, "2 DIMMS per channel enabled\n"); + + return n_channels; } static unsigned long eccerrlog_syndrome(u64 log) @@ -290,6 +300,8 @@ static void i3200_get_drbs(void __iomem *window, for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) { drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK; drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK; + + edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]); } } @@ -311,6 +323,9 @@ static unsigned long drb_to_nr_pages( int n; n = drbs[channel][rank]; + if (!n) + return 0; + if (rank > 0) n -= drbs[channel][rank - 1]; if (stacked && (channel == 1) && @@ -377,19 +392,19 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) * cumulative; the last one will contain the total memory * contained in all ranks. */ - for (i = 0; i < mci->nr_csrows; i++) { + for (i = 0; i < I3200_DIMMS; i++) { unsigned long nr_pages; - struct csrow_info *csrow = mci->csrows[i]; - nr_pages = drb_to_nr_pages(drbs, stacked, - i / I3200_RANKS_PER_CHANNEL, - i % I3200_RANKS_PER_CHANNEL); + for (j = 0; j < nr_channels; j++) { + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, i, j, 0); - if (nr_pages == 0) - continue; + nr_pages = drb_to_nr_pages(drbs, stacked, j, i); + if (nr_pages == 0) + continue; - for (j = 0; j < nr_channels; j++) { - struct dimm_info *dimm = csrow->channels[j]->dimm; + edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j, + stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages)); dimm->nr_pages = nr_pages; dimm->grain = nr_pages << PAGE_SHIFT; diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index d6955b2cc99f..1b635178cc44 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -27,6 +27,7 @@ #include <linux/edac.h> #include <linux/delay.h> #include <linux/mmzone.h> +#include <linux/debugfs.h> #include "edac_core.h" @@ -68,6 +69,14 @@ I5100_FERR_NF_MEM_M1ERR_MASK) #define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */ #define I5100_EMASK_MEM 0xa8 /* MC Error Mask Register */ +#define I5100_MEM0EINJMSK0 0x200 /* Injection Mask0 Register Channel 0 */ +#define I5100_MEM1EINJMSK0 0x208 /* Injection Mask0 Register Channel 1 */ +#define I5100_MEMXEINJMSK0_EINJEN (1 << 27) +#define I5100_MEM0EINJMSK1 0x204 /* Injection Mask1 Register Channel 0 */ +#define I5100_MEM1EINJMSK1 0x206 /* Injection Mask1 Register Channel 1 */ + +/* Device 19, Function 0 */ +#define I5100_DINJ0 0x9a /* device 21 and 22, func 0 */ #define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */ @@ -338,13 +347,26 @@ struct i5100_priv { unsigned ranksperchan; /* number of ranks per channel */ struct pci_dev *mc; /* device 16 func 1 */ + struct pci_dev *einj; /* device 19 func 0 */ struct pci_dev *ch0mm; /* device 21 func 0 */ struct pci_dev *ch1mm; /* device 22 func 0 */ struct delayed_work i5100_scrubbing; int scrub_enable; + + /* Error injection */ + u8 inject_channel; + u8 inject_hlinesel; + u8 inject_deviceptr1; + u8 inject_deviceptr2; + u16 inject_eccmask1; + u16 inject_eccmask2; + + struct dentry *debugfs; }; +static struct dentry *i5100_debugfs; + /* map a rank/chan to a slot number on the mainboard */ static int i5100_rank_to_slot(const struct mem_ctl_info *mci, int chan, int rank) @@ -863,13 +885,126 @@ static void i5100_init_csrows(struct mem_ctl_info *mci) } } +/**************************************************************************** + * Error injection routines + ****************************************************************************/ + +static void i5100_do_inject(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + u32 mask0; + u16 mask1; + + /* MEM[1:0]EINJMSK0 + * 31 - ADDRMATCHEN + * 29:28 - HLINESEL + * 00 Reserved + * 01 Lower half of cache line + * 10 Upper half of cache line + * 11 Both upper and lower parts of cache line + * 27 - EINJEN + * 25:19 - XORMASK1 for deviceptr1 + * 9:5 - SEC2RAM for deviceptr2 + * 4:0 - FIR2RAM for deviceptr1 + */ + mask0 = ((priv->inject_hlinesel & 0x3) << 28) | + I5100_MEMXEINJMSK0_EINJEN | + ((priv->inject_eccmask1 & 0xffff) << 10) | + ((priv->inject_deviceptr2 & 0x1f) << 5) | + (priv->inject_deviceptr1 & 0x1f); + + /* MEM[1:0]EINJMSK1 + * 15:0 - XORMASK2 for deviceptr2 + */ + mask1 = priv->inject_eccmask2; + + if (priv->inject_channel == 0) { + pci_write_config_dword(priv->mc, I5100_MEM0EINJMSK0, mask0); + pci_write_config_word(priv->mc, I5100_MEM0EINJMSK1, mask1); + } else { + pci_write_config_dword(priv->mc, I5100_MEM1EINJMSK0, mask0); + pci_write_config_word(priv->mc, I5100_MEM1EINJMSK1, mask1); + } + + /* Error Injection Response Function + * Intel 5100 Memory Controller Hub Chipset (318378) datasheet + * hints about this register but carry no data about them. All + * data regarding device 19 is based on experimentation and the + * Intel 7300 Chipset Memory Controller Hub (318082) datasheet + * which appears to be accurate for the i5100 in this area. + * + * The injection code don't work without setting this register. + * The register needs to be flipped off then on else the hardware + * will only preform the first injection. + * + * Stop condition bits 7:4 + * 1010 - Stop after one injection + * 1011 - Never stop injecting faults + * + * Start condition bits 3:0 + * 1010 - Never start + * 1011 - Start immediately + */ + pci_write_config_byte(priv->einj, I5100_DINJ0, 0xaa); + pci_write_config_byte(priv->einj, I5100_DINJ0, 0xab); +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) +static ssize_t inject_enable_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct device *dev = file->private_data; + struct mem_ctl_info *mci = to_mci(dev); + + i5100_do_inject(mci); + + return count; +} + +static const struct file_operations i5100_inject_enable_fops = { + .open = simple_open, + .write = inject_enable_write, + .llseek = generic_file_llseek, +}; + +static int i5100_setup_debugfs(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + + if (!i5100_debugfs) + return -ENODEV; + + priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs); + + if (!priv->debugfs) + return -ENOMEM; + + debugfs_create_x8("inject_channel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_channel); + debugfs_create_x8("inject_hlinesel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_hlinesel); + debugfs_create_x8("inject_deviceptr1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr1); + debugfs_create_x8("inject_deviceptr2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr2); + debugfs_create_x16("inject_eccmask1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask1); + debugfs_create_x16("inject_eccmask2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask2); + debugfs_create_file("inject_enable", S_IWUSR, priv->debugfs, + &mci->dev, &i5100_inject_enable_fops); + + return 0; + +} + static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { int rc; struct mem_ctl_info *mci; struct edac_mc_layer layers[2]; struct i5100_priv *priv; - struct pci_dev *ch0mm, *ch1mm; + struct pci_dev *ch0mm, *ch1mm, *einj; int ret = 0; u32 dw; int ranksperch; @@ -941,6 +1076,22 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto bail_disable_ch1; } + + /* device 19, func 0, Error injection */ + einj = pci_get_device_func(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5100_19, 0); + if (!einj) { + ret = -ENODEV; + goto bail_einj; + } + + rc = pci_enable_device(einj); + if (rc < 0) { + ret = rc; + goto bail_disable_einj; + } + + mci->pdev = &pdev->dev; priv = mci->pvt_info; @@ -948,6 +1099,7 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) priv->mc = pdev; priv->ch0mm = ch0mm; priv->ch1mm = ch1mm; + priv->einj = einj; INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing); @@ -975,6 +1127,13 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mci->set_sdram_scrub_rate = i5100_set_scrub_rate; mci->get_sdram_scrub_rate = i5100_get_scrub_rate; + priv->inject_channel = 0; + priv->inject_hlinesel = 0; + priv->inject_deviceptr1 = 0; + priv->inject_deviceptr2 = 0; + priv->inject_eccmask1 = 0; + priv->inject_eccmask2 = 0; + i5100_init_csrows(mci); /* this strange construction seems to be in every driver, dunno why */ @@ -992,6 +1151,8 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto bail_scrub; } + i5100_setup_debugfs(mci); + return ret; bail_scrub: @@ -999,6 +1160,12 @@ bail_scrub: cancel_delayed_work_sync(&(priv->i5100_scrubbing)); edac_mc_free(mci); +bail_disable_einj: + pci_disable_device(einj); + +bail_einj: + pci_dev_put(einj); + bail_disable_ch1: pci_disable_device(ch1mm); @@ -1030,14 +1197,18 @@ static void i5100_remove_one(struct pci_dev *pdev) priv = mci->pvt_info; + debugfs_remove_recursive(priv->debugfs); + priv->scrub_enable = 0; cancel_delayed_work_sync(&(priv->i5100_scrubbing)); pci_disable_device(pdev); pci_disable_device(priv->ch0mm); pci_disable_device(priv->ch1mm); + pci_disable_device(priv->einj); pci_dev_put(priv->ch0mm); pci_dev_put(priv->ch1mm); + pci_dev_put(priv->einj); edac_mc_free(mci); } @@ -1060,13 +1231,16 @@ static int __init i5100_init(void) { int pci_rc; - pci_rc = pci_register_driver(&i5100_driver); + i5100_debugfs = debugfs_create_dir("i5100_edac", NULL); + pci_rc = pci_register_driver(&i5100_driver); return (pci_rc < 0) ? pci_rc : 0; } static void __exit i5100_exit(void) { + debugfs_remove(i5100_debugfs); + pci_unregister_driver(&i5100_driver); } diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index e213d030b0dd..0ec3e95a12cd 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -420,21 +420,21 @@ static inline int numdimms(u32 dimms) static inline int numrank(u32 rank) { - static int ranks[4] = { 1, 2, 4, -EINVAL }; + static const int ranks[] = { 1, 2, 4, -EINVAL }; return ranks[rank & 0x3]; } static inline int numbank(u32 bank) { - static int banks[4] = { 4, 8, 16, -EINVAL }; + static const int banks[] = { 4, 8, 16, -EINVAL }; return banks[bank & 0x3]; } static inline int numrow(u32 row) { - static int rows[8] = { + static const int rows[] = { 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, -EINVAL, -EINVAL, -EINVAL, }; @@ -444,7 +444,7 @@ static inline int numrow(u32 row) static inline int numcol(u32 col) { - static int cols[8] = { + static const int cols[] = { 1 << 10, 1 << 11, 1 << 12, -EINVAL, }; return cols[col & 0x3]; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index da7e2986e3d5..57244f995614 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -639,7 +639,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tmp_mb = (1 + pvt->tohm) >> 20; mb = div_u64_rem(tmp_mb, 1000, &kb); - edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)", mb, kb, (u64)pvt->tohm); + edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm); /* * Step 2) Get SAD range and SAD Interleave list diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e67a4be0080d..bb2cd3ce9b0f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -626,7 +626,6 @@ static void dec_pending(struct dm_io *io, int error) queue_io(md, bio); } else { /* done with normal IO or empty flush */ - trace_block_bio_complete(md->queue, bio, io_error); bio_endio(bio, io_error); } } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 697f026cb318..5af2d2709081 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -184,8 +184,6 @@ static void return_io(struct bio *return_bi) return_bi = bi->bi_next; bi->bi_next = NULL; bi->bi_size = 0; - trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), - bi, 0); bio_endio(bi, 0); bi = return_bi; } @@ -3916,8 +3914,6 @@ static void raid5_align_endio(struct bio *bi, int error) rdev_dec_pending(rdev, conf->mddev); if (!error && uptodate) { - trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev), - raid_bi, 0); bio_endio(raid_bi, 0); if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_stripe); @@ -4376,8 +4372,6 @@ static void make_request(struct mddev *mddev, struct bio * bi) if ( rw == WRITE ) md_write_end(mddev); - trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), - bi, 0); bio_endio(bi, 0); } } @@ -4754,11 +4748,8 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) handled++; } remaining = raid5_dec_bi_active_stripes(raid_bio); - if (remaining == 0) { - trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev), - raid_bio, 0); + if (remaining == 0) bio_endio(raid_bio, 0); - } if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_stripe); return handled; diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 383a727b8aa0..6e5ad8ec0a22 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -1338,28 +1338,15 @@ static int isp_enable_clocks(struct isp_device *isp) { int r; unsigned long rate; - int divisor; - - /* - * cam_mclk clock chain: - * dpll4 -> dpll4_m5 -> dpll4_m5x2 -> cam_mclk - * - * In OMAP3630 dpll4_m5x2 != 2 x dpll4_m5 but both are - * set to the same value. Hence the rate set for dpll4_m5 - * has to be twice of what is set on OMAP3430 to get - * the required value for cam_mclk - */ - divisor = isp->revision == ISP_REVISION_15_0 ? 1 : 2; r = clk_prepare_enable(isp->clock[ISP_CLK_CAM_ICK]); if (r) { dev_err(isp->dev, "failed to enable cam_ick clock\n"); goto out_clk_enable_ick; } - r = clk_set_rate(isp->clock[ISP_CLK_DPLL4_M5_CK], - CM_CAM_MCLK_HZ/divisor); + r = clk_set_rate(isp->clock[ISP_CLK_CAM_MCLK], CM_CAM_MCLK_HZ); if (r) { - dev_err(isp->dev, "clk_set_rate for dpll4_m5_ck failed\n"); + dev_err(isp->dev, "clk_set_rate for cam_mclk failed\n"); goto out_clk_enable_mclk; } r = clk_prepare_enable(isp->clock[ISP_CLK_CAM_MCLK]); @@ -1401,7 +1388,6 @@ static void isp_disable_clocks(struct isp_device *isp) static const char *isp_clocks[] = { "cam_ick", "cam_mclk", - "dpll4_m5_ck", "csi2_96m_fck", "l3_ick", }; diff --git a/drivers/media/platform/omap3isp/isp.h b/drivers/media/platform/omap3isp/isp.h index 517d348ce32b..c77e1f2ae5ca 100644 --- a/drivers/media/platform/omap3isp/isp.h +++ b/drivers/media/platform/omap3isp/isp.h @@ -147,7 +147,6 @@ struct isp_platform_callback { * @ref_count: Reference count for handling multiple ISP requests. * @cam_ick: Pointer to camera interface clock structure. * @cam_mclk: Pointer to camera functional clock structure. - * @dpll4_m5_ck: Pointer to DPLL4 M5 clock structure. * @csi2_fck: Pointer to camera CSI2 complexIO clock structure. * @l3_ick: Pointer to OMAP3 L3 bus interface clock. * @irq: Currently attached ISP ISR callbacks information structure. @@ -189,10 +188,9 @@ struct isp_device { u32 xclk_divisor[2]; /* Two clocks, a and b. */ #define ISP_CLK_CAM_ICK 0 #define ISP_CLK_CAM_MCLK 1 -#define ISP_CLK_DPLL4_M5_CK 2 -#define ISP_CLK_CSI2_FCK 3 -#define ISP_CLK_L3_ICK 4 - struct clk *clock[5]; +#define ISP_CLK_CSI2_FCK 2 +#define ISP_CLK_L3_ICK 3 + struct clk *clock[4]; /* ISP modules */ struct ispstat isp_af; diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 742f5d7eb0f5..a6f7190c09a4 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -12,13 +12,13 @@ *----------------------------------------------------------------------------*/ #ifndef AAC_DRIVER_BUILD -# define AAC_DRIVER_BUILD 29801 +# define AAC_DRIVER_BUILD 30000 # define AAC_DRIVER_BRANCH "-ms" #endif #define MAXIMUM_NUM_CONTAINERS 32 #define AAC_NUM_MGT_FIB 8 -#define AAC_NUM_IO_FIB (512 - AAC_NUM_MGT_FIB) +#define AAC_NUM_IO_FIB (1024 - AAC_NUM_MGT_FIB) #define AAC_NUM_FIB (AAC_NUM_IO_FIB + AAC_NUM_MGT_FIB) #define AAC_MAX_LUN (8) @@ -36,6 +36,10 @@ #define CONTAINER_TO_ID(cont) (cont) #define CONTAINER_TO_LUN(cont) (0) +#define PMC_DEVICE_S7 0x28c +#define PMC_DEVICE_S8 0x28d +#define PMC_DEVICE_S9 0x28f + #define aac_phys_to_logical(x) ((x)+1) #define aac_logical_to_phys(x) ((x)?(x)-1:0) diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 8e5d3be16127..3f759957f4b4 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -404,7 +404,13 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) dev->max_fib_size = status[1] & 0xFFE0; host->sg_tablesize = status[2] >> 16; dev->sg_tablesize = status[2] & 0xFFFF; - host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB; + if (dev->pdev->device == PMC_DEVICE_S7 || + dev->pdev->device == PMC_DEVICE_S8 || + dev->pdev->device == PMC_DEVICE_S9) + host->can_queue = ((status[3] >> 16) ? (status[3] >> 16) : + (status[3] & 0xFFFF)) - AAC_NUM_MGT_FIB; + else + host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB; dev->max_num_aif = status[4] & 0xFFFF; /* * NOTE: @@ -452,6 +458,9 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) } } + if (host->can_queue > AAC_NUM_IO_FIB) + host->can_queue = AAC_NUM_IO_FIB; + /* * Ok now init the communication subsystem */ diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index e6bf12675db8..a5f7690e819e 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -1034,7 +1034,7 @@ bfad_start_ops(struct bfad_s *bfad) { sizeof(driver_info.host_os_patch) - 1); strncpy(driver_info.os_device_name, bfad->pci_name, - sizeof(driver_info.os_device_name - 1)); + sizeof(driver_info.os_device_name) - 1); /* FCS driver info init */ spin_lock_irqsave(&bfad->bfad_lock, flags); diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h index 3486845ba301..50fcd018d14b 100644 --- a/drivers/scsi/bnx2fc/bnx2fc.h +++ b/drivers/scsi/bnx2fc/bnx2fc.h @@ -64,7 +64,7 @@ #include "bnx2fc_constants.h" #define BNX2FC_NAME "bnx2fc" -#define BNX2FC_VERSION "1.0.12" +#define BNX2FC_VERSION "1.0.13" #define PFX "bnx2fc: " @@ -156,6 +156,18 @@ #define BNX2FC_RELOGIN_WAIT_TIME 200 #define BNX2FC_RELOGIN_WAIT_CNT 10 +#define BNX2FC_STATS(hba, stat, cnt) \ + do { \ + u32 val; \ + \ + val = fw_stats->stat.cnt; \ + if (hba->prev_stats.stat.cnt <= val) \ + val -= hba->prev_stats.stat.cnt; \ + else \ + val += (0xfffffff - hba->prev_stats.stat.cnt); \ + hba->bfw_stats.cnt += val; \ + } while (0) + /* bnx2fc driver uses only one instance of fcoe_percpu_s */ extern struct fcoe_percpu_s bnx2fc_global; @@ -167,6 +179,14 @@ struct bnx2fc_percpu_s { spinlock_t fp_work_lock; }; +struct bnx2fc_fw_stats { + u64 fc_crc_cnt; + u64 fcoe_tx_pkt_cnt; + u64 fcoe_rx_pkt_cnt; + u64 fcoe_tx_byte_cnt; + u64 fcoe_rx_byte_cnt; +}; + struct bnx2fc_hba { struct list_head list; struct cnic_dev *cnic; @@ -207,6 +227,8 @@ struct bnx2fc_hba { struct bnx2fc_rport **tgt_ofld_list; /* statistics */ + struct bnx2fc_fw_stats bfw_stats; + struct fcoe_statistics_params prev_stats; struct fcoe_statistics_params *stats_buffer; dma_addr_t stats_buf_dma; struct completion stat_req_done; @@ -280,6 +302,7 @@ struct bnx2fc_rport { #define BNX2FC_FLAG_UPLD_REQ_COMPL 0x7 #define BNX2FC_FLAG_EXPL_LOGO 0x8 #define BNX2FC_FLAG_DISABLE_FAILED 0x9 +#define BNX2FC_FLAG_ENABLED 0xa u8 src_addr[ETH_ALEN]; u32 max_sqes; @@ -468,6 +491,8 @@ int bnx2fc_send_fw_fcoe_init_msg(struct bnx2fc_hba *hba); int bnx2fc_send_fw_fcoe_destroy_msg(struct bnx2fc_hba *hba); int bnx2fc_send_session_ofld_req(struct fcoe_port *port, struct bnx2fc_rport *tgt); +int bnx2fc_send_session_enable_req(struct fcoe_port *port, + struct bnx2fc_rport *tgt); int bnx2fc_send_session_disable_req(struct fcoe_port *port, struct bnx2fc_rport *tgt); int bnx2fc_send_session_destroy_req(struct bnx2fc_hba *hba, diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 70ecd953a579..6401db494ef5 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -22,7 +22,7 @@ DEFINE_PER_CPU(struct bnx2fc_percpu_s, bnx2fc_percpu); #define DRV_MODULE_NAME "bnx2fc" #define DRV_MODULE_VERSION BNX2FC_VERSION -#define DRV_MODULE_RELDATE "Jun 04, 2012" +#define DRV_MODULE_RELDATE "Dec 21, 2012" static char version[] = @@ -687,11 +687,16 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost) BNX2FC_HBA_DBG(lport, "FW stat req timed out\n"); return bnx2fc_stats; } - bnx2fc_stats->invalid_crc_count += fw_stats->rx_stat2.fc_crc_cnt; - bnx2fc_stats->tx_frames += fw_stats->tx_stat.fcoe_tx_pkt_cnt; - bnx2fc_stats->tx_words += (fw_stats->tx_stat.fcoe_tx_byte_cnt) / 4; - bnx2fc_stats->rx_frames += fw_stats->rx_stat0.fcoe_rx_pkt_cnt; - bnx2fc_stats->rx_words += (fw_stats->rx_stat0.fcoe_rx_byte_cnt) / 4; + BNX2FC_STATS(hba, rx_stat2, fc_crc_cnt); + bnx2fc_stats->invalid_crc_count += hba->bfw_stats.fc_crc_cnt; + BNX2FC_STATS(hba, tx_stat, fcoe_tx_pkt_cnt); + bnx2fc_stats->tx_frames += hba->bfw_stats.fcoe_tx_pkt_cnt; + BNX2FC_STATS(hba, tx_stat, fcoe_tx_byte_cnt); + bnx2fc_stats->tx_words += ((hba->bfw_stats.fcoe_tx_byte_cnt) / 4); + BNX2FC_STATS(hba, rx_stat0, fcoe_rx_pkt_cnt); + bnx2fc_stats->rx_frames += hba->bfw_stats.fcoe_rx_pkt_cnt; + BNX2FC_STATS(hba, rx_stat0, fcoe_rx_byte_cnt); + bnx2fc_stats->rx_words += ((hba->bfw_stats.fcoe_rx_byte_cnt) / 4); bnx2fc_stats->dumped_frames = 0; bnx2fc_stats->lip_count = 0; @@ -700,6 +705,8 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost) bnx2fc_stats->loss_of_signal_count = 0; bnx2fc_stats->prim_seq_protocol_err_count = 0; + memcpy(&hba->prev_stats, hba->stats_buffer, + sizeof(struct fcoe_statistics_params)); return bnx2fc_stats; } @@ -2660,7 +2667,7 @@ static struct scsi_host_template bnx2fc_shost_template = { .can_queue = BNX2FC_CAN_QUEUE, .use_clustering = ENABLE_CLUSTERING, .sg_tablesize = BNX2FC_MAX_BDS_PER_CMD, - .max_sectors = 512, + .max_sectors = 1024, }; static struct libfc_function_template bnx2fc_libfc_fcn_templ = { diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c index ef60afa94d0e..85ea98a80f40 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c +++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c @@ -347,7 +347,7 @@ int bnx2fc_send_session_ofld_req(struct fcoe_port *port, * @port: port structure pointer * @tgt: bnx2fc_rport structure pointer */ -static int bnx2fc_send_session_enable_req(struct fcoe_port *port, +int bnx2fc_send_session_enable_req(struct fcoe_port *port, struct bnx2fc_rport *tgt) { struct kwqe *kwqe_arr[2]; @@ -759,8 +759,6 @@ static void bnx2fc_process_unsol_compl(struct bnx2fc_rport *tgt, u16 wqe) case FCOE_ERROR_CODE_DATA_SOFN_SEQ_ACTIVE_RESET: BNX2FC_TGT_DBG(tgt, "REC TOV popped for xid - 0x%x\n", xid); - memset(&io_req->err_entry, 0, - sizeof(struct fcoe_err_report_entry)); memcpy(&io_req->err_entry, err_entry, sizeof(struct fcoe_err_report_entry)); if (!test_bit(BNX2FC_FLAG_SRR_SENT, @@ -847,8 +845,6 @@ ret_err_rqe: goto ret_warn_rqe; } - memset(&io_req->err_entry, 0, - sizeof(struct fcoe_err_report_entry)); memcpy(&io_req->err_entry, err_entry, sizeof(struct fcoe_err_report_entry)); @@ -1124,7 +1120,6 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba, struct bnx2fc_interface *interface; u32 conn_id; u32 context_id; - int rc; conn_id = ofld_kcqe->fcoe_conn_id; context_id = ofld_kcqe->fcoe_conn_context_id; @@ -1153,17 +1148,10 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba, "resources\n"); set_bit(BNX2FC_FLAG_CTX_ALLOC_FAILURE, &tgt->flags); } - goto ofld_cmpl_err; } else { - - /* now enable the session */ - rc = bnx2fc_send_session_enable_req(port, tgt); - if (rc) { - printk(KERN_ERR PFX "enable session failed\n"); - goto ofld_cmpl_err; - } + /* FW offload request successfully completed */ + set_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); } - return; ofld_cmpl_err: set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); wake_up_interruptible(&tgt->ofld_wait); @@ -1210,15 +1198,9 @@ static void bnx2fc_process_enable_conn_cmpl(struct bnx2fc_hba *hba, printk(KERN_ERR PFX "bnx2fc-enbl_cmpl: HBA mis-match\n"); goto enbl_cmpl_err; } - if (ofld_kcqe->completion_status) - goto enbl_cmpl_err; - else { + if (!ofld_kcqe->completion_status) /* enable successful - rport ready for issuing IOs */ - set_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); - set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); - wake_up_interruptible(&tgt->ofld_wait); - } - return; + set_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); enbl_cmpl_err: set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); @@ -1251,6 +1233,7 @@ static void bnx2fc_process_conn_disable_cmpl(struct bnx2fc_hba *hba, /* disable successful */ BNX2FC_TGT_DBG(tgt, "disable successful\n"); clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_DISABLED, &tgt->flags); set_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags); wake_up_interruptible(&tgt->upld_wait); diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 8d4626c07a12..60798e829de6 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -654,7 +654,7 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) mp_req->mp_resp_bd = dma_alloc_coherent(&hba->pcidev->dev, sz, &mp_req->mp_resp_bd_dma, GFP_ATOMIC); - if (!mp_req->mp_req_bd) { + if (!mp_req->mp_resp_bd) { printk(KERN_ERR PFX "unable to alloc MP resp bd\n"); bnx2fc_free_mp_resc(io_req); return FAILED; @@ -685,8 +685,8 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) { struct fc_lport *lport; - struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device)); - struct fc_rport_libfc_priv *rp = rport->dd_data; + struct fc_rport *rport; + struct fc_rport_libfc_priv *rp; struct fcoe_port *port; struct bnx2fc_interface *interface; struct bnx2fc_rport *tgt; @@ -704,6 +704,7 @@ static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) unsigned long start = jiffies; lport = shost_priv(host); + rport = starget_to_rport(scsi_target(sc_cmd->device)); port = lport_priv(lport); interface = port->priv; @@ -712,6 +713,7 @@ static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) rc = FAILED; goto tmf_err; } + rp = rport->dd_data; rc = fc_block_scsi_eh(sc_cmd); if (rc) diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c index b9d0d9cb17f9..c57a3bb8a9fb 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c +++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c @@ -33,6 +33,7 @@ static void bnx2fc_upld_timer(unsigned long data) BNX2FC_TGT_DBG(tgt, "upld_timer - Upload compl not received!!\n"); /* fake upload completion */ clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags); wake_up_interruptible(&tgt->upld_wait); } @@ -55,10 +56,25 @@ static void bnx2fc_ofld_timer(unsigned long data) * resources are freed up in bnx2fc_offload_session */ clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); wake_up_interruptible(&tgt->ofld_wait); } +static void bnx2fc_ofld_wait(struct bnx2fc_rport *tgt) +{ + setup_timer(&tgt->ofld_timer, bnx2fc_ofld_timer, (unsigned long)tgt); + mod_timer(&tgt->ofld_timer, jiffies + BNX2FC_FW_TIMEOUT); + + wait_event_interruptible(tgt->ofld_wait, + (test_bit( + BNX2FC_FLAG_OFLD_REQ_CMPL, + &tgt->flags))); + if (signal_pending(current)) + flush_signals(current); + del_timer_sync(&tgt->ofld_timer); +} + static void bnx2fc_offload_session(struct fcoe_port *port, struct bnx2fc_rport *tgt, struct fc_rport_priv *rdata) @@ -103,17 +119,7 @@ retry_ofld: * wait for the session is offloaded and enabled. 3 Secs * should be ample time for this process to complete. */ - setup_timer(&tgt->ofld_timer, bnx2fc_ofld_timer, (unsigned long)tgt); - mod_timer(&tgt->ofld_timer, jiffies + BNX2FC_FW_TIMEOUT); - - wait_event_interruptible(tgt->ofld_wait, - (test_bit( - BNX2FC_FLAG_OFLD_REQ_CMPL, - &tgt->flags))); - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->ofld_timer); + bnx2fc_ofld_wait(tgt); if (!(test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags))) { if (test_and_clear_bit(BNX2FC_FLAG_CTX_ALLOC_FAILURE, @@ -131,14 +137,23 @@ retry_ofld: } if (bnx2fc_map_doorbell(tgt)) { printk(KERN_ERR PFX "map doorbell failed - no mem\n"); - /* upload will take care of cleaning up sess resc */ - lport->tt.rport_logoff(rdata); + goto ofld_err; } + clear_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); + rval = bnx2fc_send_session_enable_req(port, tgt); + if (rval) { + pr_err(PFX "enable session failed\n"); + goto ofld_err; + } + bnx2fc_ofld_wait(tgt); + if (!(test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags))) + goto ofld_err; return; ofld_err: /* couldn't offload the session. log off from this rport */ BNX2FC_TGT_DBG(tgt, "bnx2fc_offload_session - offload error\n"); + clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); /* Free session resources */ bnx2fc_free_session_resc(hba, tgt); tgt_init_err: @@ -259,6 +274,19 @@ void bnx2fc_flush_active_ios(struct bnx2fc_rport *tgt) spin_unlock_bh(&tgt->tgt_lock); } +static void bnx2fc_upld_wait(struct bnx2fc_rport *tgt) +{ + setup_timer(&tgt->upld_timer, bnx2fc_upld_timer, (unsigned long)tgt); + mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); + wait_event_interruptible(tgt->upld_wait, + (test_bit( + BNX2FC_FLAG_UPLD_REQ_COMPL, + &tgt->flags))); + if (signal_pending(current)) + flush_signals(current); + del_timer_sync(&tgt->upld_timer); +} + static void bnx2fc_upload_session(struct fcoe_port *port, struct bnx2fc_rport *tgt) { @@ -279,19 +307,8 @@ static void bnx2fc_upload_session(struct fcoe_port *port, * wait for upload to complete. 3 Secs * should be sufficient time for this process to complete. */ - setup_timer(&tgt->upld_timer, bnx2fc_upld_timer, (unsigned long)tgt); - mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); - BNX2FC_TGT_DBG(tgt, "waiting for disable compl\n"); - wait_event_interruptible(tgt->upld_wait, - (test_bit( - BNX2FC_FLAG_UPLD_REQ_COMPL, - &tgt->flags))); - - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->upld_timer); + bnx2fc_upld_wait(tgt); /* * traverse thru the active_q and tmf_q and cleanup @@ -308,24 +325,13 @@ static void bnx2fc_upload_session(struct fcoe_port *port, bnx2fc_send_session_destroy_req(hba, tgt); /* wait for destroy to complete */ - setup_timer(&tgt->upld_timer, - bnx2fc_upld_timer, (unsigned long)tgt); - mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); - - wait_event_interruptible(tgt->upld_wait, - (test_bit( - BNX2FC_FLAG_UPLD_REQ_COMPL, - &tgt->flags))); + bnx2fc_upld_wait(tgt); if (!(test_bit(BNX2FC_FLAG_DESTROYED, &tgt->flags))) printk(KERN_ERR PFX "ERROR!! destroy timed out\n"); BNX2FC_TGT_DBG(tgt, "destroy wait complete flags = 0x%lx\n", tgt->flags); - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->upld_timer); } else if (test_bit(BNX2FC_FLAG_DISABLE_FAILED, &tgt->flags)) { printk(KERN_ERR PFX "ERROR!! DISABLE req failed, destroy" @@ -381,7 +387,9 @@ static int bnx2fc_init_tgt(struct bnx2fc_rport *tgt, tgt->rq_cons_idx = 0; atomic_set(&tgt->num_active_ios, 0); - if (rdata->flags & FC_RP_FLAGS_RETRY) { + if (rdata->flags & FC_RP_FLAGS_RETRY && + rdata->ids.roles & FC_RPORT_ROLE_FCP_TARGET && + !(rdata->ids.roles & FC_RPORT_ROLE_FCP_INITIATOR)) { tgt->dev_type = TYPE_TAPE; tgt->io_timeout = 0; /* use default ULP timeout */ } else { @@ -479,7 +487,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, tgt = (struct bnx2fc_rport *)&rp[1]; /* This can happen when ADISC finds the same target */ - if (test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags)) { + if (test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)) { BNX2FC_TGT_DBG(tgt, "already offloaded\n"); mutex_unlock(&hba->hba_mutex); return; @@ -494,11 +502,8 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, BNX2FC_TGT_DBG(tgt, "OFFLOAD num_ofld_sess = %d\n", hba->num_ofld_sess); - if (test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags)) { - /* - * Session is offloaded and enabled. Map - * doorbell register for this target - */ + if (test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)) { + /* Session is offloaded and enabled. */ BNX2FC_TGT_DBG(tgt, "sess offloaded\n"); /* This counter is protected with hba mutex */ hba->num_ofld_sess++; @@ -535,7 +540,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, */ tgt = (struct bnx2fc_rport *)&rp[1]; - if (!(test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags))) { + if (!(test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags))) { mutex_unlock(&hba->hba_mutex); break; } diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index 91eec60252ee..a28b03e5a5f6 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -1317,7 +1317,7 @@ int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba) (1ULL << ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_LUN)); if (error_mask1) { iscsi_init2.error_bit_map[0] = error_mask1; - mask64 &= (u32)(~mask64); + mask64 ^= (u32)(mask64); mask64 |= error_mask1; } else iscsi_init2.error_bit_map[0] = (u32) mask64; diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index 8ecdb94a59f4..bdd78fb4fc70 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -2131,13 +2131,16 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path) value_to_add = 4 - (cf->size % 4); cfg_data = kzalloc(cf->size+value_to_add, GFP_KERNEL); - if (cfg_data == NULL) - return -ENOMEM; + if (cfg_data == NULL) { + ret = -ENOMEM; + goto leave; + } memcpy((void *)cfg_data, (const void *)cf->data, cf->size); - - if (csio_hw_check_fwconfig(hw, fw_cfg_param) != 0) - return -EINVAL; + if (csio_hw_check_fwconfig(hw, fw_cfg_param) != 0) { + ret = -EINVAL; + goto leave; + } mtype = FW_PARAMS_PARAM_Y_GET(*fw_cfg_param); maddr = FW_PARAMS_PARAM_Z_GET(*fw_cfg_param) << 16; @@ -2149,9 +2152,9 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path) strncpy(path, "/lib/firmware/" CSIO_CF_FNAME, 64); } +leave: kfree(cfg_data); release_firmware(cf); - return ret; } diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index c323b2030afa..0604b5ff3638 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -60,13 +60,6 @@ static struct scsi_transport_template *csio_fcoe_transport_vport; /* * debugfs support */ -static int -csio_mem_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - static ssize_t csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -110,7 +103,7 @@ csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) static const struct file_operations csio_mem_debugfs_fops = { .owner = THIS_MODULE, - .open = csio_mem_open, + .open = simple_open, .read = csio_mem_read, .llseek = default_llseek, }; diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index f924b3c3720e..3fecf35ba292 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -1564,6 +1564,7 @@ static int t4_uld_state_change(void *handle, enum cxgb4_state state) break; case CXGB4_STATE_DETACH: pr_info("cdev 0x%p, DETACH.\n", cdev); + cxgbi_device_unregister(cdev); break; default: pr_info("cdev 0x%p, unknown state %d.\n", cdev, state); diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c index 3c53c3478ee7..483eb9dbe663 100644 --- a/drivers/scsi/fnic/fnic_fcs.c +++ b/drivers/scsi/fnic/fnic_fcs.c @@ -495,7 +495,8 @@ void fnic_eth_send(struct fcoe_ctlr *fip, struct sk_buff *skb) } fnic_queue_wq_eth_desc(wq, skb, pa, skb->len, - fnic->vlan_hw_insert, fnic->vlan_id, 1); + 0 /* hw inserts cos value */, + fnic->vlan_id, 1); spin_unlock_irqrestore(&fnic->wq_lock[0], flags); } @@ -563,7 +564,8 @@ static int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp) } fnic_queue_wq_desc(wq, skb, pa, tot_len, fr_eof(fp), - fnic->vlan_hw_insert, fnic->vlan_id, 1, 1, 1); + 0 /* hw inserts cos value */, + fnic->vlan_id, 1, 1, 1); fnic_send_frame_end: spin_unlock_irqrestore(&fnic->wq_lock[0], flags); diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index 599790e41a98..59bceac51a4c 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -1107,14 +1107,8 @@ static int gdth_init_pci(struct pci_dev *pdev, gdth_pci_str *pcistr, pci_read_config_word(pdev, PCI_COMMAND, &command); command |= 6; pci_write_config_word(pdev, PCI_COMMAND, command); - if (pci_resource_start(pdev, 8) == 1UL) - pci_resource_start(pdev, 8) = 0UL; - i = 0xFEFF0001UL; - pci_write_config_dword(pdev, PCI_ROM_ADDRESS, i); - gdth_delay(1); - pci_write_config_dword(pdev, PCI_ROM_ADDRESS, - pci_resource_start(pdev, 8)); - + gdth_delay(1); + dp6m_ptr = ha->brd; /* Ensure that it is safe to access the non HW portions of DPMEM. diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 1d7da3f41ebb..8fa79b83f2d3 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -98,6 +98,7 @@ static unsigned int ipr_transop_timeout = 0; static unsigned int ipr_debug = 0; static unsigned int ipr_max_devs = IPR_DEFAULT_SIS64_DEVS; static unsigned int ipr_dual_ioa_raid = 1; +static unsigned int ipr_number_of_msix = 2; static DEFINE_SPINLOCK(ipr_driver_lock); /* This table describes the differences between DMA controller chips */ @@ -107,6 +108,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 100, .cache_line_size = 0x20, .clear_isr = 1, + .iopoll_weight = 0, { .set_interrupt_mask_reg = 0x0022C, .clr_interrupt_mask_reg = 0x00230, @@ -131,6 +133,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 100, .cache_line_size = 0x20, .clear_isr = 1, + .iopoll_weight = 0, { .set_interrupt_mask_reg = 0x00288, .clr_interrupt_mask_reg = 0x0028C, @@ -155,6 +158,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 1000, .cache_line_size = 0x20, .clear_isr = 0, + .iopoll_weight = 64, { .set_interrupt_mask_reg = 0x00010, .clr_interrupt_mask_reg = 0x00018, @@ -215,6 +219,8 @@ MODULE_PARM_DESC(dual_ioa_raid, "Enable dual adapter RAID support. Set to 1 to e module_param_named(max_devs, ipr_max_devs, int, 0); MODULE_PARM_DESC(max_devs, "Specify the maximum number of physical devices. " "[Default=" __stringify(IPR_DEFAULT_SIS64_DEVS) "]"); +module_param_named(number_of_msix, ipr_number_of_msix, int, 0); +MODULE_PARM_DESC(number_of_msix, "Specify the number of MSIX interrupts to use on capable adapters (1 - 5). (default:2)"); MODULE_LICENSE("GPL"); MODULE_VERSION(IPR_DRIVER_VERSION); @@ -549,7 +555,8 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd, struct ipr_trace_entry *trace_entry; struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - trace_entry = &ioa_cfg->trace[ioa_cfg->trace_index++]; + trace_entry = &ioa_cfg->trace[atomic_add_return + (1, &ioa_cfg->trace_index)%IPR_NUM_TRACE_ENTRIES]; trace_entry->time = jiffies; trace_entry->op_code = ipr_cmd->ioarcb.cmd_pkt.cdb[0]; trace_entry->type = type; @@ -560,6 +567,7 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd, trace_entry->cmd_index = ipr_cmd->cmd_index & 0xff; trace_entry->res_handle = ipr_cmd->ioarcb.res_handle; trace_entry->u.add_data = add_data; + wmb(); } #else #define ipr_trc_hook(ipr_cmd, type, add_data) do { } while (0) @@ -595,8 +603,11 @@ static void ipr_reinit_ipr_cmnd(struct ipr_cmnd *ipr_cmd) struct ipr_ioasa *ioasa = &ipr_cmd->s.ioasa; struct ipr_ioasa64 *ioasa64 = &ipr_cmd->s.ioasa64; dma_addr_t dma_addr = ipr_cmd->dma_addr; + int hrrq_id; + hrrq_id = ioarcb->cmd_pkt.hrrq_id; memset(&ioarcb->cmd_pkt, 0, sizeof(struct ipr_cmd_pkt)); + ioarcb->cmd_pkt.hrrq_id = hrrq_id; ioarcb->data_transfer_length = 0; ioarcb->read_data_transfer_length = 0; ioarcb->ioadl_len = 0; @@ -646,12 +657,16 @@ static void ipr_init_ipr_cmnd(struct ipr_cmnd *ipr_cmd, * pointer to ipr command struct **/ static -struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) +struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_hrr_queue *hrrq) { - struct ipr_cmnd *ipr_cmd; + struct ipr_cmnd *ipr_cmd = NULL; + + if (likely(!list_empty(&hrrq->hrrq_free_q))) { + ipr_cmd = list_entry(hrrq->hrrq_free_q.next, + struct ipr_cmnd, queue); + list_del(&ipr_cmd->queue); + } - ipr_cmd = list_entry(ioa_cfg->free_q.next, struct ipr_cmnd, queue); - list_del(&ipr_cmd->queue); return ipr_cmd; } @@ -666,7 +681,8 @@ struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) static struct ipr_cmnd *ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) { - struct ipr_cmnd *ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg); + struct ipr_cmnd *ipr_cmd = + __ipr_get_free_ipr_cmnd(&ioa_cfg->hrrq[IPR_INIT_HRRQ]); ipr_init_ipr_cmnd(ipr_cmd, ipr_lock_and_done); return ipr_cmd; } @@ -686,9 +702,15 @@ static void ipr_mask_and_clear_interrupts(struct ipr_ioa_cfg *ioa_cfg, u32 clr_ints) { volatile u32 int_reg; + int i; /* Stop new interrupts */ - ioa_cfg->allow_interrupts = 0; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); /* Set interrupt mask to stop all new interrupts */ if (ioa_cfg->sis64) @@ -761,13 +783,12 @@ static int ipr_set_pcix_cmd_reg(struct ipr_ioa_cfg *ioa_cfg) **/ static void ipr_sata_eh_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ata_queued_cmd *qc = ipr_cmd->qc; struct ipr_sata_port *sata_port = qc->ap->private_data; qc->err_mask |= AC_ERR_OTHER; sata_port->ioasa.status |= ATA_BUSY; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); ata_qc_complete(qc); } @@ -783,14 +804,13 @@ static void ipr_sata_eh_done(struct ipr_cmnd *ipr_cmd) **/ static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; scsi_cmd->result |= (DID_ERROR << 16); scsi_dma_unmap(ipr_cmd->scsi_cmd); scsi_cmd->scsi_done(scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); } /** @@ -805,24 +825,32 @@ static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd) static void ipr_fail_all_ops(struct ipr_ioa_cfg *ioa_cfg) { struct ipr_cmnd *ipr_cmd, *temp; + struct ipr_hrr_queue *hrrq; ENTER; - list_for_each_entry_safe(ipr_cmd, temp, &ioa_cfg->pending_q, queue) { - list_del(&ipr_cmd->queue); + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry_safe(ipr_cmd, + temp, &hrrq->hrrq_pending_q, queue) { + list_del(&ipr_cmd->queue); - ipr_cmd->s.ioasa.hdr.ioasc = cpu_to_be32(IPR_IOASC_IOA_WAS_RESET); - ipr_cmd->s.ioasa.hdr.ilid = cpu_to_be32(IPR_DRIVER_ILID); + ipr_cmd->s.ioasa.hdr.ioasc = + cpu_to_be32(IPR_IOASC_IOA_WAS_RESET); + ipr_cmd->s.ioasa.hdr.ilid = + cpu_to_be32(IPR_DRIVER_ILID); - if (ipr_cmd->scsi_cmd) - ipr_cmd->done = ipr_scsi_eh_done; - else if (ipr_cmd->qc) - ipr_cmd->done = ipr_sata_eh_done; + if (ipr_cmd->scsi_cmd) + ipr_cmd->done = ipr_scsi_eh_done; + else if (ipr_cmd->qc) + ipr_cmd->done = ipr_sata_eh_done; - ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, IPR_IOASC_IOA_WAS_RESET); - del_timer(&ipr_cmd->timer); - ipr_cmd->done(ipr_cmd); + ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, + IPR_IOASC_IOA_WAS_RESET); + del_timer(&ipr_cmd->timer); + ipr_cmd->done(ipr_cmd); + } + spin_unlock(&hrrq->_lock); } - LEAVE; } @@ -872,9 +900,7 @@ static void ipr_do_req(struct ipr_cmnd *ipr_cmd, void (*done) (struct ipr_cmnd *), void (*timeout_func) (struct ipr_cmnd *), u32 timeout) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = done; @@ -975,6 +1001,14 @@ static void ipr_send_blocking_cmd(struct ipr_cmnd *ipr_cmd, spin_lock_irq(ioa_cfg->host->host_lock); } +static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg) +{ + if (ioa_cfg->hrrq_num == 1) + return 0; + else + return (atomic_add_return(1, &ioa_cfg->hrrq_index) % (ioa_cfg->hrrq_num - 1)) + 1; +} + /** * ipr_send_hcam - Send an HCAM to the adapter. * @ioa_cfg: ioa config struct @@ -994,9 +1028,9 @@ static void ipr_send_hcam(struct ipr_ioa_cfg *ioa_cfg, u8 type, struct ipr_cmnd *ipr_cmd; struct ipr_ioarcb *ioarcb; - if (ioa_cfg->allow_cmds) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) { ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); list_add_tail(&hostrcb->queue, &ioa_cfg->hostrcb_pending_q); ipr_cmd->u.hostrcb = hostrcb; @@ -1166,14 +1200,15 @@ static int ipr_is_same_device(struct ipr_resource_entry *res, } /** - * ipr_format_res_path - Format the resource path for printing. + * __ipr_format_res_path - Format the resource path for printing. * @res_path: resource path * @buf: buffer + * @len: length of buffer provided * * Return value: * pointer to buffer **/ -static char *ipr_format_res_path(u8 *res_path, char *buffer, int len) +static char *__ipr_format_res_path(u8 *res_path, char *buffer, int len) { int i; char *p = buffer; @@ -1187,6 +1222,27 @@ static char *ipr_format_res_path(u8 *res_path, char *buffer, int len) } /** + * ipr_format_res_path - Format the resource path for printing. + * @ioa_cfg: ioa config struct + * @res_path: resource path + * @buf: buffer + * @len: length of buffer provided + * + * Return value: + * pointer to buffer + **/ +static char *ipr_format_res_path(struct ipr_ioa_cfg *ioa_cfg, + u8 *res_path, char *buffer, int len) +{ + char *p = buffer; + + *p = '\0'; + p += snprintf(p, buffer + len - p, "%d/", ioa_cfg->host->host_no); + __ipr_format_res_path(res_path, p, len - (buffer - p)); + return buffer; +} + +/** * ipr_update_res_entry - Update the resource entry. * @res: resource entry struct * @cfgtew: config table entry wrapper struct @@ -1226,8 +1282,8 @@ static void ipr_update_res_entry(struct ipr_resource_entry *res, if (res->sdev && new_path) sdev_printk(KERN_INFO, res->sdev, "Resource path: %s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(res->ioa_cfg, + res->res_path, buffer, sizeof(buffer))); } else { res->flags = cfgtew->u.cfgte->flags; if (res->flags & IPR_IS_IOA_RESOURCE) @@ -1363,7 +1419,7 @@ static void ipr_process_ccn(struct ipr_cmnd *ipr_cmd) u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); list_del(&hostrcb->queue); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (ioasc) { if (ioasc != IPR_IOASC_IOA_WAS_RESET) @@ -1613,8 +1669,8 @@ static void ipr_log_sis64_config_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err_separator; ipr_err("Device %d : %s", i + 1, - ipr_format_res_path(dev_entry->res_path, buffer, - sizeof(buffer))); + __ipr_format_res_path(dev_entry->res_path, + buffer, sizeof(buffer))); ipr_log_ext_vpd(&dev_entry->vpd); ipr_err("-----New Device Information-----\n"); @@ -1960,14 +2016,16 @@ static void ipr_log64_fabric_path(struct ipr_hostrcb *hostrcb, ipr_hcam_err(hostrcb, "%s %s: Resource Path=%s\n", path_active_desc[i].desc, path_state_desc[j].desc, - ipr_format_res_path(fabric->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(hostrcb->ioa_cfg, + fabric->res_path, + buffer, sizeof(buffer))); return; } } ipr_err("Path state=%02X Resource Path=%s\n", path_state, - ipr_format_res_path(fabric->res_path, buffer, sizeof(buffer))); + ipr_format_res_path(hostrcb->ioa_cfg, fabric->res_path, + buffer, sizeof(buffer))); } static const struct { @@ -2108,18 +2166,20 @@ static void ipr_log64_path_elem(struct ipr_hostrcb *hostrcb, ipr_hcam_err(hostrcb, "%s %s: Resource Path=%s, Link rate=%s, WWN=%08X%08X\n", path_status_desc[j].desc, path_type_desc[i].desc, - ipr_format_res_path(cfg->res_path, buffer, - sizeof(buffer)), - link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], - be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); + ipr_format_res_path(hostrcb->ioa_cfg, + cfg->res_path, buffer, sizeof(buffer)), + link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], + be32_to_cpu(cfg->wwid[0]), + be32_to_cpu(cfg->wwid[1])); return; } } ipr_hcam_err(hostrcb, "Path element=%02X: Resource Path=%s, Link rate=%s " "WWN=%08X%08X\n", cfg->type_status, - ipr_format_res_path(cfg->res_path, buffer, sizeof(buffer)), - link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], - be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); + ipr_format_res_path(hostrcb->ioa_cfg, + cfg->res_path, buffer, sizeof(buffer)), + link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], + be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); } /** @@ -2182,7 +2242,8 @@ static void ipr_log_sis64_array_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err("RAID %s Array Configuration: %s\n", error->protection_level, - ipr_format_res_path(error->last_res_path, buffer, sizeof(buffer))); + ipr_format_res_path(ioa_cfg, error->last_res_path, + buffer, sizeof(buffer))); ipr_err_separator; @@ -2203,11 +2264,12 @@ static void ipr_log_sis64_array_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err("Array Member %d:\n", i); ipr_log_ext_vpd(&array_entry->vpd); ipr_err("Current Location: %s\n", - ipr_format_res_path(array_entry->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(ioa_cfg, array_entry->res_path, + buffer, sizeof(buffer))); ipr_err("Expected Location: %s\n", - ipr_format_res_path(array_entry->expected_res_path, - buffer, sizeof(buffer))); + ipr_format_res_path(ioa_cfg, + array_entry->expected_res_path, + buffer, sizeof(buffer))); ipr_err_separator; } @@ -2409,7 +2471,7 @@ static void ipr_process_error(struct ipr_cmnd *ipr_cmd) fd_ioasc = be32_to_cpu(hostrcb->hcam.u.error.fd_ioasc); list_del(&hostrcb->queue); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (!ioasc) { ipr_handle_log_data(ioa_cfg, hostrcb); @@ -2491,36 +2553,6 @@ static void ipr_oper_timeout(struct ipr_cmnd *ipr_cmd) } /** - * ipr_reset_reload - Reset/Reload the IOA - * @ioa_cfg: ioa config struct - * @shutdown_type: shutdown type - * - * This function resets the adapter and re-initializes it. - * This function assumes that all new host commands have been stopped. - * Return value: - * SUCCESS / FAILED - **/ -static int ipr_reset_reload(struct ipr_ioa_cfg *ioa_cfg, - enum ipr_shutdown_type shutdown_type) -{ - if (!ioa_cfg->in_reset_reload) - ipr_initiate_ioa_reset(ioa_cfg, shutdown_type); - - spin_unlock_irq(ioa_cfg->host->host_lock); - wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); - spin_lock_irq(ioa_cfg->host->host_lock); - - /* If we got hit with a host reset while we were already resetting - the adapter for some reason, and the reset failed. */ - if (ioa_cfg->ioa_is_dead) { - ipr_trace; - return FAILED; - } - - return SUCCESS; -} - -/** * ipr_find_ses_entry - Find matching SES in SES table * @res: resource entry struct of SES * @@ -3153,7 +3185,8 @@ static void ipr_worker_thread(struct work_struct *work) restart: do { did_work = 0; - if (!ioa_cfg->allow_cmds || !ioa_cfg->allow_ml_add_del) { + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds || + !ioa_cfg->allow_ml_add_del) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); return; } @@ -3401,7 +3434,7 @@ static ssize_t ipr_show_adapter_state(struct device *dev, int len; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - if (ioa_cfg->ioa_is_dead) + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) len = snprintf(buf, PAGE_SIZE, "offline\n"); else len = snprintf(buf, PAGE_SIZE, "online\n"); @@ -3427,14 +3460,20 @@ static ssize_t ipr_store_adapter_state(struct device *dev, struct Scsi_Host *shost = class_to_shost(dev); struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; unsigned long lock_flags; - int result = count; + int result = count, i; if (!capable(CAP_SYS_ADMIN)) return -EACCES; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - if (ioa_cfg->ioa_is_dead && !strncmp(buf, "online", 6)) { - ioa_cfg->ioa_is_dead = 0; + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead && + !strncmp(buf, "online", 6)) { + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].ioa_is_dead = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); ioa_cfg->reset_retries = 0; ioa_cfg->in_ioa_bringdown = 0; ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); @@ -3494,6 +3533,95 @@ static struct device_attribute ipr_ioa_reset_attr = { .store = ipr_store_reset_adapter }; +static int ipr_iopoll(struct blk_iopoll *iop, int budget); + /** + * ipr_show_iopoll_weight - Show ipr polling mode + * @dev: class device struct + * @buf: buffer + * + * Return value: + * number of bytes printed to buffer + **/ +static ssize_t ipr_show_iopoll_weight(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; + unsigned long lock_flags = 0; + int len; + + spin_lock_irqsave(shost->host_lock, lock_flags); + len = snprintf(buf, PAGE_SIZE, "%d\n", ioa_cfg->iopoll_weight); + spin_unlock_irqrestore(shost->host_lock, lock_flags); + + return len; +} + +/** + * ipr_store_iopoll_weight - Change the adapter's polling mode + * @dev: class device struct + * @buf: buffer + * + * Return value: + * number of bytes printed to buffer + **/ +static ssize_t ipr_store_iopoll_weight(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; + unsigned long user_iopoll_weight; + unsigned long lock_flags = 0; + int i; + + if (!ioa_cfg->sis64) { + dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n"); + return -EINVAL; + } + if (kstrtoul(buf, 10, &user_iopoll_weight)) + return -EINVAL; + + if (user_iopoll_weight > 256) { + dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n"); + return -EINVAL; + } + + if (user_iopoll_weight == ioa_cfg->iopoll_weight) { + dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n"); + return strlen(buf); + } + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) + blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + } + + spin_lock_irqsave(shost->host_lock, lock_flags); + ioa_cfg->iopoll_weight = user_iopoll_weight; + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) { + blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + ioa_cfg->iopoll_weight, ipr_iopoll); + blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); + } + } + spin_unlock_irqrestore(shost->host_lock, lock_flags); + + return strlen(buf); +} + +static struct device_attribute ipr_iopoll_weight_attr = { + .attr = { + .name = "iopoll_weight", + .mode = S_IRUGO | S_IWUSR, + }, + .show = ipr_show_iopoll_weight, + .store = ipr_store_iopoll_weight +}; + /** * ipr_alloc_ucode_buffer - Allocates a microcode download buffer * @buf_len: buffer length @@ -3862,6 +3990,7 @@ static struct device_attribute *ipr_ioa_attrs[] = { &ipr_ioa_reset_attr, &ipr_update_fw_attr, &ipr_ioa_fw_type_attr, + &ipr_iopoll_weight_attr, NULL, }; @@ -4014,7 +4143,7 @@ static int ipr_alloc_dump(struct ipr_ioa_cfg *ioa_cfg) ioa_cfg->dump = dump; ioa_cfg->sdt_state = WAIT_FOR_DUMP; - if (ioa_cfg->ioa_is_dead && !ioa_cfg->dump_taken) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead && !ioa_cfg->dump_taken) { ioa_cfg->dump_taken = 1; schedule_work(&ioa_cfg->work_q); } @@ -4227,8 +4356,8 @@ static ssize_t ipr_show_resource_path(struct device *dev, struct device_attribut res = (struct ipr_resource_entry *)sdev->hostdata; if (res && ioa_cfg->sis64) len = snprintf(buf, PAGE_SIZE, "%s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + __ipr_format_res_path(res->res_path, buffer, + sizeof(buffer))); else if (res) len = snprintf(buf, PAGE_SIZE, "%d:%d:%d:%d\n", ioa_cfg->host->host_no, res->bus, res->target, res->lun); @@ -4556,8 +4685,8 @@ static int ipr_slave_configure(struct scsi_device *sdev) scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun); if (ioa_cfg->sis64) sdev_printk(KERN_INFO, sdev, "Resource path: %s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(ioa_cfg, + res->res_path, buffer, sizeof(buffer))); return 0; } spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); @@ -4638,22 +4767,18 @@ static int ipr_slave_alloc(struct scsi_device *sdev) return rc; } -/** - * ipr_eh_host_reset - Reset the host adapter - * @scsi_cmd: scsi command struct - * - * Return value: - * SUCCESS / FAILED - **/ -static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd) +static int ipr_eh_host_reset(struct scsi_cmnd *cmd) { struct ipr_ioa_cfg *ioa_cfg; - int rc; + unsigned long lock_flags = 0; + int rc = SUCCESS; ENTER; - ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; + ioa_cfg = (struct ipr_ioa_cfg *) cmd->device->host->hostdata; + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); if (!ioa_cfg->in_reset_reload) { + ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_ABBREV); dev_err(&ioa_cfg->pdev->dev, "Adapter being reset as a result of error recovery.\n"); @@ -4661,20 +4786,19 @@ static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd) ioa_cfg->sdt_state = GET_DUMP; } - rc = ipr_reset_reload(ioa_cfg, IPR_SHUTDOWN_ABBREV); - - LEAVE; - return rc; -} - -static int ipr_eh_host_reset(struct scsi_cmnd *cmd) -{ - int rc; + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - spin_lock_irq(cmd->device->host->host_lock); - rc = __ipr_eh_host_reset(cmd); - spin_unlock_irq(cmd->device->host->host_lock); + /* If we got hit with a host reset while we were already resetting + the adapter for some reason, and the reset failed. */ + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { + ipr_trace; + rc = FAILED; + } + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + LEAVE; return rc; } @@ -4723,7 +4847,7 @@ static int ipr_device_reset(struct ipr_ioa_cfg *ioa_cfg, ipr_send_blocking_cmd(ipr_cmd, ipr_timeout, IPR_DEVICE_RESET_TIMEOUT); ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (ipr_is_gata(res) && res->sata_port && ioasc != IPR_IOASC_IOA_WAS_RESET) { if (ipr_cmd->ioa_cfg->sis64) memcpy(&res->sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata, @@ -4793,6 +4917,7 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) struct ipr_resource_entry *res; struct ata_port *ap; int rc = 0; + struct ipr_hrr_queue *hrrq; ENTER; ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; @@ -4808,22 +4933,26 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) */ if (ioa_cfg->in_reset_reload) return FAILED; - if (ioa_cfg->ioa_is_dead) + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return FAILED; - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->ioarcb.res_handle == res->res_handle) { - if (ipr_cmd->scsi_cmd) - ipr_cmd->done = ipr_scsi_eh_done; - if (ipr_cmd->qc) - ipr_cmd->done = ipr_sata_eh_done; - if (ipr_cmd->qc && !(ipr_cmd->qc->flags & ATA_QCFLAG_FAILED)) { - ipr_cmd->qc->err_mask |= AC_ERR_TIMEOUT; - ipr_cmd->qc->flags |= ATA_QCFLAG_FAILED; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->ioarcb.res_handle == res->res_handle) { + if (ipr_cmd->scsi_cmd) + ipr_cmd->done = ipr_scsi_eh_done; + if (ipr_cmd->qc) + ipr_cmd->done = ipr_sata_eh_done; + if (ipr_cmd->qc && + !(ipr_cmd->qc->flags & ATA_QCFLAG_FAILED)) { + ipr_cmd->qc->err_mask |= AC_ERR_TIMEOUT; + ipr_cmd->qc->flags |= ATA_QCFLAG_FAILED; + } } } + spin_unlock(&hrrq->_lock); } - res->resetting_device = 1; scmd_printk(KERN_ERR, scsi_cmd, "Resetting device\n"); @@ -4833,11 +4962,17 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) ata_std_error_handler(ap); spin_lock_irq(scsi_cmd->device->host->host_lock); - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->ioarcb.res_handle == res->res_handle) { - rc = -EIO; - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, + &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->ioarcb.res_handle == + res->res_handle) { + rc = -EIO; + break; + } } + spin_unlock(&hrrq->_lock); } } else rc = ipr_device_reset(ioa_cfg, res); @@ -4890,7 +5025,7 @@ static void ipr_bus_reset_done(struct ipr_cmnd *ipr_cmd) else ipr_cmd->sibling->done(ipr_cmd->sibling); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); LEAVE; } @@ -4951,6 +5086,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) struct ipr_cmd_pkt *cmd_pkt; u32 ioasc, int_reg; int op_found = 0; + struct ipr_hrr_queue *hrrq; ENTER; ioa_cfg = (struct ipr_ioa_cfg *)scsi_cmd->device->host->hostdata; @@ -4960,7 +5096,8 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) * This will force the mid-layer to call ipr_eh_host_reset, * which will then go to sleep and wait for the reset to complete */ - if (ioa_cfg->in_reset_reload || ioa_cfg->ioa_is_dead) + if (ioa_cfg->in_reset_reload || + ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return FAILED; if (!res) return FAILED; @@ -4975,12 +5112,16 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) if (!ipr_is_gscsi(res)) return FAILED; - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->scsi_cmd == scsi_cmd) { - ipr_cmd->done = ipr_scsi_eh_done; - op_found = 1; - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->scsi_cmd == scsi_cmd) { + ipr_cmd->done = ipr_scsi_eh_done; + op_found = 1; + break; + } } + spin_unlock(&hrrq->_lock); } if (!op_found) @@ -5007,7 +5148,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) ipr_trace; } - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); if (!ipr_is_naca_model(res)) res->needs_sync_complete = 1; @@ -5099,6 +5240,9 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg, } else { if (int_reg & IPR_PCII_IOA_UNIT_CHECKED) ioa_cfg->ioa_unit_checked = 1; + else if (int_reg & IPR_PCII_NO_HOST_RRQ) + dev_err(&ioa_cfg->pdev->dev, + "No Host RRQ. 0x%08X\n", int_reg); else dev_err(&ioa_cfg->pdev->dev, "Permanent IOA failure. 0x%08X\n", int_reg); @@ -5121,10 +5265,10 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg, * Return value: * none **/ -static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) +static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg, u16 number) { ioa_cfg->errors_logged++; - dev_err(&ioa_cfg->pdev->dev, "%s\n", msg); + dev_err(&ioa_cfg->pdev->dev, "%s %d\n", msg, number); if (WAIT_FOR_DUMP == ioa_cfg->sdt_state) ioa_cfg->sdt_state = GET_DUMP; @@ -5132,6 +5276,83 @@ static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); } +static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget, + struct list_head *doneq) +{ + u32 ioasc; + u16 cmd_index; + struct ipr_cmnd *ipr_cmd; + struct ipr_ioa_cfg *ioa_cfg = hrr_queue->ioa_cfg; + int num_hrrq = 0; + + /* If interrupts are disabled, ignore the interrupt */ + if (!hrr_queue->allow_interrupts) + return 0; + + while ((be32_to_cpu(*hrr_queue->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrr_queue->toggle_bit) { + + cmd_index = (be32_to_cpu(*hrr_queue->hrrq_curr) & + IPR_HRRQ_REQ_RESP_HANDLE_MASK) >> + IPR_HRRQ_REQ_RESP_HANDLE_SHIFT; + + if (unlikely(cmd_index > hrr_queue->max_cmd_id || + cmd_index < hrr_queue->min_cmd_id)) { + ipr_isr_eh(ioa_cfg, + "Invalid response handle from IOA: ", + cmd_index); + break; + } + + ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index]; + ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); + + ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc); + + list_move_tail(&ipr_cmd->queue, doneq); + + if (hrr_queue->hrrq_curr < hrr_queue->hrrq_end) { + hrr_queue->hrrq_curr++; + } else { + hrr_queue->hrrq_curr = hrr_queue->hrrq_start; + hrr_queue->toggle_bit ^= 1u; + } + num_hrrq++; + if (budget > 0 && num_hrrq >= budget) + break; + } + + return num_hrrq; +} + +static int ipr_iopoll(struct blk_iopoll *iop, int budget) +{ + struct ipr_ioa_cfg *ioa_cfg; + struct ipr_hrr_queue *hrrq; + struct ipr_cmnd *ipr_cmd, *temp; + unsigned long hrrq_flags; + int completed_ops; + LIST_HEAD(doneq); + + hrrq = container_of(iop, struct ipr_hrr_queue, iopoll); + ioa_cfg = hrrq->ioa_cfg; + + spin_lock_irqsave(hrrq->lock, hrrq_flags); + completed_ops = ipr_process_hrrq(hrrq, budget, &doneq); + + if (completed_ops < budget) + blk_iopoll_complete(iop); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + + list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { + list_del(&ipr_cmd->queue); + del_timer(&ipr_cmd->timer); + ipr_cmd->fast_done(ipr_cmd); + } + + return completed_ops; +} + /** * ipr_isr - Interrupt service routine * @irq: irq number @@ -5142,78 +5363,48 @@ static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) **/ static irqreturn_t ipr_isr(int irq, void *devp) { - struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)devp; - unsigned long lock_flags = 0; + struct ipr_hrr_queue *hrrq = (struct ipr_hrr_queue *)devp; + struct ipr_ioa_cfg *ioa_cfg = hrrq->ioa_cfg; + unsigned long hrrq_flags = 0; u32 int_reg = 0; - u32 ioasc; - u16 cmd_index; int num_hrrq = 0; int irq_none = 0; struct ipr_cmnd *ipr_cmd, *temp; irqreturn_t rc = IRQ_NONE; LIST_HEAD(doneq); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - + spin_lock_irqsave(hrrq->lock, hrrq_flags); /* If interrupts are disabled, ignore the interrupt */ - if (!ioa_cfg->allow_interrupts) { - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + if (!hrrq->allow_interrupts) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return IRQ_NONE; } while (1) { - ipr_cmd = NULL; - - while ((be32_to_cpu(*ioa_cfg->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == - ioa_cfg->toggle_bit) { - - cmd_index = (be32_to_cpu(*ioa_cfg->hrrq_curr) & - IPR_HRRQ_REQ_RESP_HANDLE_MASK) >> IPR_HRRQ_REQ_RESP_HANDLE_SHIFT; - - if (unlikely(cmd_index >= IPR_NUM_CMD_BLKS)) { - ipr_isr_eh(ioa_cfg, "Invalid response handle from IOA"); - rc = IRQ_HANDLED; - goto unlock_out; - } - - ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index]; - - ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); + if (ipr_process_hrrq(hrrq, -1, &doneq)) { + rc = IRQ_HANDLED; - ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc); - - list_move_tail(&ipr_cmd->queue, &doneq); - - rc = IRQ_HANDLED; - - if (ioa_cfg->hrrq_curr < ioa_cfg->hrrq_end) { - ioa_cfg->hrrq_curr++; - } else { - ioa_cfg->hrrq_curr = ioa_cfg->hrrq_start; - ioa_cfg->toggle_bit ^= 1u; - } - } - - if (ipr_cmd && !ioa_cfg->clear_isr) - break; + if (!ioa_cfg->clear_isr) + break; - if (ipr_cmd != NULL) { /* Clear the PCI interrupt */ num_hrrq = 0; do { - writel(IPR_PCII_HRRQ_UPDATED, ioa_cfg->regs.clr_interrupt_reg32); + writel(IPR_PCII_HRRQ_UPDATED, + ioa_cfg->regs.clr_interrupt_reg32); int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32); } while (int_reg & IPR_PCII_HRRQ_UPDATED && - num_hrrq++ < IPR_MAX_HRRQ_RETRIES); + num_hrrq++ < IPR_MAX_HRRQ_RETRIES); } else if (rc == IRQ_NONE && irq_none == 0) { int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32); irq_none++; } else if (num_hrrq == IPR_MAX_HRRQ_RETRIES && int_reg & IPR_PCII_HRRQ_UPDATED) { - ipr_isr_eh(ioa_cfg, "Error clearing HRRQ"); + ipr_isr_eh(ioa_cfg, + "Error clearing HRRQ: ", num_hrrq); rc = IRQ_HANDLED; - goto unlock_out; + break; } else break; } @@ -5221,14 +5412,64 @@ static irqreturn_t ipr_isr(int irq, void *devp) if (unlikely(rc == IRQ_NONE)) rc = ipr_handle_other_interrupt(ioa_cfg, int_reg); -unlock_out: - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { list_del(&ipr_cmd->queue); del_timer(&ipr_cmd->timer); ipr_cmd->fast_done(ipr_cmd); } + return rc; +} + +/** + * ipr_isr_mhrrq - Interrupt service routine + * @irq: irq number + * @devp: pointer to ioa config struct + * + * Return value: + * IRQ_NONE / IRQ_HANDLED + **/ +static irqreturn_t ipr_isr_mhrrq(int irq, void *devp) +{ + struct ipr_hrr_queue *hrrq = (struct ipr_hrr_queue *)devp; + struct ipr_ioa_cfg *ioa_cfg = hrrq->ioa_cfg; + unsigned long hrrq_flags = 0; + struct ipr_cmnd *ipr_cmd, *temp; + irqreturn_t rc = IRQ_NONE; + LIST_HEAD(doneq); + + spin_lock_irqsave(hrrq->lock, hrrq_flags); + + /* If interrupts are disabled, ignore the interrupt */ + if (!hrrq->allow_interrupts) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return IRQ_NONE; + } + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrrq->toggle_bit) { + if (!blk_iopoll_sched_prep(&hrrq->iopoll)) + blk_iopoll_sched(&hrrq->iopoll); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return IRQ_HANDLED; + } + } else { + if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrrq->toggle_bit) + + if (ipr_process_hrrq(hrrq, -1, &doneq)) + rc = IRQ_HANDLED; + } + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + + list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { + list_del(&ipr_cmd->queue); + del_timer(&ipr_cmd->timer); + ipr_cmd->fast_done(ipr_cmd); + } return rc; } @@ -5388,7 +5629,6 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd) { struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; struct ipr_resource_entry *res = scsi_cmd->device->hostdata; - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); if (IPR_IOASC_SENSE_KEY(ioasc) > 0) { @@ -5406,7 +5646,7 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd) res->in_erp = 0; } scsi_dma_unmap(ipr_cmd->scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); } @@ -5790,7 +6030,7 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg, } scsi_dma_unmap(ipr_cmd->scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); } @@ -5809,21 +6049,21 @@ static void ipr_scsi_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); - unsigned long lock_flags; + unsigned long hrrq_flags; scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->s.ioasa.hdr.residual_data_len)); if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) { scsi_dma_unmap(scsi_cmd); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); } else { - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); ipr_erp_start(ioa_cfg, ipr_cmd); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); } } @@ -5846,22 +6086,34 @@ static int ipr_queuecommand(struct Scsi_Host *shost, struct ipr_resource_entry *res; struct ipr_ioarcb *ioarcb; struct ipr_cmnd *ipr_cmd; - unsigned long lock_flags; + unsigned long hrrq_flags, lock_flags; int rc; + struct ipr_hrr_queue *hrrq; + int hrrq_id; ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; - spin_lock_irqsave(shost->host_lock, lock_flags); scsi_cmd->result = (DID_OK << 16); res = scsi_cmd->device->hostdata; + if (ipr_is_gata(res) && res->sata_port) { + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + rc = ata_sas_queuecmd(scsi_cmd, res->sata_port->ap); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + return rc; + } + + hrrq_id = ipr_get_hrrq_index(ioa_cfg); + hrrq = &ioa_cfg->hrrq[hrrq_id]; + + spin_lock_irqsave(hrrq->lock, hrrq_flags); /* * We are currently blocking all devices due to a host reset * We have told the host to stop giving us new requests, but * ERP ops don't count. FIXME */ - if (unlikely(!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead)) { - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(!hrrq->allow_cmds && !hrrq->ioa_is_dead)) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return SCSI_MLQUEUE_HOST_BUSY; } @@ -5869,19 +6121,17 @@ static int ipr_queuecommand(struct Scsi_Host *shost, * FIXME - Create scsi_set_host_offline interface * and the ioa_is_dead check can be removed */ - if (unlikely(ioa_cfg->ioa_is_dead || !res)) { - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(hrrq->ioa_is_dead || !res)) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); goto err_nodev; } - if (ipr_is_gata(res) && res->sata_port) { - rc = ata_sas_queuecmd(scsi_cmd, res->sata_port->ap); - spin_unlock_irqrestore(shost->host_lock, lock_flags); - return rc; + ipr_cmd = __ipr_get_free_ipr_cmnd(hrrq); + if (ipr_cmd == NULL) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return SCSI_MLQUEUE_HOST_BUSY; } - - ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); ipr_init_ipr_cmnd(ipr_cmd, ipr_scsi_done); ioarcb = &ipr_cmd->ioarcb; @@ -5902,26 +6152,27 @@ static int ipr_queuecommand(struct Scsi_Host *shost, } if (scsi_cmd->cmnd[0] >= 0xC0 && - (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) + (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) { ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; + } if (ioa_cfg->sis64) rc = ipr_build_ioadl64(ioa_cfg, ipr_cmd); else rc = ipr_build_ioadl(ioa_cfg, ipr_cmd); - spin_lock_irqsave(shost->host_lock, lock_flags); - if (unlikely(rc || (!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead))) { - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_lock_irqsave(hrrq->lock, hrrq_flags); + if (unlikely(rc || (!hrrq->allow_cmds && !hrrq->ioa_is_dead))) { + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); if (!rc) scsi_dma_unmap(scsi_cmd); return SCSI_MLQUEUE_HOST_BUSY; } - if (unlikely(ioa_cfg->ioa_is_dead)) { - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(hrrq->ioa_is_dead)) { + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); scsi_dma_unmap(scsi_cmd); goto err_nodev; } @@ -5931,18 +6182,18 @@ static int ipr_queuecommand(struct Scsi_Host *shost, ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_SYNC_COMPLETE; res->needs_sync_complete = 0; } - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_pending_q); ipr_trc_hook(ipr_cmd, IPR_TRACE_START, IPR_GET_RES_PHYS_LOC(res)); ipr_send_command(ipr_cmd); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return 0; err_nodev: - spin_lock_irqsave(shost->host_lock, lock_flags); + spin_lock_irqsave(hrrq->lock, hrrq_flags); memset(scsi_cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); scsi_cmd->result = (DID_NO_CONNECT << 16); scsi_cmd->scsi_done(scsi_cmd); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return 0; } @@ -6040,7 +6291,7 @@ static void ipr_ata_phy_reset(struct ata_port *ap) spin_lock_irqsave(ioa_cfg->host->host_lock, flags); } - if (!ioa_cfg->allow_cmds) + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) goto out_unlock; rc = ipr_device_reset(ioa_cfg, res); @@ -6071,6 +6322,7 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc) struct ipr_sata_port *sata_port = qc->ap->private_data; struct ipr_ioa_cfg *ioa_cfg = sata_port->ioa_cfg; struct ipr_cmnd *ipr_cmd; + struct ipr_hrr_queue *hrrq; unsigned long flags; spin_lock_irqsave(ioa_cfg->host->host_lock, flags); @@ -6080,11 +6332,15 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc) spin_lock_irqsave(ioa_cfg->host->host_lock, flags); } - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->qc == qc) { - ipr_device_reset(ioa_cfg, sata_port->res); - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->qc == qc) { + ipr_device_reset(ioa_cfg, sata_port->res); + break; + } } + spin_unlock(&hrrq->_lock); } spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); } @@ -6133,6 +6389,7 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd) struct ipr_resource_entry *res = sata_port->res; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); + spin_lock(&ipr_cmd->hrrq->_lock); if (ipr_cmd->ioa_cfg->sis64) memcpy(&sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata, sizeof(struct ipr_ioasa_gata)); @@ -6148,7 +6405,8 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd) qc->err_mask |= __ac_err_mask(sata_port->ioasa.status); else qc->err_mask |= ac_err_mask(sata_port->ioasa.status); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + spin_unlock(&ipr_cmd->hrrq->_lock); ata_qc_complete(qc); } @@ -6244,6 +6502,48 @@ static void ipr_build_ata_ioadl(struct ipr_cmnd *ipr_cmd, } /** + * ipr_qc_defer - Get a free ipr_cmd + * @qc: queued command + * + * Return value: + * 0 if success + **/ +static int ipr_qc_defer(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ipr_sata_port *sata_port = ap->private_data; + struct ipr_ioa_cfg *ioa_cfg = sata_port->ioa_cfg; + struct ipr_cmnd *ipr_cmd; + struct ipr_hrr_queue *hrrq; + int hrrq_id; + + hrrq_id = ipr_get_hrrq_index(ioa_cfg); + hrrq = &ioa_cfg->hrrq[hrrq_id]; + + qc->lldd_task = NULL; + spin_lock(&hrrq->_lock); + if (unlikely(hrrq->ioa_is_dead)) { + spin_unlock(&hrrq->_lock); + return 0; + } + + if (unlikely(!hrrq->allow_cmds)) { + spin_unlock(&hrrq->_lock); + return ATA_DEFER_LINK; + } + + ipr_cmd = __ipr_get_free_ipr_cmnd(hrrq); + if (ipr_cmd == NULL) { + spin_unlock(&hrrq->_lock); + return ATA_DEFER_LINK; + } + + qc->lldd_task = ipr_cmd; + spin_unlock(&hrrq->_lock); + return 0; +} + +/** * ipr_qc_issue - Issue a SATA qc to a device * @qc: queued command * @@ -6260,10 +6560,23 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) struct ipr_ioarcb *ioarcb; struct ipr_ioarcb_ata_regs *regs; - if (unlikely(!ioa_cfg->allow_cmds || ioa_cfg->ioa_is_dead)) + if (qc->lldd_task == NULL) + ipr_qc_defer(qc); + + ipr_cmd = qc->lldd_task; + if (ipr_cmd == NULL) return AC_ERR_SYSTEM; - ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); + qc->lldd_task = NULL; + spin_lock(&ipr_cmd->hrrq->_lock); + if (unlikely(!ipr_cmd->hrrq->allow_cmds || + ipr_cmd->hrrq->ioa_is_dead)) { + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + spin_unlock(&ipr_cmd->hrrq->_lock); + return AC_ERR_SYSTEM; + } + + ipr_init_ipr_cmnd(ipr_cmd, ipr_lock_and_done); ioarcb = &ipr_cmd->ioarcb; if (ioa_cfg->sis64) { @@ -6275,7 +6588,7 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) memset(regs, 0, sizeof(*regs)); ioarcb->add_cmd_parms_len = cpu_to_be16(sizeof(*regs)); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->qc = qc; ipr_cmd->done = ipr_sata_done; ipr_cmd->ioarcb.res_handle = res->res_handle; @@ -6315,10 +6628,12 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) default: WARN_ON(1); + spin_unlock(&ipr_cmd->hrrq->_lock); return AC_ERR_INVALID; } ipr_send_command(ipr_cmd); + spin_unlock(&ipr_cmd->hrrq->_lock); return 0; } @@ -6357,6 +6672,7 @@ static struct ata_port_operations ipr_sata_ops = { .hardreset = ipr_sata_reset, .post_internal_cmd = ipr_ata_post_internal, .qc_prep = ata_noop_qc_prep, + .qc_defer = ipr_qc_defer, .qc_issue = ipr_qc_issue, .qc_fill_rtf = ipr_qc_fill_rtf, .port_start = ata_sas_port_start, @@ -6427,7 +6743,7 @@ static int ipr_ioa_bringdown_done(struct ipr_cmnd *ipr_cmd) ENTER; ioa_cfg->in_reset_reload = 0; ioa_cfg->reset_retries = 0; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); wake_up_all(&ioa_cfg->reset_wait_q); spin_unlock_irq(ioa_cfg->host->host_lock); @@ -6454,11 +6770,16 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ipr_resource_entry *res; struct ipr_hostrcb *hostrcb, *temp; - int i = 0; + int i = 0, j; ENTER; ioa_cfg->in_reset_reload = 0; - ioa_cfg->allow_cmds = 1; + for (j = 0; j < ioa_cfg->hrrq_num; j++) { + spin_lock(&ioa_cfg->hrrq[j]._lock); + ioa_cfg->hrrq[j].allow_cmds = 1; + spin_unlock(&ioa_cfg->hrrq[j]._lock); + } + wmb(); ioa_cfg->reset_cmd = NULL; ioa_cfg->doorbell |= IPR_RUNTIME_RESET; @@ -6482,14 +6803,14 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd) dev_info(&ioa_cfg->pdev->dev, "IOA initialized.\n"); ioa_cfg->reset_retries = 0; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); wake_up_all(&ioa_cfg->reset_wait_q); spin_unlock(ioa_cfg->host->host_lock); scsi_unblock_requests(ioa_cfg->host); spin_lock(ioa_cfg->host->host_lock); - if (!ioa_cfg->allow_cmds) + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) scsi_block_requests(ioa_cfg->host); LEAVE; @@ -6560,9 +6881,11 @@ static int ipr_set_supported_devs(struct ipr_cmnd *ipr_cmd) if (!ioa_cfg->sis64) ipr_cmd->job_step = ipr_set_supported_devs; + LEAVE; return IPR_RC_JOB_RETURN; } + LEAVE; return IPR_RC_JOB_CONTINUE; } @@ -6820,7 +7143,7 @@ static int ipr_reset_cmd_failed(struct ipr_cmnd *ipr_cmd) ipr_cmd->ioarcb.cmd_pkt.cdb[0], ioasc); ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); return IPR_RC_JOB_RETURN; } @@ -7278,46 +7601,71 @@ static int ipr_ioafp_identify_hrrq(struct ipr_cmnd *ipr_cmd) { struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ipr_ioarcb *ioarcb = &ipr_cmd->ioarcb; + struct ipr_hrr_queue *hrrq; ENTER; + ipr_cmd->job_step = ipr_ioafp_std_inquiry; dev_info(&ioa_cfg->pdev->dev, "Starting IOA initialization sequence.\n"); - ioarcb->cmd_pkt.cdb[0] = IPR_ID_HOST_RR_Q; - ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); + if (ioa_cfg->identify_hrrq_index < ioa_cfg->hrrq_num) { + hrrq = &ioa_cfg->hrrq[ioa_cfg->identify_hrrq_index]; - ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; - if (ioa_cfg->sis64) - ioarcb->cmd_pkt.cdb[1] = 0x1; - ioarcb->cmd_pkt.cdb[2] = - ((u64) ioa_cfg->host_rrq_dma >> 24) & 0xff; - ioarcb->cmd_pkt.cdb[3] = - ((u64) ioa_cfg->host_rrq_dma >> 16) & 0xff; - ioarcb->cmd_pkt.cdb[4] = - ((u64) ioa_cfg->host_rrq_dma >> 8) & 0xff; - ioarcb->cmd_pkt.cdb[5] = - ((u64) ioa_cfg->host_rrq_dma) & 0xff; - ioarcb->cmd_pkt.cdb[7] = - ((sizeof(u32) * IPR_NUM_CMD_BLKS) >> 8) & 0xff; - ioarcb->cmd_pkt.cdb[8] = - (sizeof(u32) * IPR_NUM_CMD_BLKS) & 0xff; + ioarcb->cmd_pkt.cdb[0] = IPR_ID_HOST_RR_Q; + ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); - if (ioa_cfg->sis64) { - ioarcb->cmd_pkt.cdb[10] = - ((u64) ioa_cfg->host_rrq_dma >> 56) & 0xff; - ioarcb->cmd_pkt.cdb[11] = - ((u64) ioa_cfg->host_rrq_dma >> 48) & 0xff; - ioarcb->cmd_pkt.cdb[12] = - ((u64) ioa_cfg->host_rrq_dma >> 40) & 0xff; - ioarcb->cmd_pkt.cdb[13] = - ((u64) ioa_cfg->host_rrq_dma >> 32) & 0xff; - } + ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; + if (ioa_cfg->sis64) + ioarcb->cmd_pkt.cdb[1] = 0x1; - ipr_cmd->job_step = ipr_ioafp_std_inquiry; + if (ioa_cfg->nvectors == 1) + ioarcb->cmd_pkt.cdb[1] &= ~IPR_ID_HRRQ_SELE_ENABLE; + else + ioarcb->cmd_pkt.cdb[1] |= IPR_ID_HRRQ_SELE_ENABLE; + + ioarcb->cmd_pkt.cdb[2] = + ((u64) hrrq->host_rrq_dma >> 24) & 0xff; + ioarcb->cmd_pkt.cdb[3] = + ((u64) hrrq->host_rrq_dma >> 16) & 0xff; + ioarcb->cmd_pkt.cdb[4] = + ((u64) hrrq->host_rrq_dma >> 8) & 0xff; + ioarcb->cmd_pkt.cdb[5] = + ((u64) hrrq->host_rrq_dma) & 0xff; + ioarcb->cmd_pkt.cdb[7] = + ((sizeof(u32) * hrrq->size) >> 8) & 0xff; + ioarcb->cmd_pkt.cdb[8] = + (sizeof(u32) * hrrq->size) & 0xff; + + if (ioarcb->cmd_pkt.cdb[1] & IPR_ID_HRRQ_SELE_ENABLE) + ioarcb->cmd_pkt.cdb[9] = + ioa_cfg->identify_hrrq_index; - ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout, IPR_INTERNAL_TIMEOUT); + if (ioa_cfg->sis64) { + ioarcb->cmd_pkt.cdb[10] = + ((u64) hrrq->host_rrq_dma >> 56) & 0xff; + ioarcb->cmd_pkt.cdb[11] = + ((u64) hrrq->host_rrq_dma >> 48) & 0xff; + ioarcb->cmd_pkt.cdb[12] = + ((u64) hrrq->host_rrq_dma >> 40) & 0xff; + ioarcb->cmd_pkt.cdb[13] = + ((u64) hrrq->host_rrq_dma >> 32) & 0xff; + } + + if (ioarcb->cmd_pkt.cdb[1] & IPR_ID_HRRQ_SELE_ENABLE) + ioarcb->cmd_pkt.cdb[14] = + ioa_cfg->identify_hrrq_index; + + ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout, + IPR_INTERNAL_TIMEOUT); + + if (++ioa_cfg->identify_hrrq_index < ioa_cfg->hrrq_num) + ipr_cmd->job_step = ipr_ioafp_identify_hrrq; + + LEAVE; + return IPR_RC_JOB_RETURN; + } LEAVE; - return IPR_RC_JOB_RETURN; + return IPR_RC_JOB_CONTINUE; } /** @@ -7365,7 +7713,9 @@ static void ipr_reset_timer_done(struct ipr_cmnd *ipr_cmd) static void ipr_reset_start_timer(struct ipr_cmnd *ipr_cmd, unsigned long timeout) { - list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q); + + ENTER; + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = ipr_reset_ioa_job; ipr_cmd->timer.data = (unsigned long) ipr_cmd; @@ -7383,13 +7733,26 @@ static void ipr_reset_start_timer(struct ipr_cmnd *ipr_cmd, **/ static void ipr_init_ioa_mem(struct ipr_ioa_cfg *ioa_cfg) { - memset(ioa_cfg->host_rrq, 0, sizeof(u32) * IPR_NUM_CMD_BLKS); + struct ipr_hrr_queue *hrrq; + + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + memset(hrrq->host_rrq, 0, sizeof(u32) * hrrq->size); + + /* Initialize Host RRQ pointers */ + hrrq->hrrq_start = hrrq->host_rrq; + hrrq->hrrq_end = &hrrq->host_rrq[hrrq->size - 1]; + hrrq->hrrq_curr = hrrq->hrrq_start; + hrrq->toggle_bit = 1; + spin_unlock(&hrrq->_lock); + } + wmb(); - /* Initialize Host RRQ pointers */ - ioa_cfg->hrrq_start = ioa_cfg->host_rrq; - ioa_cfg->hrrq_end = &ioa_cfg->host_rrq[IPR_NUM_CMD_BLKS - 1]; - ioa_cfg->hrrq_curr = ioa_cfg->hrrq_start; - ioa_cfg->toggle_bit = 1; + ioa_cfg->identify_hrrq_index = 0; + if (ioa_cfg->hrrq_num == 1) + atomic_set(&ioa_cfg->hrrq_index, 0); + else + atomic_set(&ioa_cfg->hrrq_index, 1); /* Zero out config table */ memset(ioa_cfg->u.cfg_table, 0, ioa_cfg->cfg_table_size); @@ -7446,7 +7809,8 @@ static int ipr_reset_next_stage(struct ipr_cmnd *ipr_cmd) ipr_cmd->timer.function = (void (*)(unsigned long))ipr_oper_timeout; ipr_cmd->done = ipr_reset_ioa_job; add_timer(&ipr_cmd->timer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); return IPR_RC_JOB_RETURN; } @@ -7466,12 +7830,18 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; volatile u32 int_reg; volatile u64 maskval; + int i; ENTER; ipr_cmd->job_step = ipr_ioafp_identify_hrrq; ipr_init_ioa_mem(ioa_cfg); - ioa_cfg->allow_interrupts = 1; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 1; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); if (ioa_cfg->sis64) { /* Set the adapter to the correct endian mode. */ writel(IPR_ENDIAN_SWAP_KEY, ioa_cfg->regs.endian_swap_reg); @@ -7511,7 +7881,7 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd) ipr_cmd->timer.function = (void (*)(unsigned long))ipr_oper_timeout; ipr_cmd->done = ipr_reset_ioa_job; add_timer(&ipr_cmd->timer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); LEAVE; return IPR_RC_JOB_RETURN; @@ -8030,7 +8400,8 @@ static int ipr_reset_shutdown_ioa(struct ipr_cmnd *ipr_cmd) int rc = IPR_RC_JOB_CONTINUE; ENTER; - if (shutdown_type != IPR_SHUTDOWN_NONE && !ioa_cfg->ioa_is_dead) { + if (shutdown_type != IPR_SHUTDOWN_NONE && + !ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { ipr_cmd->ioarcb.res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); ipr_cmd->ioarcb.cmd_pkt.request_type = IPR_RQTYPE_IOACMD; ipr_cmd->ioarcb.cmd_pkt.cdb[0] = IPR_IOA_SHUTDOWN; @@ -8078,7 +8449,8 @@ static void ipr_reset_ioa_job(struct ipr_cmnd *ipr_cmd) * We are doing nested adapter resets and this is * not the current reset job. */ - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, + &ipr_cmd->hrrq->hrrq_free_q); return; } @@ -8113,9 +8485,15 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, enum ipr_shutdown_type shutdown_type) { struct ipr_cmnd *ipr_cmd; + int i; ioa_cfg->in_reset_reload = 1; - ioa_cfg->allow_cmds = 0; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_cmds = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); scsi_block_requests(ioa_cfg->host); ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); @@ -8141,7 +8519,9 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, enum ipr_shutdown_type shutdown_type) { - if (ioa_cfg->ioa_is_dead) + int i; + + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return; if (ioa_cfg->in_reset_reload) { @@ -8156,7 +8536,12 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, "IOA taken offline - error recovery failed\n"); ioa_cfg->reset_retries = 0; - ioa_cfg->ioa_is_dead = 1; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].ioa_is_dead = 1; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); if (ioa_cfg->in_ioa_bringdown) { ioa_cfg->reset_cmd = NULL; @@ -8188,9 +8573,17 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, */ static int ipr_reset_freeze(struct ipr_cmnd *ipr_cmd) { + struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; + int i; + /* Disallow new interrupts, avoid loop */ - ipr_cmd->ioa_cfg->allow_interrupts = 0; - list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q); + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = ipr_reset_ioa_job; return IPR_RC_JOB_RETURN; } @@ -8247,13 +8640,19 @@ static void ipr_pci_perm_failure(struct pci_dev *pdev) { unsigned long flags = 0; struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); + int i; spin_lock_irqsave(ioa_cfg->host->host_lock, flags); if (ioa_cfg->sdt_state == WAIT_FOR_DUMP) ioa_cfg->sdt_state = ABORT_DUMP; ioa_cfg->reset_retries = IPR_NUM_RESET_RELOAD_RETRIES; ioa_cfg->in_ioa_bringdown = 1; - ioa_cfg->allow_cmds = 0; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_cmds = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); } @@ -8310,12 +8709,11 @@ static int ipr_probe_ioa_part2(struct ipr_ioa_cfg *ioa_cfg) } else _ipr_initiate_ioa_reset(ioa_cfg, ipr_reset_enable_ioa, IPR_SHUTDOWN_NONE); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); - if (ioa_cfg->ioa_is_dead) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { rc = -EIO; } else if (ipr_invalid_adapter(ioa_cfg)) { if (!ipr_testmode) @@ -8376,8 +8774,13 @@ static void ipr_free_mem(struct ipr_ioa_cfg *ioa_cfg) pci_free_consistent(ioa_cfg->pdev, sizeof(struct ipr_misc_cbs), ioa_cfg->vpd_cbs, ioa_cfg->vpd_cbs_dma); ipr_free_cmd_blks(ioa_cfg); - pci_free_consistent(ioa_cfg->pdev, sizeof(u32) * IPR_NUM_CMD_BLKS, - ioa_cfg->host_rrq, ioa_cfg->host_rrq_dma); + + for (i = 0; i < ioa_cfg->hrrq_num; i++) + pci_free_consistent(ioa_cfg->pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + pci_free_consistent(ioa_cfg->pdev, ioa_cfg->cfg_table_size, ioa_cfg->u.cfg_table, ioa_cfg->cfg_table_dma); @@ -8408,8 +8811,23 @@ static void ipr_free_all_resources(struct ipr_ioa_cfg *ioa_cfg) struct pci_dev *pdev = ioa_cfg->pdev; ENTER; - free_irq(pdev->irq, ioa_cfg); - pci_disable_msi(pdev); + if (ioa_cfg->intr_flag == IPR_USE_MSI || + ioa_cfg->intr_flag == IPR_USE_MSIX) { + int i; + for (i = 0; i < ioa_cfg->nvectors; i++) + free_irq(ioa_cfg->vectors_info[i].vec, + &ioa_cfg->hrrq[i]); + } else + free_irq(pdev->irq, &ioa_cfg->hrrq[0]); + + if (ioa_cfg->intr_flag == IPR_USE_MSI) { + pci_disable_msi(pdev); + ioa_cfg->intr_flag &= ~IPR_USE_MSI; + } else if (ioa_cfg->intr_flag == IPR_USE_MSIX) { + pci_disable_msix(pdev); + ioa_cfg->intr_flag &= ~IPR_USE_MSIX; + } + iounmap(ioa_cfg->hdw_dma_regs); pci_release_regions(pdev); ipr_free_mem(ioa_cfg); @@ -8430,7 +8848,7 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) struct ipr_cmnd *ipr_cmd; struct ipr_ioarcb *ioarcb; dma_addr_t dma_addr; - int i; + int i, entries_each_hrrq, hrrq_id = 0; ioa_cfg->ipr_cmd_pool = pci_pool_create(IPR_NAME, ioa_cfg->pdev, sizeof(struct ipr_cmnd), 512, 0); @@ -8446,6 +8864,41 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) return -ENOMEM; } + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + if (ioa_cfg->hrrq_num > 1) { + if (i == 0) { + entries_each_hrrq = IPR_NUM_INTERNAL_CMD_BLKS; + ioa_cfg->hrrq[i].min_cmd_id = 0; + ioa_cfg->hrrq[i].max_cmd_id = + (entries_each_hrrq - 1); + } else { + entries_each_hrrq = + IPR_NUM_BASE_CMD_BLKS/ + (ioa_cfg->hrrq_num - 1); + ioa_cfg->hrrq[i].min_cmd_id = + IPR_NUM_INTERNAL_CMD_BLKS + + (i - 1) * entries_each_hrrq; + ioa_cfg->hrrq[i].max_cmd_id = + (IPR_NUM_INTERNAL_CMD_BLKS + + i * entries_each_hrrq - 1); + } + } else { + entries_each_hrrq = IPR_NUM_CMD_BLKS; + ioa_cfg->hrrq[i].min_cmd_id = 0; + ioa_cfg->hrrq[i].max_cmd_id = (entries_each_hrrq - 1); + } + ioa_cfg->hrrq[i].size = entries_each_hrrq; + } + + BUG_ON(ioa_cfg->hrrq_num == 0); + + i = IPR_NUM_CMD_BLKS - + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].max_cmd_id - 1; + if (i > 0) { + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].size += i; + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].max_cmd_id += i; + } + for (i = 0; i < IPR_NUM_CMD_BLKS; i++) { ipr_cmd = pci_pool_alloc(ioa_cfg->ipr_cmd_pool, GFP_KERNEL, &dma_addr); @@ -8484,7 +8937,11 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) ipr_cmd->sense_buffer_dma = dma_addr + offsetof(struct ipr_cmnd, sense_buffer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + ipr_cmd->ioarcb.cmd_pkt.hrrq_id = hrrq_id; + ipr_cmd->hrrq = &ioa_cfg->hrrq[hrrq_id]; + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + if (i >= ioa_cfg->hrrq[hrrq_id].max_cmd_id) + hrrq_id++; } return 0; @@ -8516,6 +8973,10 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg) BITS_TO_LONGS(ioa_cfg->max_devs_supported), GFP_KERNEL); ioa_cfg->vset_ids = kzalloc(sizeof(unsigned long) * BITS_TO_LONGS(ioa_cfg->max_devs_supported), GFP_KERNEL); + + if (!ioa_cfg->target_ids || !ioa_cfg->array_ids + || !ioa_cfg->vset_ids) + goto out_free_res_entries; } for (i = 0; i < ioa_cfg->max_devs_supported; i++) { @@ -8530,15 +8991,34 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg) if (!ioa_cfg->vpd_cbs) goto out_free_res_entries; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + INIT_LIST_HEAD(&ioa_cfg->hrrq[i].hrrq_free_q); + INIT_LIST_HEAD(&ioa_cfg->hrrq[i].hrrq_pending_q); + spin_lock_init(&ioa_cfg->hrrq[i]._lock); + if (i == 0) + ioa_cfg->hrrq[i].lock = ioa_cfg->host->host_lock; + else + ioa_cfg->hrrq[i].lock = &ioa_cfg->hrrq[i]._lock; + } + if (ipr_alloc_cmd_blks(ioa_cfg)) goto out_free_vpd_cbs; - ioa_cfg->host_rrq = pci_alloc_consistent(ioa_cfg->pdev, - sizeof(u32) * IPR_NUM_CMD_BLKS, - &ioa_cfg->host_rrq_dma); - - if (!ioa_cfg->host_rrq) - goto out_ipr_free_cmd_blocks; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + ioa_cfg->hrrq[i].host_rrq = pci_alloc_consistent(ioa_cfg->pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + &ioa_cfg->hrrq[i].host_rrq_dma); + + if (!ioa_cfg->hrrq[i].host_rrq) { + while (--i > 0) + pci_free_consistent(pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + goto out_ipr_free_cmd_blocks; + } + ioa_cfg->hrrq[i].ioa_cfg = ioa_cfg; + } ioa_cfg->u.cfg_table = pci_alloc_consistent(ioa_cfg->pdev, ioa_cfg->cfg_table_size, @@ -8582,8 +9062,12 @@ out_free_hostrcb_dma: ioa_cfg->u.cfg_table, ioa_cfg->cfg_table_dma); out_free_host_rrq: - pci_free_consistent(pdev, sizeof(u32) * IPR_NUM_CMD_BLKS, - ioa_cfg->host_rrq, ioa_cfg->host_rrq_dma); + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + pci_free_consistent(pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + } out_ipr_free_cmd_blocks: ipr_free_cmd_blks(ioa_cfg); out_free_vpd_cbs: @@ -8591,6 +9075,9 @@ out_free_vpd_cbs: ioa_cfg->vpd_cbs, ioa_cfg->vpd_cbs_dma); out_free_res_entries: kfree(ioa_cfg->res_entries); + kfree(ioa_cfg->target_ids); + kfree(ioa_cfg->array_ids); + kfree(ioa_cfg->vset_ids); goto out; } @@ -8638,15 +9125,11 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, ioa_cfg->doorbell = IPR_DOORBELL; sprintf(ioa_cfg->eye_catcher, IPR_EYECATCHER); sprintf(ioa_cfg->trace_start, IPR_TRACE_START_LABEL); - sprintf(ioa_cfg->ipr_free_label, IPR_FREEQ_LABEL); - sprintf(ioa_cfg->ipr_pending_label, IPR_PENDQ_LABEL); sprintf(ioa_cfg->cfg_table_start, IPR_CFG_TBL_START); sprintf(ioa_cfg->resource_table_label, IPR_RES_TABLE_LABEL); sprintf(ioa_cfg->ipr_hcam_label, IPR_HCAM_LABEL); sprintf(ioa_cfg->ipr_cmd_label, IPR_CMD_LABEL); - INIT_LIST_HEAD(&ioa_cfg->free_q); - INIT_LIST_HEAD(&ioa_cfg->pending_q); INIT_LIST_HEAD(&ioa_cfg->hostrcb_free_q); INIT_LIST_HEAD(&ioa_cfg->hostrcb_pending_q); INIT_LIST_HEAD(&ioa_cfg->free_res_q); @@ -8724,6 +9207,88 @@ ipr_get_chip_info(const struct pci_device_id *dev_id) return NULL; } +static int ipr_enable_msix(struct ipr_ioa_cfg *ioa_cfg) +{ + struct msix_entry entries[IPR_MAX_MSIX_VECTORS]; + int i, err, vectors; + + for (i = 0; i < ARRAY_SIZE(entries); ++i) + entries[i].entry = i; + + vectors = ipr_number_of_msix; + + while ((err = pci_enable_msix(ioa_cfg->pdev, entries, vectors)) > 0) + vectors = err; + + if (err < 0) { + pci_disable_msix(ioa_cfg->pdev); + return err; + } + + if (!err) { + for (i = 0; i < vectors; i++) + ioa_cfg->vectors_info[i].vec = entries[i].vector; + ioa_cfg->nvectors = vectors; + } + + return err; +} + +static int ipr_enable_msi(struct ipr_ioa_cfg *ioa_cfg) +{ + int i, err, vectors; + + vectors = ipr_number_of_msix; + + while ((err = pci_enable_msi_block(ioa_cfg->pdev, vectors)) > 0) + vectors = err; + + if (err < 0) { + pci_disable_msi(ioa_cfg->pdev); + return err; + } + + if (!err) { + for (i = 0; i < vectors; i++) + ioa_cfg->vectors_info[i].vec = ioa_cfg->pdev->irq + i; + ioa_cfg->nvectors = vectors; + } + + return err; +} + +static void name_msi_vectors(struct ipr_ioa_cfg *ioa_cfg) +{ + int vec_idx, n = sizeof(ioa_cfg->vectors_info[0].desc) - 1; + + for (vec_idx = 0; vec_idx < ioa_cfg->nvectors; vec_idx++) { + snprintf(ioa_cfg->vectors_info[vec_idx].desc, n, + "host%d-%d", ioa_cfg->host->host_no, vec_idx); + ioa_cfg->vectors_info[vec_idx]. + desc[strlen(ioa_cfg->vectors_info[vec_idx].desc)] = 0; + } +} + +static int ipr_request_other_msi_irqs(struct ipr_ioa_cfg *ioa_cfg) +{ + int i, rc; + + for (i = 1; i < ioa_cfg->nvectors; i++) { + rc = request_irq(ioa_cfg->vectors_info[i].vec, + ipr_isr_mhrrq, + 0, + ioa_cfg->vectors_info[i].desc, + &ioa_cfg->hrrq[i]); + if (rc) { + while (--i >= 0) + free_irq(ioa_cfg->vectors_info[i].vec, + &ioa_cfg->hrrq[i]); + return rc; + } + } + return 0; +} + /** * ipr_test_intr - Handle the interrupt generated in ipr_test_msi(). * @pdev: PCI device struct @@ -8740,6 +9305,7 @@ static irqreturn_t ipr_test_intr(int irq, void *devp) unsigned long lock_flags = 0; irqreturn_t rc = IRQ_HANDLED; + dev_info(&ioa_cfg->pdev->dev, "Received IRQ : %d\n", irq); spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ioa_cfg->msi_received = 1; @@ -8787,9 +9353,9 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev) writel(IPR_PCII_IO_DEBUG_ACKNOWLEDGE, ioa_cfg->regs.sense_interrupt_reg32); int_reg = readl(ioa_cfg->regs.sense_interrupt_reg); wait_event_timeout(ioa_cfg->msi_wait_q, ioa_cfg->msi_received, HZ); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); if (!ioa_cfg->msi_received) { /* MSI test failed */ dev_info(&pdev->dev, "MSI test failed. Falling back to LSI.\n"); @@ -8806,8 +9372,7 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev) return rc; } -/** - * ipr_probe_ioa - Allocates memory and does first stage of initialization + /* ipr_probe_ioa - Allocates memory and does first stage of initialization * @pdev: PCI device struct * @dev_id: PCI device id struct * @@ -8823,6 +9388,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev, void __iomem *ipr_regs; int rc = PCIBIOS_SUCCESSFUL; volatile u32 mask, uproc, interrupts; + unsigned long lock_flags; ENTER; @@ -8918,17 +9484,56 @@ static int ipr_probe_ioa(struct pci_dev *pdev, goto cleanup_nomem; } - /* Enable MSI style interrupts if they are supported. */ - if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && !pci_enable_msi(pdev)) { + if (ipr_number_of_msix > IPR_MAX_MSIX_VECTORS) { + dev_err(&pdev->dev, "The max number of MSIX is %d\n", + IPR_MAX_MSIX_VECTORS); + ipr_number_of_msix = IPR_MAX_MSIX_VECTORS; + } + + if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && + ipr_enable_msix(ioa_cfg) == 0) + ioa_cfg->intr_flag = IPR_USE_MSIX; + else if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && + ipr_enable_msi(ioa_cfg) == 0) + ioa_cfg->intr_flag = IPR_USE_MSI; + else { + ioa_cfg->intr_flag = IPR_USE_LSI; + ioa_cfg->nvectors = 1; + dev_info(&pdev->dev, "Cannot enable MSI.\n"); + } + + if (ioa_cfg->intr_flag == IPR_USE_MSI || + ioa_cfg->intr_flag == IPR_USE_MSIX) { rc = ipr_test_msi(ioa_cfg, pdev); - if (rc == -EOPNOTSUPP) - pci_disable_msi(pdev); + if (rc == -EOPNOTSUPP) { + if (ioa_cfg->intr_flag == IPR_USE_MSI) { + ioa_cfg->intr_flag &= ~IPR_USE_MSI; + pci_disable_msi(pdev); + } else if (ioa_cfg->intr_flag == IPR_USE_MSIX) { + ioa_cfg->intr_flag &= ~IPR_USE_MSIX; + pci_disable_msix(pdev); + } + + ioa_cfg->intr_flag = IPR_USE_LSI; + ioa_cfg->nvectors = 1; + } else if (rc) goto out_msi_disable; - else - dev_info(&pdev->dev, "MSI enabled with IRQ: %d\n", pdev->irq); - } else if (ipr_debug) - dev_info(&pdev->dev, "Cannot enable MSI.\n"); + else { + if (ioa_cfg->intr_flag == IPR_USE_MSI) + dev_info(&pdev->dev, + "Request for %d MSIs succeeded with starting IRQ: %d\n", + ioa_cfg->nvectors, pdev->irq); + else if (ioa_cfg->intr_flag == IPR_USE_MSIX) + dev_info(&pdev->dev, + "Request for %d MSIXs succeeded.", + ioa_cfg->nvectors); + } + } + + ioa_cfg->hrrq_num = min3(ioa_cfg->nvectors, + (unsigned int)num_online_cpus(), + (unsigned int)IPR_MAX_HRRQ_NUM); /* Save away PCI config space for use following IOA reset */ rc = pci_save_state(pdev); @@ -8975,11 +9580,24 @@ static int ipr_probe_ioa(struct pci_dev *pdev, if (interrupts & IPR_PCII_IOA_UNIT_CHECKED) ioa_cfg->ioa_unit_checked = 1; + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER); - rc = request_irq(pdev->irq, ipr_isr, - ioa_cfg->msi_received ? 0 : IRQF_SHARED, - IPR_NAME, ioa_cfg); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + if (ioa_cfg->intr_flag == IPR_USE_MSI + || ioa_cfg->intr_flag == IPR_USE_MSIX) { + name_msi_vectors(ioa_cfg); + rc = request_irq(ioa_cfg->vectors_info[0].vec, ipr_isr, + 0, + ioa_cfg->vectors_info[0].desc, + &ioa_cfg->hrrq[0]); + if (!rc) + rc = ipr_request_other_msi_irqs(ioa_cfg); + } else { + rc = request_irq(pdev->irq, ipr_isr, + IRQF_SHARED, + IPR_NAME, &ioa_cfg->hrrq[0]); + } if (rc) { dev_err(&pdev->dev, "Couldn't register IRQ %d! rc=%d\n", pdev->irq, rc); @@ -9004,7 +9622,10 @@ out: cleanup_nolog: ipr_free_mem(ioa_cfg); out_msi_disable: - pci_disable_msi(pdev); + if (ioa_cfg->intr_flag == IPR_USE_MSI) + pci_disable_msi(pdev); + else if (ioa_cfg->intr_flag == IPR_USE_MSIX) + pci_disable_msix(pdev); cleanup_nomem: iounmap(ipr_regs); out_release_regions: @@ -9138,7 +9759,7 @@ static void ipr_remove(struct pci_dev *pdev) static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) { struct ipr_ioa_cfg *ioa_cfg; - int rc; + int rc, i; rc = ipr_probe_ioa(pdev, dev_id); @@ -9185,6 +9806,17 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) scsi_add_device(ioa_cfg->host, IPR_IOA_BUS, IPR_IOA_TARGET, IPR_IOA_LUN); ioa_cfg->allow_ml_add_del = 1; ioa_cfg->host->max_channel = IPR_VSET_BUS; + ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight; + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) { + blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + ioa_cfg->iopoll_weight, ipr_iopoll); + blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); + } + } + schedule_work(&ioa_cfg->work_q); return 0; } @@ -9203,8 +9835,16 @@ static void ipr_shutdown(struct pci_dev *pdev) { struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); unsigned long lock_flags = 0; + int i; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + ioa_cfg->iopoll_weight = 0; + for (i = 1; i < ioa_cfg->hrrq_num; i++) + blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + } + while (ioa_cfg->in_reset_reload) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); @@ -9277,6 +9917,8 @@ static struct pci_device_id ipr_pci_table[] = { { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57B2, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C0, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C3, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C4, 0, 0, 0 }, @@ -9290,6 +9932,14 @@ static struct pci_device_id ipr_pci_table[] = { PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C8, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57CE, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D5, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D6, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D7, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D8, 0, 0, 0 }, { } }; MODULE_DEVICE_TABLE(pci, ipr_pci_table); @@ -9316,9 +9966,7 @@ static struct pci_driver ipr_driver = { **/ static void ipr_halt_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); } /** @@ -9340,7 +9988,7 @@ static int ipr_halt(struct notifier_block *nb, ulong event, void *buf) list_for_each_entry(ioa_cfg, &ipr_ioa_head, queue) { spin_lock_irqsave(ioa_cfg->host->host_lock, flags); - if (!ioa_cfg->allow_cmds) { + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); continue; } diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index c8a137f83bb1..1a9a246932ae 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -32,14 +32,15 @@ #include <linux/libata.h> #include <linux/list.h> #include <linux/kref.h> +#include <linux/blk-iopoll.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> /* * Literals */ -#define IPR_DRIVER_VERSION "2.5.4" -#define IPR_DRIVER_DATE "(July 11, 2012)" +#define IPR_DRIVER_VERSION "2.6.0" +#define IPR_DRIVER_DATE "(November 16, 2012)" /* * IPR_MAX_CMD_PER_LUN: This defines the maximum number of outstanding @@ -82,6 +83,7 @@ #define IPR_SUBS_DEV_ID_57B4 0x033B #define IPR_SUBS_DEV_ID_57B2 0x035F +#define IPR_SUBS_DEV_ID_57C0 0x0352 #define IPR_SUBS_DEV_ID_57C3 0x0353 #define IPR_SUBS_DEV_ID_57C4 0x0354 #define IPR_SUBS_DEV_ID_57C6 0x0357 @@ -94,6 +96,10 @@ #define IPR_SUBS_DEV_ID_574D 0x0356 #define IPR_SUBS_DEV_ID_57C8 0x035D +#define IPR_SUBS_DEV_ID_57D5 0x03FB +#define IPR_SUBS_DEV_ID_57D6 0x03FC +#define IPR_SUBS_DEV_ID_57D7 0x03FF +#define IPR_SUBS_DEV_ID_57D8 0x03FE #define IPR_NAME "ipr" /* @@ -298,6 +304,9 @@ IPR_PCII_NO_HOST_RRQ | IPR_PCII_IOARRIN_LOST | IPR_PCII_MMIO_ERROR) * Misc literals */ #define IPR_NUM_IOADL_ENTRIES IPR_MAX_SGLIST +#define IPR_MAX_MSIX_VECTORS 0x5 +#define IPR_MAX_HRRQ_NUM 0x10 +#define IPR_INIT_HRRQ 0x0 /* * Adapter interface types @@ -404,7 +413,7 @@ struct ipr_config_table_entry64 { __be64 dev_id; __be64 lun; __be64 lun_wwn[2]; -#define IPR_MAX_RES_PATH_LENGTH 24 +#define IPR_MAX_RES_PATH_LENGTH 48 __be64 res_path; struct ipr_std_inq_data std_inq_data; u8 reserved2[4]; @@ -459,9 +468,39 @@ struct ipr_supported_device { u8 reserved2[16]; }__attribute__((packed, aligned (4))); +struct ipr_hrr_queue { + struct ipr_ioa_cfg *ioa_cfg; + __be32 *host_rrq; + dma_addr_t host_rrq_dma; +#define IPR_HRRQ_REQ_RESP_HANDLE_MASK 0xfffffffc +#define IPR_HRRQ_RESP_BIT_SET 0x00000002 +#define IPR_HRRQ_TOGGLE_BIT 0x00000001 +#define IPR_HRRQ_REQ_RESP_HANDLE_SHIFT 2 +#define IPR_ID_HRRQ_SELE_ENABLE 0x02 + volatile __be32 *hrrq_start; + volatile __be32 *hrrq_end; + volatile __be32 *hrrq_curr; + + struct list_head hrrq_free_q; + struct list_head hrrq_pending_q; + spinlock_t _lock; + spinlock_t *lock; + + volatile u32 toggle_bit; + u32 size; + u32 min_cmd_id; + u32 max_cmd_id; + u8 allow_interrupts:1; + u8 ioa_is_dead:1; + u8 allow_cmds:1; + + struct blk_iopoll iopoll; +}; + /* Command packet structure */ struct ipr_cmd_pkt { - __be16 reserved; /* Reserved by IOA */ + u8 reserved; /* Reserved by IOA */ + u8 hrrq_id; u8 request_type; #define IPR_RQTYPE_SCSICDB 0x00 #define IPR_RQTYPE_IOACMD 0x01 @@ -1022,6 +1061,10 @@ struct ipr_hostrcb64_fabric_desc { struct ipr_hostrcb64_config_element elem[1]; }__attribute__((packed, aligned (8))); +#define for_each_hrrq(hrrq, ioa_cfg) \ + for (hrrq = (ioa_cfg)->hrrq; \ + hrrq < ((ioa_cfg)->hrrq + (ioa_cfg)->hrrq_num); hrrq++) + #define for_each_fabric_cfg(fabric, cfg) \ for (cfg = (fabric)->elem; \ cfg < ((fabric)->elem + be16_to_cpu((fabric)->num_entries)); \ @@ -1308,6 +1351,7 @@ struct ipr_chip_cfg_t { u16 max_cmds; u8 cache_line_size; u8 clear_isr; + u32 iopoll_weight; struct ipr_interrupt_offsets regs; }; @@ -1317,6 +1361,7 @@ struct ipr_chip_t { u16 intr_type; #define IPR_USE_LSI 0x00 #define IPR_USE_MSI 0x01 +#define IPR_USE_MSIX 0x02 u16 sis_type; #define IPR_SIS32 0x00 #define IPR_SIS64 0x01 @@ -1375,13 +1420,10 @@ struct ipr_ioa_cfg { struct list_head queue; - u8 allow_interrupts:1; u8 in_reset_reload:1; u8 in_ioa_bringdown:1; u8 ioa_unit_checked:1; - u8 ioa_is_dead:1; u8 dump_taken:1; - u8 allow_cmds:1; u8 allow_ml_add_del:1; u8 needs_hard_reset:1; u8 dual_raid:1; @@ -1413,21 +1455,7 @@ struct ipr_ioa_cfg { char trace_start[8]; #define IPR_TRACE_START_LABEL "trace" struct ipr_trace_entry *trace; - u32 trace_index:IPR_NUM_TRACE_INDEX_BITS; - - /* - * Queue for free command blocks - */ - char ipr_free_label[8]; -#define IPR_FREEQ_LABEL "free-q" - struct list_head free_q; - - /* - * Queue for command blocks outstanding to the adapter - */ - char ipr_pending_label[8]; -#define IPR_PENDQ_LABEL "pend-q" - struct list_head pending_q; + atomic_t trace_index; char cfg_table_start[8]; #define IPR_CFG_TBL_START "cfg" @@ -1452,16 +1480,10 @@ struct ipr_ioa_cfg { struct list_head hostrcb_free_q; struct list_head hostrcb_pending_q; - __be32 *host_rrq; - dma_addr_t host_rrq_dma; -#define IPR_HRRQ_REQ_RESP_HANDLE_MASK 0xfffffffc -#define IPR_HRRQ_RESP_BIT_SET 0x00000002 -#define IPR_HRRQ_TOGGLE_BIT 0x00000001 -#define IPR_HRRQ_REQ_RESP_HANDLE_SHIFT 2 - volatile __be32 *hrrq_start; - volatile __be32 *hrrq_end; - volatile __be32 *hrrq_curr; - volatile u32 toggle_bit; + struct ipr_hrr_queue hrrq[IPR_MAX_HRRQ_NUM]; + u32 hrrq_num; + atomic_t hrrq_index; + u16 identify_hrrq_index; struct ipr_bus_attributes bus_attr[IPR_MAX_NUM_BUSES]; @@ -1507,6 +1529,17 @@ struct ipr_ioa_cfg { u32 max_cmds; struct ipr_cmnd **ipr_cmnd_list; dma_addr_t *ipr_cmnd_list_dma; + + u16 intr_flag; + unsigned int nvectors; + + struct { + unsigned short vec; + char desc[22]; + } vectors_info[IPR_MAX_MSIX_VECTORS]; + + u32 iopoll_weight; + }; /* struct ipr_ioa_cfg */ struct ipr_cmnd { @@ -1544,6 +1577,7 @@ struct ipr_cmnd { struct scsi_device *sdev; } u; + struct ipr_hrr_queue *hrrq; struct ipr_ioa_cfg *ioa_cfg; }; @@ -1717,7 +1751,8 @@ struct ipr_ucode_image_header { if (ipr_is_device(hostrcb)) { \ if ((hostrcb)->ioa_cfg->sis64) { \ printk(KERN_ERR IPR_NAME ": %s: " fmt, \ - ipr_format_res_path(hostrcb->hcam.u.error64.fd_res_path, \ + ipr_format_res_path(hostrcb->ioa_cfg, \ + hostrcb->hcam.u.error64.fd_res_path, \ hostrcb->rp_buffer, \ sizeof(hostrcb->rp_buffer)), \ __VA_ARGS__); \ diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index df4c13a5534c..7706c99ec8bb 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -466,11 +466,13 @@ enum intr_type_t { MSIX, }; +#define LPFC_CT_CTX_MAX 64 struct unsol_rcv_ct_ctx { uint32_t ctxt_id; uint32_t SID; - uint32_t flags; -#define UNSOL_VALID 0x00000001 + uint32_t valid; +#define UNSOL_INVALID 0 +#define UNSOL_VALID 1 uint16_t oxid; uint16_t rxid; }; @@ -750,6 +752,15 @@ struct lpfc_hba { void __iomem *ctrl_regs_memmap_p;/* Kernel memory mapped address for PCI BAR2 */ + void __iomem *pci_bar0_memmap_p; /* Kernel memory mapped address for + PCI BAR0 with dual-ULP support */ + void __iomem *pci_bar2_memmap_p; /* Kernel memory mapped address for + PCI BAR2 with dual-ULP support */ + void __iomem *pci_bar4_memmap_p; /* Kernel memory mapped address for + PCI BAR4 with dual-ULP support */ +#define PCI_64BIT_BAR0 0 +#define PCI_64BIT_BAR2 2 +#define PCI_64BIT_BAR4 4 void __iomem *MBslimaddr; /* virtual address for mbox cmds */ void __iomem *HAregaddr; /* virtual address for host attn reg */ void __iomem *CAregaddr; /* virtual address for chip attn reg */ @@ -938,7 +949,7 @@ struct lpfc_hba { spinlock_t ct_ev_lock; /* synchronize access to ct_ev_waiters */ struct list_head ct_ev_waiters; - struct unsol_rcv_ct_ctx ct_ctx[64]; + struct unsol_rcv_ct_ctx ct_ctx[LPFC_CT_CTX_MAX]; uint32_t ctx_idx; uint8_t menlo_flag; /* menlo generic flags */ diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index f7368eb80415..32d5683e6181 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -955,9 +955,9 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, spin_lock_irqsave(&phba->ct_ev_lock, flags); if (phba->sli_rev == LPFC_SLI_REV4) { evt_dat->immed_dat = phba->ctx_idx; - phba->ctx_idx = (phba->ctx_idx + 1) % 64; + phba->ctx_idx = (phba->ctx_idx + 1) % LPFC_CT_CTX_MAX; /* Provide warning for over-run of the ct_ctx array */ - if (phba->ct_ctx[evt_dat->immed_dat].flags & + if (phba->ct_ctx[evt_dat->immed_dat].valid == UNSOL_VALID) lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, "2717 CT context array entry " @@ -973,7 +973,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, piocbq->iocb.unsli3.rcvsli3.ox_id; phba->ct_ctx[evt_dat->immed_dat].SID = piocbq->iocb.un.rcvels.remoteID; - phba->ct_ctx[evt_dat->immed_dat].flags = UNSOL_VALID; + phba->ct_ctx[evt_dat->immed_dat].valid = UNSOL_VALID; } else evt_dat->immed_dat = piocbq->iocb.ulpContext; @@ -1013,6 +1013,47 @@ error_ct_unsol_exit: } /** + * lpfc_bsg_ct_unsol_abort - handler ct abort to management plane + * @phba: Pointer to HBA context object. + * @dmabuf: pointer to a dmabuf that describes the FC sequence + * + * This function handles abort to the CT command toward management plane + * for SLI4 port. + * + * If the pending context of a CT command to management plane present, clears + * such context and returns 1 for handled; otherwise, it returns 0 indicating + * no context exists. + **/ +int +lpfc_bsg_ct_unsol_abort(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf) +{ + struct fc_frame_header fc_hdr; + struct fc_frame_header *fc_hdr_ptr = &fc_hdr; + int ctx_idx, handled = 0; + uint16_t oxid, rxid; + uint32_t sid; + + memcpy(fc_hdr_ptr, dmabuf->hbuf.virt, sizeof(struct fc_frame_header)); + sid = sli4_sid_from_fc_hdr(fc_hdr_ptr); + oxid = be16_to_cpu(fc_hdr_ptr->fh_ox_id); + rxid = be16_to_cpu(fc_hdr_ptr->fh_rx_id); + + for (ctx_idx = 0; ctx_idx < LPFC_CT_CTX_MAX; ctx_idx++) { + if (phba->ct_ctx[ctx_idx].valid != UNSOL_VALID) + continue; + if (phba->ct_ctx[ctx_idx].rxid != rxid) + continue; + if (phba->ct_ctx[ctx_idx].oxid != oxid) + continue; + if (phba->ct_ctx[ctx_idx].SID != sid) + continue; + phba->ct_ctx[ctx_idx].valid = UNSOL_INVALID; + handled = 1; + } + return handled; +} + +/** * lpfc_bsg_hba_set_event - process a SET_EVENT bsg vendor command * @job: SET_EVENT fc_bsg_job **/ @@ -1318,7 +1359,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag, icmd->ulpClass = CLASS3; if (phba->sli_rev == LPFC_SLI_REV4) { /* Do not issue unsol response if oxid not marked as valid */ - if (!(phba->ct_ctx[tag].flags & UNSOL_VALID)) { + if (phba->ct_ctx[tag].valid != UNSOL_VALID) { rc = IOCB_ERROR; goto issue_ct_rsp_exit; } @@ -1352,7 +1393,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag, phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]; /* The exchange is done, mark the entry as invalid */ - phba->ct_ctx[tag].flags &= ~UNSOL_VALID; + phba->ct_ctx[tag].valid = UNSOL_INVALID; } else icmd->ulpContext = (ushort) tag; diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 69d66e3662cb..76ca65dae781 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -164,8 +164,7 @@ void lpfc_hb_timeout_handler(struct lpfc_hba *); void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); -void lpfc_sli4_ct_abort_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, - struct lpfc_iocbq *); +int lpfc_ct_handle_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *); int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t); int lpfc_fdmi_cmd(struct lpfc_vport *, struct lpfc_nodelist *, int); void lpfc_fdmi_tmo(unsigned long); @@ -427,6 +426,7 @@ int lpfc_bsg_request(struct fc_bsg_job *); int lpfc_bsg_timeout(struct fc_bsg_job *); int lpfc_bsg_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); +int lpfc_bsg_ct_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *); void __lpfc_sli_ringtx_put(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); struct lpfc_iocbq *lpfc_sli_ringtx_get(struct lpfc_hba *, diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 65f9fb6862e6..7bff3a19af56 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -164,37 +164,24 @@ lpfc_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, } /** - * lpfc_sli4_ct_abort_unsol_event - Default handle for sli4 unsol abort + * lpfc_ct_handle_unsol_abort - ct upper level protocol abort handler * @phba: Pointer to HBA context object. - * @pring: Pointer to the driver internal I/O ring. - * @piocbq: Pointer to the IOCBQ. + * @dmabuf: pointer to a dmabuf that describes the FC sequence * - * This function serves as the default handler for the sli4 unsolicited - * abort event. It shall be invoked when there is no application interface - * registered unsolicited abort handler. This handler does nothing but - * just simply releases the dma buffer used by the unsol abort event. + * This function serves as the upper level protocol abort handler for CT + * protocol. + * + * Return 1 if abort has been handled, 0 otherwise. **/ -void -lpfc_sli4_ct_abort_unsol_event(struct lpfc_hba *phba, - struct lpfc_sli_ring *pring, - struct lpfc_iocbq *piocbq) +int +lpfc_ct_handle_unsol_abort(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf) { - IOCB_t *icmd = &piocbq->iocb; - struct lpfc_dmabuf *bdeBuf; - uint32_t size; + int handled; - /* Forward abort event to any process registered to receive ct event */ - if (lpfc_bsg_ct_unsol_event(phba, pring, piocbq) == 0) - return; + /* CT upper level goes through BSG */ + handled = lpfc_bsg_ct_unsol_abort(phba, dmabuf); - /* If there is no BDE associated with IOCB, there is nothing to do */ - if (icmd->ulpBdeCount == 0) - return; - bdeBuf = piocbq->context2; - piocbq->context2 = NULL; - size = icmd->un.cont64[0].tus.f.bdeSize; - lpfc_ct_unsol_buffer(phba, piocbq, bdeBuf, size); - lpfc_in_buf_free(phba, bdeBuf); + return handled; } static void diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index b9440deaad45..08d156a9094f 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -3122,6 +3122,13 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, case IOERR_SEQUENCE_TIMEOUT: case IOERR_INVALID_RPI: + if (cmd == ELS_CMD_PLOGI && + did == NameServer_DID) { + /* Continue forever if plogi to */ + /* the nameserver fails */ + maxretry = 0; + delay = 100; + } retry = 1; break; } @@ -6517,7 +6524,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_nodelist *ndlp; struct ls_rjt stat; uint32_t *payload; - uint32_t cmd, did, newnode, rjt_err = 0; + uint32_t cmd, did, newnode; + uint8_t rjt_exp, rjt_err = 0; IOCB_t *icmd = &elsiocb->iocb; if (!vport || !(elsiocb->context2)) @@ -6606,12 +6614,14 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* If Nport discovery is delayed, reject PLOGIs */ if (vport->fc_flag & FC_DISC_DELAYED) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } if (vport->port_state < LPFC_DISC_AUTH) { if (!(phba->pport->fc_flag & FC_PT2PT) || (phba->pport->fc_flag & FC_PT2PT_PLOGI)) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } /* We get here, and drop thru, if we are PT2PT with @@ -6648,6 +6658,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, lpfc_send_els_event(vport, ndlp, payload); if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_LOGO); @@ -6661,6 +6672,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, lpfc_send_els_event(vport, ndlp, payload); if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLO); @@ -6680,6 +6692,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvADISC++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, @@ -6693,6 +6706,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvPDISC++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, @@ -6730,6 +6744,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvPRLI++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLI); @@ -6813,6 +6828,11 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, if (newnode) lpfc_nlp_put(ndlp); break; + case ELS_CMD_REC: + /* receive this due to exchange closed */ + rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_INVALID_OX_RX; + break; default: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, "RCV ELS cmd: cmd:x%x did:x%x/ste:x%x", @@ -6820,6 +6840,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* Unsupported ELS command, reject */ rjt_err = LSRJT_CMD_UNSUPPORTED; + rjt_exp = LSEXP_NOTHING_MORE; /* Unknown ELS command <elsCmd> received from NPORT <did> */ lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, @@ -6834,7 +6855,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, if (rjt_err) { memset(&stat, 0, sizeof(stat)); stat.un.b.lsRjtRsnCode = rjt_err; - stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; + stat.un.b.lsRjtRsnCodeExp = rjt_exp; lpfc_els_rsp_reject(vport, stat.un.lsRjtError, elsiocb, ndlp, NULL); } diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 7398ca862e97..e8c476031703 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -538,6 +538,7 @@ struct fc_vft_header { #define ELS_CMD_ECHO 0x10000000 #define ELS_CMD_TEST 0x11000000 #define ELS_CMD_RRQ 0x12000000 +#define ELS_CMD_REC 0x13000000 #define ELS_CMD_PRLI 0x20100014 #define ELS_CMD_PRLO 0x21100014 #define ELS_CMD_PRLO_ACC 0x02100014 @@ -574,6 +575,7 @@ struct fc_vft_header { #define ELS_CMD_ECHO 0x10 #define ELS_CMD_TEST 0x11 #define ELS_CMD_RRQ 0x12 +#define ELS_CMD_REC 0x13 #define ELS_CMD_PRLI 0x14001020 #define ELS_CMD_PRLO 0x14001021 #define ELS_CMD_PRLO_ACC 0x14001002 diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index a47cfbdd05f2..6e93b886cd4d 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -106,6 +106,7 @@ struct lpfc_sli_intf { #define LPFC_SLI4_MB_WORD_COUNT 64 #define LPFC_MAX_MQ_PAGE 8 +#define LPFC_MAX_WQ_PAGE_V0 4 #define LPFC_MAX_WQ_PAGE 8 #define LPFC_MAX_CQ_PAGE 4 #define LPFC_MAX_EQ_PAGE 8 @@ -703,24 +704,41 @@ struct lpfc_register { * BAR0. The offsets are the same so the driver must account for * any base address difference. */ -#define LPFC_RQ_DOORBELL 0x00A0 -#define lpfc_rq_doorbell_num_posted_SHIFT 16 -#define lpfc_rq_doorbell_num_posted_MASK 0x3FFF -#define lpfc_rq_doorbell_num_posted_WORD word0 -#define lpfc_rq_doorbell_id_SHIFT 0 -#define lpfc_rq_doorbell_id_MASK 0xFFFF -#define lpfc_rq_doorbell_id_WORD word0 - -#define LPFC_WQ_DOORBELL 0x0040 -#define lpfc_wq_doorbell_num_posted_SHIFT 24 -#define lpfc_wq_doorbell_num_posted_MASK 0x00FF -#define lpfc_wq_doorbell_num_posted_WORD word0 -#define lpfc_wq_doorbell_index_SHIFT 16 -#define lpfc_wq_doorbell_index_MASK 0x00FF -#define lpfc_wq_doorbell_index_WORD word0 -#define lpfc_wq_doorbell_id_SHIFT 0 -#define lpfc_wq_doorbell_id_MASK 0xFFFF -#define lpfc_wq_doorbell_id_WORD word0 +#define LPFC_ULP0_RQ_DOORBELL 0x00A0 +#define LPFC_ULP1_RQ_DOORBELL 0x00C0 +#define lpfc_rq_db_list_fm_num_posted_SHIFT 24 +#define lpfc_rq_db_list_fm_num_posted_MASK 0x00FF +#define lpfc_rq_db_list_fm_num_posted_WORD word0 +#define lpfc_rq_db_list_fm_index_SHIFT 16 +#define lpfc_rq_db_list_fm_index_MASK 0x00FF +#define lpfc_rq_db_list_fm_index_WORD word0 +#define lpfc_rq_db_list_fm_id_SHIFT 0 +#define lpfc_rq_db_list_fm_id_MASK 0xFFFF +#define lpfc_rq_db_list_fm_id_WORD word0 +#define lpfc_rq_db_ring_fm_num_posted_SHIFT 16 +#define lpfc_rq_db_ring_fm_num_posted_MASK 0x3FFF +#define lpfc_rq_db_ring_fm_num_posted_WORD word0 +#define lpfc_rq_db_ring_fm_id_SHIFT 0 +#define lpfc_rq_db_ring_fm_id_MASK 0xFFFF +#define lpfc_rq_db_ring_fm_id_WORD word0 + +#define LPFC_ULP0_WQ_DOORBELL 0x0040 +#define LPFC_ULP1_WQ_DOORBELL 0x0060 +#define lpfc_wq_db_list_fm_num_posted_SHIFT 24 +#define lpfc_wq_db_list_fm_num_posted_MASK 0x00FF +#define lpfc_wq_db_list_fm_num_posted_WORD word0 +#define lpfc_wq_db_list_fm_index_SHIFT 16 +#define lpfc_wq_db_list_fm_index_MASK 0x00FF +#define lpfc_wq_db_list_fm_index_WORD word0 +#define lpfc_wq_db_list_fm_id_SHIFT 0 +#define lpfc_wq_db_list_fm_id_MASK 0xFFFF +#define lpfc_wq_db_list_fm_id_WORD word0 +#define lpfc_wq_db_ring_fm_num_posted_SHIFT 16 +#define lpfc_wq_db_ring_fm_num_posted_MASK 0x3FFF +#define lpfc_wq_db_ring_fm_num_posted_WORD word0 +#define lpfc_wq_db_ring_fm_id_SHIFT 0 +#define lpfc_wq_db_ring_fm_id_MASK 0xFFFF +#define lpfc_wq_db_ring_fm_id_WORD word0 #define LPFC_EQCQ_DOORBELL 0x0120 #define lpfc_eqcq_doorbell_se_SHIFT 31 @@ -1131,12 +1149,22 @@ struct lpfc_mbx_wq_create { struct { /* Version 0 Request */ uint32_t word0; #define lpfc_mbx_wq_create_num_pages_SHIFT 0 -#define lpfc_mbx_wq_create_num_pages_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_num_pages_MASK 0x000000FF #define lpfc_mbx_wq_create_num_pages_WORD word0 +#define lpfc_mbx_wq_create_dua_SHIFT 8 +#define lpfc_mbx_wq_create_dua_MASK 0x00000001 +#define lpfc_mbx_wq_create_dua_WORD word0 #define lpfc_mbx_wq_create_cq_id_SHIFT 16 #define lpfc_mbx_wq_create_cq_id_MASK 0x0000FFFF #define lpfc_mbx_wq_create_cq_id_WORD word0 - struct dma_address page[LPFC_MAX_WQ_PAGE]; + struct dma_address page[LPFC_MAX_WQ_PAGE_V0]; + uint32_t word9; +#define lpfc_mbx_wq_create_bua_SHIFT 0 +#define lpfc_mbx_wq_create_bua_MASK 0x00000001 +#define lpfc_mbx_wq_create_bua_WORD word9 +#define lpfc_mbx_wq_create_ulp_num_SHIFT 8 +#define lpfc_mbx_wq_create_ulp_num_MASK 0x000000FF +#define lpfc_mbx_wq_create_ulp_num_WORD word9 } request; struct { /* Version 1 Request */ uint32_t word0; /* Word 0 is the same as in v0 */ @@ -1160,6 +1188,17 @@ struct lpfc_mbx_wq_create { #define lpfc_mbx_wq_create_q_id_SHIFT 0 #define lpfc_mbx_wq_create_q_id_MASK 0x0000FFFF #define lpfc_mbx_wq_create_q_id_WORD word0 + uint32_t doorbell_offset; + uint32_t word2; +#define lpfc_mbx_wq_create_bar_set_SHIFT 0 +#define lpfc_mbx_wq_create_bar_set_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_bar_set_WORD word2 +#define WQ_PCI_BAR_0_AND_1 0x00 +#define WQ_PCI_BAR_2_AND_3 0x01 +#define WQ_PCI_BAR_4_AND_5 0x02 +#define lpfc_mbx_wq_create_db_format_SHIFT 16 +#define lpfc_mbx_wq_create_db_format_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_db_format_WORD word2 } response; } u; }; @@ -1223,14 +1262,31 @@ struct lpfc_mbx_rq_create { #define lpfc_mbx_rq_create_num_pages_SHIFT 0 #define lpfc_mbx_rq_create_num_pages_MASK 0x0000FFFF #define lpfc_mbx_rq_create_num_pages_WORD word0 +#define lpfc_mbx_rq_create_dua_SHIFT 16 +#define lpfc_mbx_rq_create_dua_MASK 0x00000001 +#define lpfc_mbx_rq_create_dua_WORD word0 +#define lpfc_mbx_rq_create_bqu_SHIFT 17 +#define lpfc_mbx_rq_create_bqu_MASK 0x00000001 +#define lpfc_mbx_rq_create_bqu_WORD word0 +#define lpfc_mbx_rq_create_ulp_num_SHIFT 24 +#define lpfc_mbx_rq_create_ulp_num_MASK 0x000000FF +#define lpfc_mbx_rq_create_ulp_num_WORD word0 struct rq_context context; struct dma_address page[LPFC_MAX_WQ_PAGE]; } request; struct { uint32_t word0; -#define lpfc_mbx_rq_create_q_id_SHIFT 0 -#define lpfc_mbx_rq_create_q_id_MASK 0x0000FFFF -#define lpfc_mbx_rq_create_q_id_WORD word0 +#define lpfc_mbx_rq_create_q_id_SHIFT 0 +#define lpfc_mbx_rq_create_q_id_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_q_id_WORD word0 + uint32_t doorbell_offset; + uint32_t word2; +#define lpfc_mbx_rq_create_bar_set_SHIFT 0 +#define lpfc_mbx_rq_create_bar_set_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_bar_set_WORD word2 +#define lpfc_mbx_rq_create_db_format_SHIFT 16 +#define lpfc_mbx_rq_create_db_format_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_db_format_WORD word2 } response; } u; }; @@ -1388,6 +1444,33 @@ struct lpfc_mbx_get_rsrc_extent_info { } u; }; +struct lpfc_mbx_query_fw_config { + struct mbox_header header; + struct { + uint32_t config_number; +#define LPFC_FC_FCOE 0x00000007 + uint32_t asic_revision; + uint32_t physical_port; + uint32_t function_mode; +#define LPFC_FCOE_INI_MODE 0x00000040 +#define LPFC_FCOE_TGT_MODE 0x00000080 +#define LPFC_DUA_MODE 0x00000800 + uint32_t ulp0_mode; +#define LPFC_ULP_FCOE_INIT_MODE 0x00000040 +#define LPFC_ULP_FCOE_TGT_MODE 0x00000080 + uint32_t ulp0_nap_words[12]; + uint32_t ulp1_mode; + uint32_t ulp1_nap_words[12]; + uint32_t function_capabilities; + uint32_t cqid_base; + uint32_t cqid_tot; + uint32_t eqid_base; + uint32_t eqid_tot; + uint32_t ulp0_nap2_words[2]; + uint32_t ulp1_nap2_words[2]; + } rsp; +}; + struct lpfc_id_range { uint32_t word5; #define lpfc_mbx_rsrc_id_word4_0_SHIFT 0 @@ -1803,51 +1886,6 @@ struct lpfc_mbx_redisc_fcf_tbl { #define lpfc_mbx_redisc_fcf_index_WORD word12 }; -struct lpfc_mbx_query_fw_cfg { - struct mbox_header header; - uint32_t config_number; - uint32_t asic_rev; - uint32_t phys_port; - uint32_t function_mode; -/* firmware Function Mode */ -#define lpfc_function_mode_toe_SHIFT 0 -#define lpfc_function_mode_toe_MASK 0x00000001 -#define lpfc_function_mode_toe_WORD function_mode -#define lpfc_function_mode_nic_SHIFT 1 -#define lpfc_function_mode_nic_MASK 0x00000001 -#define lpfc_function_mode_nic_WORD function_mode -#define lpfc_function_mode_rdma_SHIFT 2 -#define lpfc_function_mode_rdma_MASK 0x00000001 -#define lpfc_function_mode_rdma_WORD function_mode -#define lpfc_function_mode_vm_SHIFT 3 -#define lpfc_function_mode_vm_MASK 0x00000001 -#define lpfc_function_mode_vm_WORD function_mode -#define lpfc_function_mode_iscsi_i_SHIFT 4 -#define lpfc_function_mode_iscsi_i_MASK 0x00000001 -#define lpfc_function_mode_iscsi_i_WORD function_mode -#define lpfc_function_mode_iscsi_t_SHIFT 5 -#define lpfc_function_mode_iscsi_t_MASK 0x00000001 -#define lpfc_function_mode_iscsi_t_WORD function_mode -#define lpfc_function_mode_fcoe_i_SHIFT 6 -#define lpfc_function_mode_fcoe_i_MASK 0x00000001 -#define lpfc_function_mode_fcoe_i_WORD function_mode -#define lpfc_function_mode_fcoe_t_SHIFT 7 -#define lpfc_function_mode_fcoe_t_MASK 0x00000001 -#define lpfc_function_mode_fcoe_t_WORD function_mode -#define lpfc_function_mode_dal_SHIFT 8 -#define lpfc_function_mode_dal_MASK 0x00000001 -#define lpfc_function_mode_dal_WORD function_mode -#define lpfc_function_mode_lro_SHIFT 9 -#define lpfc_function_mode_lro_MASK 0x00000001 -#define lpfc_function_mode_lro_WORD function_mode -#define lpfc_function_mode_flex10_SHIFT 10 -#define lpfc_function_mode_flex10_MASK 0x00000001 -#define lpfc_function_mode_flex10_WORD function_mode -#define lpfc_function_mode_ncsi_SHIFT 11 -#define lpfc_function_mode_ncsi_MASK 0x00000001 -#define lpfc_function_mode_ncsi_WORD function_mode -}; - /* Status field for embedded SLI_CONFIG mailbox command */ #define STATUS_SUCCESS 0x0 #define STATUS_FAILED 0x1 @@ -2965,7 +3003,7 @@ struct lpfc_mqe { struct lpfc_mbx_read_config rd_config; struct lpfc_mbx_request_features req_ftrs; struct lpfc_mbx_post_hdr_tmpl hdr_tmpl; - struct lpfc_mbx_query_fw_cfg query_fw_cfg; + struct lpfc_mbx_query_fw_config query_fw_cfg; struct lpfc_mbx_supp_pages supp_pages; struct lpfc_mbx_pc_sli4_params sli4_params; struct lpfc_mbx_get_sli4_parameters get_sli4_parameters; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 7de4ef14698f..314b4f61b9e3 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -6229,9 +6229,11 @@ lpfc_sli4_bar0_register_memmap(struct lpfc_hba *phba, uint32_t if_type) phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PORT_SEM_OFFSET; phba->sli4_hba.RQDBregaddr = - phba->sli4_hba.conf_regs_memmap_p + LPFC_RQ_DOORBELL; + phba->sli4_hba.conf_regs_memmap_p + + LPFC_ULP0_RQ_DOORBELL; phba->sli4_hba.WQDBregaddr = - phba->sli4_hba.conf_regs_memmap_p + LPFC_WQ_DOORBELL; + phba->sli4_hba.conf_regs_memmap_p + + LPFC_ULP0_WQ_DOORBELL; phba->sli4_hba.EQCQDBregaddr = phba->sli4_hba.conf_regs_memmap_p + LPFC_EQCQ_DOORBELL; phba->sli4_hba.MQDBregaddr = @@ -6285,9 +6287,11 @@ lpfc_sli4_bar2_register_memmap(struct lpfc_hba *phba, uint32_t vf) return -ENODEV; phba->sli4_hba.RQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + - vf * LPFC_VFR_PAGE_SIZE + LPFC_RQ_DOORBELL); + vf * LPFC_VFR_PAGE_SIZE + + LPFC_ULP0_RQ_DOORBELL); phba->sli4_hba.WQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + - vf * LPFC_VFR_PAGE_SIZE + LPFC_WQ_DOORBELL); + vf * LPFC_VFR_PAGE_SIZE + + LPFC_ULP0_WQ_DOORBELL); phba->sli4_hba.EQCQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + vf * LPFC_VFR_PAGE_SIZE + LPFC_EQCQ_DOORBELL); phba->sli4_hba.MQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + @@ -6983,6 +6987,19 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) phba->sli4_hba.fcp_wq = NULL; } + if (phba->pci_bar0_memmap_p) { + iounmap(phba->pci_bar0_memmap_p); + phba->pci_bar0_memmap_p = NULL; + } + if (phba->pci_bar2_memmap_p) { + iounmap(phba->pci_bar2_memmap_p); + phba->pci_bar2_memmap_p = NULL; + } + if (phba->pci_bar4_memmap_p) { + iounmap(phba->pci_bar4_memmap_p); + phba->pci_bar4_memmap_p = NULL; + } + /* Release FCP CQ mapping array */ if (phba->sli4_hba.fcp_cq_map != NULL) { kfree(phba->sli4_hba.fcp_cq_map); @@ -7046,6 +7063,53 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) int rc = -ENOMEM; int fcp_eqidx, fcp_cqidx, fcp_wqidx; int fcp_cq_index = 0; + uint32_t shdr_status, shdr_add_status; + union lpfc_sli4_cfg_shdr *shdr; + LPFC_MBOXQ_t *mboxq; + uint32_t length; + + /* Check for dual-ULP support */ + mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mboxq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3249 Unable to allocate memory for " + "QUERY_FW_CFG mailbox command\n"); + return -ENOMEM; + } + length = (sizeof(struct lpfc_mbx_query_fw_config) - + sizeof(struct lpfc_sli4_cfg_mhdr)); + lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON, + LPFC_MBOX_OPCODE_QUERY_FW_CFG, + length, LPFC_SLI4_MBX_EMBED); + + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + + shdr = (union lpfc_sli4_cfg_shdr *) + &mboxq->u.mqe.un.sli4_config.header.cfg_shdr; + shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); + shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response); + if (shdr_status || shdr_add_status || rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3250 QUERY_FW_CFG mailbox failed with status " + "x%x add_status x%x, mbx status x%x\n", + shdr_status, shdr_add_status, rc); + if (rc != MBX_TIMEOUT) + mempool_free(mboxq, phba->mbox_mem_pool); + rc = -ENXIO; + goto out_error; + } + + phba->sli4_hba.fw_func_mode = + mboxq->u.mqe.un.query_fw_cfg.rsp.function_mode; + phba->sli4_hba.ulp0_mode = mboxq->u.mqe.un.query_fw_cfg.rsp.ulp0_mode; + phba->sli4_hba.ulp1_mode = mboxq->u.mqe.un.query_fw_cfg.rsp.ulp1_mode; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3251 QUERY_FW_CFG: func_mode:x%x, ulp0_mode:x%x, " + "ulp1_mode:x%x\n", phba->sli4_hba.fw_func_mode, + phba->sli4_hba.ulp0_mode, phba->sli4_hba.ulp1_mode); + + if (rc != MBX_TIMEOUT) + mempool_free(mboxq, phba->mbox_mem_pool); /* * Set up HBA Event Queues (EQs) @@ -7660,78 +7724,6 @@ out: } /** - * lpfc_sli4_send_nop_mbox_cmds - Send sli-4 nop mailbox commands - * @phba: pointer to lpfc hba data structure. - * @cnt: number of nop mailbox commands to send. - * - * This routine is invoked to send a number @cnt of NOP mailbox command and - * wait for each command to complete. - * - * Return: the number of NOP mailbox command completed. - **/ -static int -lpfc_sli4_send_nop_mbox_cmds(struct lpfc_hba *phba, uint32_t cnt) -{ - LPFC_MBOXQ_t *mboxq; - int length, cmdsent; - uint32_t mbox_tmo; - uint32_t rc = 0; - uint32_t shdr_status, shdr_add_status; - union lpfc_sli4_cfg_shdr *shdr; - - if (cnt == 0) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, - "2518 Requested to send 0 NOP mailbox cmd\n"); - return cnt; - } - - mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!mboxq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2519 Unable to allocate memory for issuing " - "NOP mailbox command\n"); - return 0; - } - - /* Set up NOP SLI4_CONFIG mailbox-ioctl command */ - length = (sizeof(struct lpfc_mbx_nop) - - sizeof(struct lpfc_sli4_cfg_mhdr)); - - for (cmdsent = 0; cmdsent < cnt; cmdsent++) { - lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON, - LPFC_MBOX_OPCODE_NOP, length, - LPFC_SLI4_MBX_EMBED); - if (!phba->sli4_hba.intr_enable) - rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); - else { - mbox_tmo = lpfc_mbox_tmo_val(phba, mboxq); - rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo); - } - if (rc == MBX_TIMEOUT) - break; - /* Check return status */ - shdr = (union lpfc_sli4_cfg_shdr *) - &mboxq->u.mqe.un.sli4_config.header.cfg_shdr; - shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); - shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, - &shdr->response); - if (shdr_status || shdr_add_status || rc) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, - "2520 NOP mailbox command failed " - "status x%x add_status x%x mbx " - "status x%x\n", shdr_status, - shdr_add_status, rc); - break; - } - } - - if (rc != MBX_TIMEOUT) - mempool_free(mboxq, phba->mbox_mem_pool); - - return cmdsent; -} - -/** * lpfc_sli4_pci_mem_setup - Setup SLI4 HBA PCI memory space. * @phba: pointer to lpfc hba data structure. * @@ -8499,37 +8491,6 @@ lpfc_unset_hba(struct lpfc_hba *phba) } /** - * lpfc_sli4_unset_hba - Unset SLI4 hba device initialization. - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to unset the HBA device initialization steps to - * a device with SLI-4 interface spec. - **/ -static void -lpfc_sli4_unset_hba(struct lpfc_hba *phba) -{ - struct lpfc_vport *vport = phba->pport; - struct Scsi_Host *shost = lpfc_shost_from_vport(vport); - - spin_lock_irq(shost->host_lock); - vport->load_flag |= FC_UNLOADING; - spin_unlock_irq(shost->host_lock); - - phba->pport->work_port_events = 0; - - /* Stop the SLI4 device port */ - lpfc_stop_port(phba); - - lpfc_sli4_disable_intr(phba); - - /* Reset SLI4 HBA FCoE function */ - lpfc_pci_function_reset(phba); - lpfc_sli4_queue_destroy(phba); - - return; -} - -/** * lpfc_sli4_xri_exchange_busy_wait - Wait for device XRI exchange busy * @phba: Pointer to HBA context object. * @@ -9591,7 +9552,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) struct Scsi_Host *shost = NULL; int error, ret; uint32_t cfg_mode, intr_mode; - int mcnt; int adjusted_fcp_io_channel; /* Allocate memory for HBA structure */ @@ -9680,58 +9640,35 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) shost = lpfc_shost_from_vport(vport); /* save shost for error cleanup */ /* Now, trying to enable interrupt and bring up the device */ cfg_mode = phba->cfg_use_msi; - while (true) { - /* Put device to a known state before enabling interrupt */ - lpfc_stop_port(phba); - /* Configure and enable interrupt */ - intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); - if (intr_mode == LPFC_INTR_ERROR) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0426 Failed to enable interrupt.\n"); - error = -ENODEV; - goto out_free_sysfs_attr; - } - /* Default to single EQ for non-MSI-X */ - if (phba->intr_type != MSIX) - adjusted_fcp_io_channel = 1; - else - adjusted_fcp_io_channel = phba->cfg_fcp_io_channel; - phba->cfg_fcp_io_channel = adjusted_fcp_io_channel; - /* Set up SLI-4 HBA */ - if (lpfc_sli4_hba_setup(phba)) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "1421 Failed to set up hba\n"); - error = -ENODEV; - goto out_disable_intr; - } - /* Send NOP mbx cmds for non-INTx mode active interrupt test */ - if (intr_mode != 0) - mcnt = lpfc_sli4_send_nop_mbox_cmds(phba, - LPFC_ACT_INTR_CNT); - - /* Check active interrupts received only for MSI/MSI-X */ - if (intr_mode == 0 || - phba->sli.slistat.sli_intr >= LPFC_ACT_INTR_CNT) { - /* Log the current active interrupt mode */ - phba->intr_mode = intr_mode; - lpfc_log_intr_mode(phba, intr_mode); - break; - } - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "0451 Configure interrupt mode (%d) " - "failed active interrupt test.\n", - intr_mode); - /* Unset the previous SLI-4 HBA setup. */ - /* - * TODO: Is this operation compatible with IF TYPE 2 - * devices? All port state is deleted and cleared. - */ - lpfc_sli4_unset_hba(phba); - /* Try next level of interrupt mode */ - cfg_mode = --intr_mode; + /* Put device to a known state before enabling interrupt */ + lpfc_stop_port(phba); + /* Configure and enable interrupt */ + intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); + if (intr_mode == LPFC_INTR_ERROR) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0426 Failed to enable interrupt.\n"); + error = -ENODEV; + goto out_free_sysfs_attr; + } + /* Default to single EQ for non-MSI-X */ + if (phba->intr_type != MSIX) + adjusted_fcp_io_channel = 1; + else + adjusted_fcp_io_channel = phba->cfg_fcp_io_channel; + phba->cfg_fcp_io_channel = adjusted_fcp_io_channel; + /* Set up SLI-4 HBA */ + if (lpfc_sli4_hba_setup(phba)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "1421 Failed to set up hba\n"); + error = -ENODEV; + goto out_disable_intr; } + /* Log the current active interrupt mode */ + phba->intr_mode = intr_mode; + lpfc_log_intr_mode(phba, intr_mode); + /* Perform post initialization setup */ lpfc_post_init_setup(phba); diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index d8fadcb2db73..46128c679202 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1115,6 +1115,13 @@ out: "0261 Cannot Register NameServer login\n"); } + /* + ** In case the node reference counter does not go to zero, ensure that + ** the stale state for the node is not processed. + */ + + ndlp->nlp_prev_state = ndlp->nlp_state; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DEFER_RM; spin_unlock_irq(shost->host_lock); @@ -2159,13 +2166,16 @@ lpfc_cmpl_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, { struct lpfc_iocbq *cmdiocb, *rspiocb; IOCB_t *irsp; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); cmdiocb = (struct lpfc_iocbq *) arg; rspiocb = cmdiocb->context_un.rsp_iocb; irsp = &rspiocb->iocb; if (irsp->ulpStatus) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DEFER_RM; + spin_unlock_irq(shost->host_lock); return NLP_STE_FREED_NODE; } return ndlp->nlp_state; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 60e5a177644c..98af07c6e300 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -288,6 +288,26 @@ lpfc_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) } /** + * lpfc_change_queue_type() - Change a device's scsi tag queuing type + * @sdev: Pointer the scsi device whose queue depth is to change + * @tag_type: Identifier for queue tag type + */ +static int +lpfc_change_queue_type(struct scsi_device *sdev, int tag_type) +{ + if (sdev->tagged_supported) { + scsi_set_tag_type(sdev, tag_type); + if (tag_type) + scsi_activate_tcq(sdev, sdev->queue_depth); + else + scsi_deactivate_tcq(sdev, sdev->queue_depth); + } else + tag_type = 0; + + return tag_type; +} + +/** * lpfc_rampdown_queue_depth - Post RAMP_DOWN_QUEUE event to worker thread * @phba: The Hba for which this call is being executed. * @@ -3972,7 +3992,7 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, break; } } else - fcp_cmnd->fcpCntl1 = 0; + fcp_cmnd->fcpCntl1 = SIMPLE_Q; sli4 = (phba->sli_rev == LPFC_SLI_REV4); @@ -5150,6 +5170,7 @@ struct scsi_host_template lpfc_template = { .max_sectors = 0xFFFF, .vendor_id = LPFC_NL_VENDOR_ID, .change_queue_depth = lpfc_change_queue_depth, + .change_queue_type = lpfc_change_queue_type, }; struct scsi_host_template lpfc_vport_template = { @@ -5172,4 +5193,5 @@ struct scsi_host_template lpfc_vport_template = { .shost_attrs = lpfc_vport_attrs, .max_sectors = 0xFFFF, .change_queue_depth = lpfc_change_queue_depth, + .change_queue_type = lpfc_change_queue_type, }; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 624eab370396..55b6fc83ad71 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -124,10 +124,17 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe) /* Ring Doorbell */ doorbell.word0 = 0; - bf_set(lpfc_wq_doorbell_num_posted, &doorbell, 1); - bf_set(lpfc_wq_doorbell_index, &doorbell, host_index); - bf_set(lpfc_wq_doorbell_id, &doorbell, q->queue_id); - writel(doorbell.word0, q->phba->sli4_hba.WQDBregaddr); + if (q->db_format == LPFC_DB_LIST_FORMAT) { + bf_set(lpfc_wq_db_list_fm_num_posted, &doorbell, 1); + bf_set(lpfc_wq_db_list_fm_index, &doorbell, host_index); + bf_set(lpfc_wq_db_list_fm_id, &doorbell, q->queue_id); + } else if (q->db_format == LPFC_DB_RING_FORMAT) { + bf_set(lpfc_wq_db_ring_fm_num_posted, &doorbell, 1); + bf_set(lpfc_wq_db_ring_fm_id, &doorbell, q->queue_id); + } else { + return -EINVAL; + } + writel(doorbell.word0, q->db_regaddr); return 0; } @@ -456,10 +463,20 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, /* Ring The Header Receive Queue Doorbell */ if (!(hq->host_index % hq->entry_repost)) { doorbell.word0 = 0; - bf_set(lpfc_rq_doorbell_num_posted, &doorbell, - hq->entry_repost); - bf_set(lpfc_rq_doorbell_id, &doorbell, hq->queue_id); - writel(doorbell.word0, hq->phba->sli4_hba.RQDBregaddr); + if (hq->db_format == LPFC_DB_RING_FORMAT) { + bf_set(lpfc_rq_db_ring_fm_num_posted, &doorbell, + hq->entry_repost); + bf_set(lpfc_rq_db_ring_fm_id, &doorbell, hq->queue_id); + } else if (hq->db_format == LPFC_DB_LIST_FORMAT) { + bf_set(lpfc_rq_db_list_fm_num_posted, &doorbell, + hq->entry_repost); + bf_set(lpfc_rq_db_list_fm_index, &doorbell, + hq->host_index); + bf_set(lpfc_rq_db_list_fm_id, &doorbell, hq->queue_id); + } else { + return -EINVAL; + } + writel(doorbell.word0, hq->db_regaddr); } return put_index; } @@ -4939,7 +4956,7 @@ out_free_mboxq: static void lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba) { - uint8_t fcp_eqidx; + int fcp_eqidx; lpfc_sli4_cq_release(phba->sli4_hba.mbx_cq, LPFC_QUEUE_REARM); lpfc_sli4_cq_release(phba->sli4_hba.els_cq, LPFC_QUEUE_REARM); @@ -5622,6 +5639,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) } /* RPIs. */ count = phba->sli4_hba.max_cfg_param.max_rpi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3279 Invalid provisioning of " + "rpi:%d\n", count); + rc = -EINVAL; + goto err_exit; + } base = phba->sli4_hba.max_cfg_param.rpi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.rpi_bmask = kzalloc(longs * @@ -5644,6 +5668,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* VPIs. */ count = phba->sli4_hba.max_cfg_param.max_vpi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3280 Invalid provisioning of " + "vpi:%d\n", count); + rc = -EINVAL; + goto free_rpi_ids; + } base = phba->sli4_hba.max_cfg_param.vpi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->vpi_bmask = kzalloc(longs * @@ -5666,6 +5697,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* XRIs. */ count = phba->sli4_hba.max_cfg_param.max_xri; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3281 Invalid provisioning of " + "xri:%d\n", count); + rc = -EINVAL; + goto free_vpi_ids; + } base = phba->sli4_hba.max_cfg_param.xri_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.xri_bmask = kzalloc(longs * @@ -5689,6 +5727,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* VFIs. */ count = phba->sli4_hba.max_cfg_param.max_vfi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3282 Invalid provisioning of " + "vfi:%d\n", count); + rc = -EINVAL; + goto free_xri_ids; + } base = phba->sli4_hba.max_cfg_param.vfi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.vfi_bmask = kzalloc(longs * @@ -8370,7 +8415,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, * This is a continuation of a commandi,(CX) so this * sglq is on the active list */ - sglq = __lpfc_get_active_sglq(phba, piocb->sli4_xritag); + sglq = __lpfc_get_active_sglq(phba, piocb->sli4_lxritag); if (!sglq) return IOCB_ERROR; } @@ -8855,12 +8900,6 @@ lpfc_sli_setup(struct lpfc_hba *phba) pring->prt[3].type = FC_TYPE_CT; pring->prt[3].lpfc_sli_rcv_unsol_event = lpfc_ct_unsol_event; - /* abort unsolicited sequence */ - pring->prt[4].profile = 0; /* Mask 4 */ - pring->prt[4].rctl = FC_RCTL_BA_ABTS; - pring->prt[4].type = FC_TYPE_BLS; - pring->prt[4].lpfc_sli_rcv_unsol_event = - lpfc_sli4_ct_abort_unsol_event; break; } totiocbsize += (pring->sli.sli3.numCiocb * @@ -11873,7 +11912,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) struct lpfc_eqe *eqe; unsigned long iflag; int ecount = 0; - uint32_t fcp_eqidx; + int fcp_eqidx; /* Get the driver's phba structure from the dev_id */ fcp_eq_hdl = (struct lpfc_fcp_eq_hdl *)dev_id; @@ -11975,7 +12014,7 @@ lpfc_sli4_intr_handler(int irq, void *dev_id) struct lpfc_hba *phba; irqreturn_t hba_irq_rc; bool hba_handled = false; - uint32_t fcp_eqidx; + int fcp_eqidx; /* Get the driver's phba structure from the dev_id */ phba = (struct lpfc_hba *)dev_id; @@ -12097,6 +12136,54 @@ out_fail: } /** + * lpfc_dual_chute_pci_bar_map - Map pci base address register to host memory + * @phba: HBA structure that indicates port to create a queue on. + * @pci_barset: PCI BAR set flag. + * + * This function shall perform iomap of the specified PCI BAR address to host + * memory address if not already done so and return it. The returned host + * memory address can be NULL. + */ +static void __iomem * +lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) +{ + struct pci_dev *pdev; + unsigned long bar_map, bar_map_len; + + if (!phba->pcidev) + return NULL; + else + pdev = phba->pcidev; + + switch (pci_barset) { + case WQ_PCI_BAR_0_AND_1: + if (!phba->pci_bar0_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR0); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR0); + phba->pci_bar0_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar0_memmap_p; + case WQ_PCI_BAR_2_AND_3: + if (!phba->pci_bar2_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR2); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR2); + phba->pci_bar2_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar2_memmap_p; + case WQ_PCI_BAR_4_AND_5: + if (!phba->pci_bar4_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR4); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR4); + phba->pci_bar4_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar4_memmap_p; + default: + break; + } + return NULL; +} + +/** * lpfc_modify_fcp_eq_delay - Modify Delay Multiplier on FCP EQs * @phba: HBA structure that indicates port to create a queue on. * @startq: The starting FCP EQ to modify @@ -12673,6 +12760,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, union lpfc_sli4_cfg_shdr *shdr; uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz; struct dma_address *page; + void __iomem *bar_memmap_p; + uint32_t db_offset; + uint16_t pci_barset; /* sanity check on queue memory */ if (!wq || !cq) @@ -12696,6 +12786,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, cq->queue_id); bf_set(lpfc_mbox_hdr_version, &shdr->request, phba->sli4_hba.pc_sli4_params.wqv); + if (phba->sli4_hba.pc_sli4_params.wqv == LPFC_Q_CREATE_VERSION_1) { bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1, wq->entry_count); @@ -12723,6 +12814,10 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, page[dmabuf->buffer_tag].addr_lo = putPaddrLow(dmabuf->phys); page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_wq_create_dua, &wq_create->u.request, 1); + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); @@ -12740,6 +12835,47 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, status = -ENXIO; goto out; } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) { + wq->db_format = bf_get(lpfc_mbx_wq_create_db_format, + &wq_create->u.response); + if ((wq->db_format != LPFC_DB_LIST_FORMAT) && + (wq->db_format != LPFC_DB_RING_FORMAT)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3265 WQ[%d] doorbell format not " + "supported: x%x\n", wq->queue_id, + wq->db_format); + status = -EINVAL; + goto out; + } + pci_barset = bf_get(lpfc_mbx_wq_create_bar_set, + &wq_create->u.response); + bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset); + if (!bar_memmap_p) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3263 WQ[%d] failed to memmap pci " + "barset:x%x\n", wq->queue_id, + pci_barset); + status = -ENOMEM; + goto out; + } + db_offset = wq_create->u.response.doorbell_offset; + if ((db_offset != LPFC_ULP0_WQ_DOORBELL) && + (db_offset != LPFC_ULP1_WQ_DOORBELL)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3252 WQ[%d] doorbell offset not " + "supported: x%x\n", wq->queue_id, + db_offset); + status = -EINVAL; + goto out; + } + wq->db_regaddr = bar_memmap_p + db_offset; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3264 WQ[%d]: barset:x%x, offset:x%x\n", + wq->queue_id, pci_barset, db_offset); + } else { + wq->db_format = LPFC_DB_LIST_FORMAT; + wq->db_regaddr = phba->sli4_hba.WQDBregaddr; + } wq->type = LPFC_WQ; wq->assoc_qid = cq->queue_id; wq->subtype = subtype; @@ -12816,6 +12952,9 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, uint32_t shdr_status, shdr_add_status; union lpfc_sli4_cfg_shdr *shdr; uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz; + void __iomem *bar_memmap_p; + uint32_t db_offset; + uint16_t pci_barset; /* sanity check on queue memory */ if (!hrq || !drq || !cq) @@ -12894,6 +13033,9 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, rq_create->u.request.page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_rq_create_dua, &rq_create->u.request, 1); + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); @@ -12911,6 +13053,50 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, status = -ENXIO; goto out; } + + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) { + hrq->db_format = bf_get(lpfc_mbx_rq_create_db_format, + &rq_create->u.response); + if ((hrq->db_format != LPFC_DB_LIST_FORMAT) && + (hrq->db_format != LPFC_DB_RING_FORMAT)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3262 RQ [%d] doorbell format not " + "supported: x%x\n", hrq->queue_id, + hrq->db_format); + status = -EINVAL; + goto out; + } + + pci_barset = bf_get(lpfc_mbx_rq_create_bar_set, + &rq_create->u.response); + bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset); + if (!bar_memmap_p) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3269 RQ[%d] failed to memmap pci " + "barset:x%x\n", hrq->queue_id, + pci_barset); + status = -ENOMEM; + goto out; + } + + db_offset = rq_create->u.response.doorbell_offset; + if ((db_offset != LPFC_ULP0_RQ_DOORBELL) && + (db_offset != LPFC_ULP1_RQ_DOORBELL)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3270 RQ[%d] doorbell offset not " + "supported: x%x\n", hrq->queue_id, + db_offset); + status = -EINVAL; + goto out; + } + hrq->db_regaddr = bar_memmap_p + db_offset; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3266 RQ[qid:%d]: barset:x%x, offset:x%x\n", + hrq->queue_id, pci_barset, db_offset); + } else { + hrq->db_format = LPFC_DB_RING_FORMAT; + hrq->db_regaddr = phba->sli4_hba.RQDBregaddr; + } hrq->type = LPFC_HRQ; hrq->assoc_qid = cq->queue_id; hrq->subtype = subtype; @@ -12976,6 +13162,8 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, rq_create->u.request.page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_rq_create_dua, &rq_create->u.request, 1); rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr = (union lpfc_sli4_cfg_shdr *) &rq_create->header.cfg_shdr; @@ -14063,6 +14251,40 @@ lpfc_sli4_abort_partial_seq(struct lpfc_vport *vport, } /** + * lpfc_sli4_abort_ulp_seq - Abort assembled unsol sequence from ulp + * @vport: pointer to a vitural port + * @dmabuf: pointer to a dmabuf that describes the FC sequence + * + * This function tries to abort from the assembed sequence from upper level + * protocol, described by the information from basic abbort @dmabuf. It + * checks to see whether such pending context exists at upper level protocol. + * If so, it shall clean up the pending context. + * + * Return + * true -- if there is matching pending context of the sequence cleaned + * at ulp; + * false -- if there is no matching pending context of the sequence present + * at ulp. + **/ +static bool +lpfc_sli4_abort_ulp_seq(struct lpfc_vport *vport, struct hbq_dmabuf *dmabuf) +{ + struct lpfc_hba *phba = vport->phba; + int handled; + + /* Accepting abort at ulp with SLI4 only */ + if (phba->sli_rev < LPFC_SLI_REV4) + return false; + + /* Register all caring upper level protocols to attend abort */ + handled = lpfc_ct_handle_unsol_abort(phba, dmabuf); + if (handled) + return true; + + return false; +} + +/** * lpfc_sli4_seq_abort_rsp_cmpl - BLS ABORT RSP seq abort iocb complete handler * @phba: Pointer to HBA context object. * @cmd_iocbq: pointer to the command iocbq structure. @@ -14077,8 +14299,14 @@ lpfc_sli4_seq_abort_rsp_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmd_iocbq, struct lpfc_iocbq *rsp_iocbq) { - if (cmd_iocbq) + struct lpfc_nodelist *ndlp; + + if (cmd_iocbq) { + ndlp = (struct lpfc_nodelist *)cmd_iocbq->context1; + lpfc_nlp_put(ndlp); + lpfc_nlp_not_used(ndlp); lpfc_sli_release_iocbq(phba, cmd_iocbq); + } /* Failure means BLS ABORT RSP did not get delivered to remote node*/ if (rsp_iocbq && rsp_iocbq->iocb.ulpStatus) @@ -14118,9 +14346,10 @@ lpfc_sli4_xri_inrange(struct lpfc_hba *phba, * event after aborting the sequence handling. **/ static void -lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, - struct fc_frame_header *fc_hdr) +lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, + struct fc_frame_header *fc_hdr, bool aborted) { + struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *ctiocb = NULL; struct lpfc_nodelist *ndlp; uint16_t oxid, rxid, xri, lxri; @@ -14135,12 +14364,27 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, oxid = be16_to_cpu(fc_hdr->fh_ox_id); rxid = be16_to_cpu(fc_hdr->fh_rx_id); - ndlp = lpfc_findnode_did(phba->pport, sid); + ndlp = lpfc_findnode_did(vport, sid); if (!ndlp) { - lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, - "1268 Find ndlp returned NULL for oxid:x%x " - "SID:x%x\n", oxid, sid); - return; + ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "1268 Failed to allocate ndlp for " + "oxid:x%x SID:x%x\n", oxid, sid); + return; + } + lpfc_nlp_init(vport, ndlp, sid); + /* Put ndlp onto pport node list */ + lpfc_enqueue_node(vport, ndlp); + } else if (!NLP_CHK_NODE_ACT(ndlp)) { + /* re-setup ndlp without removing from node list */ + ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "3275 Failed to active ndlp found " + "for oxid:x%x SID:x%x\n", oxid, sid); + return; + } } /* Allocate buffer for rsp iocb */ @@ -14164,7 +14408,7 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, icmd->ulpLe = 1; icmd->ulpClass = CLASS3; icmd->ulpContext = phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]; - ctiocb->context1 = ndlp; + ctiocb->context1 = lpfc_nlp_get(ndlp); ctiocb->iocb_cmpl = NULL; ctiocb->vport = phba->pport; @@ -14183,14 +14427,24 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, if (lxri != NO_XRI) lpfc_set_rrq_active(phba, ndlp, lxri, (xri == oxid) ? rxid : oxid, 0); - /* If the oxid maps to the FCP XRI range or if it is out of range, - * send a BLS_RJT. The driver no longer has that exchange. - * Override the IOCB for a BA_RJT. + /* For BA_ABTS from exchange responder, if the logical xri with + * the oxid maps to the FCP XRI range, the port no longer has + * that exchange context, send a BLS_RJT. Override the IOCB for + * a BA_RJT. */ - if (xri > (phba->sli4_hba.max_cfg_param.max_xri + - phba->sli4_hba.max_cfg_param.xri_base) || - xri > (lpfc_sli4_get_els_iocb_cnt(phba) + - phba->sli4_hba.max_cfg_param.xri_base)) { + if ((fctl & FC_FC_EX_CTX) && + (lxri > lpfc_sli4_get_els_iocb_cnt(phba))) { + icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT; + bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0); + bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID); + bf_set(lpfc_rsn_code, &icmd->un.bls_rsp, FC_BA_RJT_UNABLE); + } + + /* If BA_ABTS failed to abort a partially assembled receive sequence, + * the driver no longer has that exchange, send a BLS_RJT. Override + * the IOCB for a BA_RJT. + */ + if (aborted == false) { icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT; bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0); bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID); @@ -14214,17 +14468,19 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, bf_set(lpfc_abts_oxid, &icmd->un.bls_rsp, oxid); /* Xmit CT abts response on exchange <xid> */ - lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "1200 Send BLS cmd x%x on oxid x%x Data: x%x\n", - icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state); + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, + "1200 Send BLS cmd x%x on oxid x%x Data: x%x\n", + icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state); rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, ctiocb, 0); if (rc == IOCB_ERROR) { - lpfc_printf_log(phba, KERN_ERR, LOG_ELS, - "2925 Failed to issue CT ABTS RSP x%x on " - "xri x%x, Data x%x\n", - icmd->un.xseq64.w5.hcsw.Rctl, oxid, - phba->link_state); + lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + "2925 Failed to issue CT ABTS RSP x%x on " + "xri x%x, Data x%x\n", + icmd->un.xseq64.w5.hcsw.Rctl, oxid, + phba->link_state); + lpfc_nlp_put(ndlp); + ctiocb->context1 = NULL; lpfc_sli_release_iocbq(phba, ctiocb); } } @@ -14249,32 +14505,25 @@ lpfc_sli4_handle_unsol_abort(struct lpfc_vport *vport, struct lpfc_hba *phba = vport->phba; struct fc_frame_header fc_hdr; uint32_t fctl; - bool abts_par; + bool aborted; /* Make a copy of fc_hdr before the dmabuf being released */ memcpy(&fc_hdr, dmabuf->hbuf.virt, sizeof(struct fc_frame_header)); fctl = sli4_fctl_from_fc_hdr(&fc_hdr); if (fctl & FC_FC_EX_CTX) { - /* - * ABTS sent by responder to exchange, just free the buffer - */ - lpfc_in_buf_free(phba, &dmabuf->dbuf); + /* ABTS by responder to exchange, no cleanup needed */ + aborted = true; } else { - /* - * ABTS sent by initiator to exchange, need to do cleanup - */ - /* Try to abort partially assembled seq */ - abts_par = lpfc_sli4_abort_partial_seq(vport, dmabuf); - - /* Send abort to ULP if partially seq abort failed */ - if (abts_par == false) - lpfc_sli4_send_seq_to_ulp(vport, dmabuf); - else - lpfc_in_buf_free(phba, &dmabuf->dbuf); + /* ABTS by initiator to exchange, need to do cleanup */ + aborted = lpfc_sli4_abort_partial_seq(vport, dmabuf); + if (aborted == false) + aborted = lpfc_sli4_abort_ulp_seq(vport, dmabuf); } - /* Send basic accept (BA_ACC) to the abort requester */ - lpfc_sli4_seq_abort_rsp(phba, &fc_hdr); + lpfc_in_buf_free(phba, &dmabuf->dbuf); + + /* Respond with BA_ACC or BA_RJT accordingly */ + lpfc_sli4_seq_abort_rsp(vport, &fc_hdr, aborted); } /** @@ -15307,10 +15556,13 @@ lpfc_sli4_fcf_rr_next_index_get(struct lpfc_hba *phba) { uint16_t next_fcf_index; +initial_priority: /* Search start from next bit of currently registered FCF index */ + next_fcf_index = phba->fcf.current_rec.fcf_indx; + next_priority: - next_fcf_index = (phba->fcf.current_rec.fcf_indx + 1) % - LPFC_SLI4_FCF_TBL_INDX_MAX; + /* Determine the next fcf index to check */ + next_fcf_index = (next_fcf_index + 1) % LPFC_SLI4_FCF_TBL_INDX_MAX; next_fcf_index = find_next_bit(phba->fcf.fcf_rr_bmask, LPFC_SLI4_FCF_TBL_INDX_MAX, next_fcf_index); @@ -15337,7 +15589,7 @@ next_priority: * at that level and continue the selection process. */ if (lpfc_check_next_fcf_pri_level(phba)) - goto next_priority; + goto initial_priority; lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, "2844 No roundrobin failover FCF available\n"); if (next_fcf_index >= LPFC_SLI4_FCF_TBL_INDX_MAX) diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 44c427a45d66..be02b59ea279 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -139,6 +139,10 @@ struct lpfc_queue { struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */ + uint16_t db_format; +#define LPFC_DB_RING_FORMAT 0x01 +#define LPFC_DB_LIST_FORMAT 0x02 + void __iomem *db_regaddr; /* For q stats */ uint32_t q_cnt_1; uint32_t q_cnt_2; @@ -508,6 +512,10 @@ struct lpfc_sli4_hba { struct lpfc_queue *hdr_rq; /* Slow-path Header Receive queue */ struct lpfc_queue *dat_rq; /* Slow-path Data Receive queue */ + uint8_t fw_func_mode; /* FW function protocol mode */ + uint32_t ulp0_mode; /* ULP0 protocol mode */ + uint32_t ulp1_mode; /* ULP1 protocol mode */ + /* Setup information for various queue parameters */ int eq_esize; int eq_ecount; diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index ba596e854bbc..f3b7795a296b 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -18,7 +18,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "8.3.36" +#define LPFC_DRIVER_VERSION "8.3.37" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index ffd85c511c8e..5e24e7e73714 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -155,7 +155,7 @@ _base_fault_reset_work(struct work_struct *work) struct task_struct *p; spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - if (ioc->shost_recovery) + if (ioc->shost_recovery || ioc->pci_error_recovery) goto rearm_timer; spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); @@ -164,6 +164,20 @@ _base_fault_reset_work(struct work_struct *work) printk(MPT2SAS_INFO_FMT "%s : SAS host is non-operational !!!!\n", ioc->name, __func__); + /* It may be possible that EEH recovery can resolve some of + * pci bus failure issues rather removing the dead ioc function + * by considering controller is in a non-operational state. So + * here priority is given to the EEH recovery. If it doesn't + * not resolve this issue, mpt2sas driver will consider this + * controller to non-operational state and remove the dead ioc + * function. + */ + if (ioc->non_operational_loop++ < 5) { + spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, + flags); + goto rearm_timer; + } + /* * Call _scsih_flush_pending_cmds callback so that we flush all * pending commands back to OS. This call is required to aovid @@ -193,6 +207,8 @@ _base_fault_reset_work(struct work_struct *work) return; /* don't rearm timer */ } + ioc->non_operational_loop = 0; + if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, FORCE_BIG_HAMMER); @@ -4386,6 +4402,7 @@ mpt2sas_base_attach(struct MPT2SAS_ADAPTER *ioc) if (missing_delay[0] != -1 && missing_delay[1] != -1) _base_update_missing_delay(ioc, missing_delay[0], missing_delay[1]); + ioc->non_operational_loop = 0; return 0; diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index 543d8d637479..c6ee7aad7501 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -835,6 +835,7 @@ struct MPT2SAS_ADAPTER { u16 cpu_msix_table_sz; u32 ioc_reset_count; MPT2SAS_FLUSH_RUNNING_CMDS schedule_dead_ioc_flush_running_cmds; + u32 non_operational_loop; /* internal commands, callback index */ u8 scsi_io_cb_idx; diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 04f8010f0770..18360032a520 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> @@ -1310,7 +1309,6 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, void *sg_local, *chain; u32 chain_offset; u32 chain_length; - u32 chain_flags; int sges_left; u32 sges_in_segment; u8 simple_sgl_flags; @@ -1356,8 +1354,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, sges_in_segment--; } - /* initializing the chain flags and pointers */ - chain_flags = MPI2_SGE_FLAGS_CHAIN_ELEMENT << MPI2_SGE_FLAGS_SHIFT; + /* initializing the pointers */ chain_req = _base_get_chain_buffer_tracker(ioc, smid); if (!chain_req) return -1; diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c index ce7e59b2fc08..1df9ed4f371d 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_config.c +++ b/drivers/scsi/mpt3sas/mpt3sas_config.c @@ -41,7 +41,6 @@ * USA. */ -#include <linux/version.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 8af944d7d13d..054d5231c974 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> @@ -3136,7 +3135,7 @@ _ctl_diag_trigger_mpi_store(struct device *cdev, spin_lock_irqsave(&ioc->diag_trigger_lock, flags); sz = min(sizeof(struct SL_WH_MPI_TRIGGERS_T), count); memset(&ioc->diag_trigger_mpi, 0, - sizeof(struct SL_WH_EVENT_TRIGGERS_T)); + sizeof(ioc->diag_trigger_mpi)); memcpy(&ioc->diag_trigger_mpi, buf, sz); if (ioc->diag_trigger_mpi.ValidEntries > NUM_VALID_ENTRIES) ioc->diag_trigger_mpi.ValidEntries = NUM_VALID_ENTRIES; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 6421a06c4ce2..dcbf7c880cb2 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -41,7 +41,6 @@ * USA. */ -#include <linux/version.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -2755,13 +2754,11 @@ _scsih_block_io_to_children_attached_directly(struct MPT3SAS_ADAPTER *ioc, int i; u16 handle; u16 reason_code; - u8 phy_number; for (i = 0; i < event_data->NumEntries; i++) { handle = le16_to_cpu(event_data->PHY[i].AttachedDevHandle); if (!handle) continue; - phy_number = event_data->StartPhyNum + i; reason_code = event_data->PHY[i].PhyStatus & MPI2_EVENT_SAS_TOPO_RC_MASK; if (reason_code == MPI2_EVENT_SAS_TOPO_RC_DELAY_NOT_RESPONDING) diff --git a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c index da6c5f25749c..6f8d6213040b 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c +++ b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> diff --git a/drivers/scsi/qla4xxx/ql4_83xx.c b/drivers/scsi/qla4xxx/ql4_83xx.c index 6e9af20be12f..5d8fe4f75650 100644 --- a/drivers/scsi/qla4xxx/ql4_83xx.c +++ b/drivers/scsi/qla4xxx/ql4_83xx.c @@ -538,7 +538,7 @@ struct device_info { int port_num; }; -static int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha) +int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha) { uint32_t drv_active; uint32_t dev_part, dev_part1, dev_part2; @@ -1351,31 +1351,58 @@ exit_start_fw: /*----------------------Interrupt Related functions ---------------------*/ -void qla4_83xx_disable_intrs(struct scsi_qla_host *ha) +static void qla4_83xx_disable_iocb_intrs(struct scsi_qla_host *ha) +{ + if (test_and_clear_bit(AF_83XX_IOCB_INTR_ON, &ha->flags)) + qla4_8xxx_intr_disable(ha); +} + +static void qla4_83xx_disable_mbox_intrs(struct scsi_qla_host *ha) { uint32_t mb_int, ret; - if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags)) - qla4_8xxx_mbx_intr_disable(ha); + if (test_and_clear_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) { + ret = readl(&ha->qla4_83xx_reg->mbox_int); + mb_int = ret & ~INT_ENABLE_FW_MB; + writel(mb_int, &ha->qla4_83xx_reg->mbox_int); + writel(1, &ha->qla4_83xx_reg->leg_int_mask); + } +} - ret = readl(&ha->qla4_83xx_reg->mbox_int); - mb_int = ret & ~INT_ENABLE_FW_MB; - writel(mb_int, &ha->qla4_83xx_reg->mbox_int); - writel(1, &ha->qla4_83xx_reg->leg_int_mask); +void qla4_83xx_disable_intrs(struct scsi_qla_host *ha) +{ + qla4_83xx_disable_mbox_intrs(ha); + qla4_83xx_disable_iocb_intrs(ha); } -void qla4_83xx_enable_intrs(struct scsi_qla_host *ha) +static void qla4_83xx_enable_iocb_intrs(struct scsi_qla_host *ha) +{ + if (!test_bit(AF_83XX_IOCB_INTR_ON, &ha->flags)) { + qla4_8xxx_intr_enable(ha); + set_bit(AF_83XX_IOCB_INTR_ON, &ha->flags); + } +} + +void qla4_83xx_enable_mbox_intrs(struct scsi_qla_host *ha) { uint32_t mb_int; - qla4_8xxx_mbx_intr_enable(ha); - mb_int = INT_ENABLE_FW_MB; - writel(mb_int, &ha->qla4_83xx_reg->mbox_int); - writel(0, &ha->qla4_83xx_reg->leg_int_mask); + if (!test_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) { + mb_int = INT_ENABLE_FW_MB; + writel(mb_int, &ha->qla4_83xx_reg->mbox_int); + writel(0, &ha->qla4_83xx_reg->leg_int_mask); + set_bit(AF_83XX_MBOX_INTR_ON, &ha->flags); + } +} - set_bit(AF_INTERRUPTS_ON, &ha->flags); + +void qla4_83xx_enable_intrs(struct scsi_qla_host *ha) +{ + qla4_83xx_enable_mbox_intrs(ha); + qla4_83xx_enable_iocb_intrs(ha); } + void qla4_83xx_queue_mbox_cmd(struct scsi_qla_host *ha, uint32_t *mbx_cmd, int incount) { diff --git a/drivers/scsi/qla4xxx/ql4_attr.c b/drivers/scsi/qla4xxx/ql4_attr.c index 76819b71ada7..19ee55a6226c 100644 --- a/drivers/scsi/qla4xxx/ql4_attr.c +++ b/drivers/scsi/qla4xxx/ql4_attr.c @@ -74,16 +74,22 @@ qla4_8xxx_sysfs_write_fw_dump(struct file *filep, struct kobject *kobj, } break; case 2: - /* Reset HBA */ + /* Reset HBA and collect FW dump */ ha->isp_ops->idc_lock(ha); dev_state = qla4_8xxx_rd_direct(ha, QLA8XXX_CRB_DEV_STATE); if (dev_state == QLA8XXX_DEV_READY) { - ql4_printk(KERN_INFO, ha, - "%s: Setting Need reset, reset_owner is 0x%x.\n", - __func__, ha->func_num); + ql4_printk(KERN_INFO, ha, "%s: Setting Need reset\n", + __func__); qla4_8xxx_wr_direct(ha, QLA8XXX_CRB_DEV_STATE, QLA8XXX_DEV_NEED_RESET); - set_bit(AF_8XXX_RST_OWNER, &ha->flags); + if (is_qla8022(ha) || + (is_qla8032(ha) && + qla4_83xx_can_perform_reset(ha))) { + set_bit(AF_8XXX_RST_OWNER, &ha->flags); + set_bit(AF_FW_RECOVERY, &ha->flags); + ql4_printk(KERN_INFO, ha, "%s: Reset owner is 0x%x\n", + __func__, ha->func_num); + } } else ql4_printk(KERN_INFO, ha, "%s: Reset not performed as device state is 0x%x\n", diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h index 329d553eae94..129f5dd02822 100644 --- a/drivers/scsi/qla4xxx/ql4_def.h +++ b/drivers/scsi/qla4xxx/ql4_def.h @@ -136,6 +136,7 @@ #define RESPONSE_QUEUE_DEPTH 64 #define QUEUE_SIZE 64 #define DMA_BUFFER_SIZE 512 +#define IOCB_HIWAT_CUSHION 4 /* * Misc @@ -180,6 +181,7 @@ #define DISABLE_ACB_TOV 30 #define IP_CONFIG_TOV 30 #define LOGIN_TOV 12 +#define BOOT_LOGIN_RESP_TOV 60 #define MAX_RESET_HA_RETRIES 2 #define FW_ALIVE_WAIT_TOV 3 @@ -314,6 +316,7 @@ struct ql4_tuple_ddb { * DDB flags. */ #define DF_RELOGIN 0 /* Relogin to device */ +#define DF_BOOT_TGT 1 /* Boot target entry */ #define DF_ISNS_DISCOVERED 2 /* Device was discovered via iSNS */ #define DF_FO_MASKED 3 @@ -501,6 +504,7 @@ struct scsi_qla_host { #define AF_INTERRUPTS_ON 6 /* 0x00000040 */ #define AF_GET_CRASH_RECORD 7 /* 0x00000080 */ #define AF_LINK_UP 8 /* 0x00000100 */ +#define AF_LOOPBACK 9 /* 0x00000200 */ #define AF_IRQ_ATTACHED 10 /* 0x00000400 */ #define AF_DISABLE_ACB_COMPLETE 11 /* 0x00000800 */ #define AF_HA_REMOVAL 12 /* 0x00001000 */ @@ -516,6 +520,8 @@ struct scsi_qla_host { #define AF_8XXX_RST_OWNER 25 /* 0x02000000 */ #define AF_82XX_DUMP_READING 26 /* 0x04000000 */ #define AF_83XX_NO_FW_DUMP 27 /* 0x08000000 */ +#define AF_83XX_IOCB_INTR_ON 28 /* 0x10000000 */ +#define AF_83XX_MBOX_INTR_ON 29 /* 0x20000000 */ unsigned long dpc_flags; @@ -537,6 +543,7 @@ struct scsi_qla_host { uint32_t tot_ddbs; uint16_t iocb_cnt; + uint16_t iocb_hiwat; /* SRB cache. */ #define SRB_MIN_REQ 128 @@ -838,7 +845,8 @@ static inline int is_aer_supported(struct scsi_qla_host *ha) static inline int adapter_up(struct scsi_qla_host *ha) { return (test_bit(AF_ONLINE, &ha->flags) != 0) && - (test_bit(AF_LINK_UP, &ha->flags) != 0); + (test_bit(AF_LINK_UP, &ha->flags) != 0) && + (!test_bit(AF_LOOPBACK, &ha->flags)); } static inline struct scsi_qla_host* to_qla_host(struct Scsi_Host *shost) diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h index 1c4795020357..ad9d2e2d370f 100644 --- a/drivers/scsi/qla4xxx/ql4_fw.h +++ b/drivers/scsi/qla4xxx/ql4_fw.h @@ -495,7 +495,7 @@ struct qla_flt_region { #define MBOX_ASTS_IPV6_LCL_PREFIX_IGNORED 0x802D #define MBOX_ASTS_ICMPV6_ERROR_MSG_RCVD 0x802E #define MBOX_ASTS_IDC_COMPLETE 0x8100 -#define MBOX_ASTS_IDC_NOTIFY 0x8101 +#define MBOX_ASTS_IDC_REQUEST_NOTIFICATION 0x8101 #define MBOX_ASTS_TXSCVR_INSERTED 0x8130 #define MBOX_ASTS_TXSCVR_REMOVED 0x8131 @@ -522,6 +522,10 @@ struct qla_flt_region { #define FLASH_OPT_COMMIT 2 #define FLASH_OPT_RMW_COMMIT 3 +/* Loopback type */ +#define ENABLE_INTERNAL_LOOPBACK 0x04 +#define ENABLE_EXTERNAL_LOOPBACK 0x08 + /*************************************************************************/ /* Host Adapter Initialization Control Block (from host) */ diff --git a/drivers/scsi/qla4xxx/ql4_glbl.h b/drivers/scsi/qla4xxx/ql4_glbl.h index 57a5a3cf5770..982293edf02c 100644 --- a/drivers/scsi/qla4xxx/ql4_glbl.h +++ b/drivers/scsi/qla4xxx/ql4_glbl.h @@ -253,12 +253,14 @@ void qla4_8xxx_set_rst_ready(struct scsi_qla_host *ha); void qla4_8xxx_clear_rst_ready(struct scsi_qla_host *ha); int qla4_8xxx_device_bootstrap(struct scsi_qla_host *ha); void qla4_8xxx_get_minidump(struct scsi_qla_host *ha); -int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha); -int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha); +int qla4_8xxx_intr_disable(struct scsi_qla_host *ha); +int qla4_8xxx_intr_enable(struct scsi_qla_host *ha); int qla4_8xxx_set_param(struct scsi_qla_host *ha, int param); int qla4_8xxx_update_idc_reg(struct scsi_qla_host *ha); int qla4_83xx_post_idc_ack(struct scsi_qla_host *ha); void qla4_83xx_disable_pause(struct scsi_qla_host *ha); +void qla4_83xx_enable_mbox_intrs(struct scsi_qla_host *ha); +int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha); extern int ql4xextended_error_logging; extern int ql4xdontresethba; diff --git a/drivers/scsi/qla4xxx/ql4_init.c b/drivers/scsi/qla4xxx/ql4_init.c index 1aca1b4f70b8..8fc8548ba4ba 100644 --- a/drivers/scsi/qla4xxx/ql4_init.c +++ b/drivers/scsi/qla4xxx/ql4_init.c @@ -195,12 +195,10 @@ exit_get_sys_info_no_free: * @ha: pointer to host adapter structure. * **/ -static int qla4xxx_init_local_data(struct scsi_qla_host *ha) +static void qla4xxx_init_local_data(struct scsi_qla_host *ha) { /* Initialize aen queue */ ha->aen_q_count = MAX_AEN_ENTRIES; - - return qla4xxx_get_firmware_status(ha); } static uint8_t @@ -935,14 +933,23 @@ int qla4xxx_initialize_adapter(struct scsi_qla_host *ha, int is_reset) if (ha->isp_ops->start_firmware(ha) == QLA_ERROR) goto exit_init_hba; + /* + * For ISP83XX, mailbox and IOCB interrupts are enabled separately. + * Mailbox interrupts must be enabled prior to issuing any mailbox + * command in order to prevent the possibility of losing interrupts + * while switching from polling to interrupt mode. IOCB interrupts are + * enabled via isp_ops->enable_intrs. + */ + if (is_qla8032(ha)) + qla4_83xx_enable_mbox_intrs(ha); + if (qla4xxx_about_firmware(ha) == QLA_ERROR) goto exit_init_hba; if (ha->isp_ops->get_sys_info(ha) == QLA_ERROR) goto exit_init_hba; - if (qla4xxx_init_local_data(ha) == QLA_ERROR) - goto exit_init_hba; + qla4xxx_init_local_data(ha); status = qla4xxx_init_firmware(ha); if (status == QLA_ERROR) diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c index f48f37a281d1..14fec976f634 100644 --- a/drivers/scsi/qla4xxx/ql4_iocb.c +++ b/drivers/scsi/qla4xxx/ql4_iocb.c @@ -316,7 +316,7 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) goto queuing_error; /* total iocbs active */ - if ((ha->iocb_cnt + req_cnt) >= REQUEST_QUEUE_DEPTH) + if ((ha->iocb_cnt + req_cnt) >= ha->iocb_hiwat) goto queuing_error; /* Build command packet */ diff --git a/drivers/scsi/qla4xxx/ql4_isr.c b/drivers/scsi/qla4xxx/ql4_isr.c index 15ea81465ce4..1b83dc283d2e 100644 --- a/drivers/scsi/qla4xxx/ql4_isr.c +++ b/drivers/scsi/qla4xxx/ql4_isr.c @@ -582,6 +582,33 @@ exit_prq_error: } /** + * qla4_83xx_loopback_in_progress: Is loopback in progress? + * @ha: Pointer to host adapter structure. + * @ret: 1 = loopback in progress, 0 = loopback not in progress + **/ +static int qla4_83xx_loopback_in_progress(struct scsi_qla_host *ha) +{ + int rval = 1; + + if (is_qla8032(ha)) { + if ((ha->idc_info.info2 & ENABLE_INTERNAL_LOOPBACK) || + (ha->idc_info.info2 & ENABLE_EXTERNAL_LOOPBACK)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Loopback diagnostics in progress\n", + __func__)); + rval = 1; + } else { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Loopback diagnostics not in progress\n", + __func__)); + rval = 0; + } + } + + return rval; +} + +/** * qla4xxx_isr_decode_mailbox - decodes mailbox status * @ha: Pointer to host adapter structure. * @mailbox_status: Mailbox status. @@ -676,8 +703,10 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, case MBOX_ASTS_LINK_DOWN: clear_bit(AF_LINK_UP, &ha->flags); - if (test_bit(AF_INIT_DONE, &ha->flags)) + if (test_bit(AF_INIT_DONE, &ha->flags)) { set_bit(DPC_LINK_CHANGED, &ha->dpc_flags); + qla4xxx_wake_dpc(ha); + } ql4_printk(KERN_INFO, ha, "%s: LINK DOWN\n", __func__); qla4xxx_post_aen_work(ha, ISCSI_EVENT_LINKDOWN, @@ -806,7 +835,7 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, " removed\n", ha->host_no, mbox_sts[0])); break; - case MBOX_ASTS_IDC_NOTIFY: + case MBOX_ASTS_IDC_REQUEST_NOTIFICATION: { uint32_t opcode; if (is_qla8032(ha)) { @@ -840,6 +869,11 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, DEBUG2(ql4_printk(KERN_INFO, ha, "scsi:%ld: AEN %04x IDC Complete notification\n", ha->host_no, mbox_sts[0])); + + if (qla4_83xx_loopback_in_progress(ha)) + set_bit(AF_LOOPBACK, &ha->flags); + else + clear_bit(AF_LOOPBACK, &ha->flags); } break; @@ -1124,17 +1158,18 @@ irqreturn_t qla4_83xx_intr_handler(int irq, void *dev_id) /* Legacy interrupt is valid if bit31 of leg_int_ptr is set */ if (!(leg_int_ptr & LEG_INT_PTR_B31)) { - ql4_printk(KERN_ERR, ha, - "%s: Legacy Interrupt Bit 31 not set, spurious interrupt!\n", - __func__); + DEBUG2(ql4_printk(KERN_ERR, ha, + "%s: Legacy Interrupt Bit 31 not set, spurious interrupt!\n", + __func__)); return IRQ_NONE; } /* Validate the PCIE function ID set in leg_int_ptr bits [19..16] */ if ((leg_int_ptr & PF_BITS_MASK) != ha->pf_bit) { - ql4_printk(KERN_ERR, ha, - "%s: Incorrect function ID 0x%x in legacy interrupt register, ha->pf_bit = 0x%x\n", - __func__, (leg_int_ptr & PF_BITS_MASK), ha->pf_bit); + DEBUG2(ql4_printk(KERN_ERR, ha, + "%s: Incorrect function ID 0x%x in legacy interrupt register, ha->pf_bit = 0x%x\n", + __func__, (leg_int_ptr & PF_BITS_MASK), + ha->pf_bit)); return IRQ_NONE; } @@ -1437,11 +1472,14 @@ irq_not_attached: void qla4xxx_free_irqs(struct scsi_qla_host *ha) { - if (test_bit(AF_MSIX_ENABLED, &ha->flags)) - qla4_8xxx_disable_msix(ha); - else if (test_and_clear_bit(AF_MSI_ENABLED, &ha->flags)) { - free_irq(ha->pdev->irq, ha); - pci_disable_msi(ha->pdev); - } else if (test_and_clear_bit(AF_INTx_ENABLED, &ha->flags)) - free_irq(ha->pdev->irq, ha); + if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags)) { + if (test_bit(AF_MSIX_ENABLED, &ha->flags)) { + qla4_8xxx_disable_msix(ha); + } else if (test_and_clear_bit(AF_MSI_ENABLED, &ha->flags)) { + free_irq(ha->pdev->irq, ha); + pci_disable_msi(ha->pdev); + } else if (test_and_clear_bit(AF_INTx_ENABLED, &ha->flags)) { + free_irq(ha->pdev->irq, ha); + } + } } diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c index 3d41034191f0..81e738d61ec0 100644 --- a/drivers/scsi/qla4xxx/ql4_mbx.c +++ b/drivers/scsi/qla4xxx/ql4_mbx.c @@ -44,6 +44,30 @@ void qla4xxx_process_mbox_intr(struct scsi_qla_host *ha, int out_count) } /** + * qla4xxx_is_intr_poll_mode – Are we allowed to poll for interrupts? + * @ha: Pointer to host adapter structure. + * @ret: 1=polling mode, 0=non-polling mode + **/ +static int qla4xxx_is_intr_poll_mode(struct scsi_qla_host *ha) +{ + int rval = 1; + + if (is_qla8032(ha)) { + if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && + test_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) + rval = 0; + } else { + if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && + test_bit(AF_INTERRUPTS_ON, &ha->flags) && + test_bit(AF_ONLINE, &ha->flags) && + !test_bit(AF_HA_REMOVAL, &ha->flags)) + rval = 0; + } + + return rval; +} + +/** * qla4xxx_mailbox_command - issues mailbox commands * @ha: Pointer to host adapter structure. * @inCount: number of mailbox registers to load. @@ -153,33 +177,28 @@ int qla4xxx_mailbox_command(struct scsi_qla_host *ha, uint8_t inCount, /* * Wait for completion: Poll or completion queue */ - if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && - test_bit(AF_INTERRUPTS_ON, &ha->flags) && - test_bit(AF_ONLINE, &ha->flags) && - !test_bit(AF_HA_REMOVAL, &ha->flags)) { - /* Do not poll for completion. Use completion queue */ - set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); - wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ); - clear_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); - } else { + if (qla4xxx_is_intr_poll_mode(ha)) { /* Poll for command to complete */ wait_count = jiffies + MBOX_TOV * HZ; while (test_bit(AF_MBOX_COMMAND_DONE, &ha->flags) == 0) { if (time_after_eq(jiffies, wait_count)) break; - /* * Service the interrupt. * The ISR will save the mailbox status registers * to a temporary storage location in the adapter * structure. */ - spin_lock_irqsave(&ha->hardware_lock, flags); ha->isp_ops->process_mailbox_interrupt(ha, outCount); spin_unlock_irqrestore(&ha->hardware_lock, flags); msleep(10); } + } else { + /* Do not poll for completion. Use completion queue */ + set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); + wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ); + clear_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); } /* Check for mailbox timeout. */ @@ -678,8 +697,24 @@ int qla4xxx_get_firmware_status(struct scsi_qla_host * ha) return QLA_ERROR; } - ql4_printk(KERN_INFO, ha, "%ld firmware IOCBs available (%d).\n", - ha->host_no, mbox_sts[2]); + /* High-water mark of IOCBs */ + ha->iocb_hiwat = mbox_sts[2]; + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: firmware IOCBs available = %d\n", __func__, + ha->iocb_hiwat)); + + if (ha->iocb_hiwat > IOCB_HIWAT_CUSHION) + ha->iocb_hiwat -= IOCB_HIWAT_CUSHION; + + /* Ideally, we should not enter this code, as the # of firmware + * IOCBs is hard-coded in the firmware. We set a default + * iocb_hiwat here just in case */ + if (ha->iocb_hiwat == 0) { + ha->iocb_hiwat = REQUEST_QUEUE_DEPTH / 4; + DEBUG2(ql4_printk(KERN_WARNING, ha, + "%s: Setting IOCB's to = %d\n", __func__, + ha->iocb_hiwat)); + } return QLA_SUCCESS; } diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c index 499a92db1cf6..71d3d234f526 100644 --- a/drivers/scsi/qla4xxx/ql4_nx.c +++ b/drivers/scsi/qla4xxx/ql4_nx.c @@ -2986,7 +2986,7 @@ int qla4_8xxx_load_risc(struct scsi_qla_host *ha) retval = qla4_8xxx_device_state_handler(ha); - if (retval == QLA_SUCCESS && !test_bit(AF_INIT_DONE, &ha->flags)) + if (retval == QLA_SUCCESS && !test_bit(AF_IRQ_ATTACHED, &ha->flags)) retval = qla4xxx_request_irqs(ha); return retval; @@ -3427,11 +3427,11 @@ int qla4_8xxx_get_sys_info(struct scsi_qla_host *ha) } /* Make sure we receive the minimum required data to cache internally */ - if (mbox_sts[4] < offsetof(struct mbx_sys_info, reserved)) { + if ((is_qla8032(ha) ? mbox_sts[3] : mbox_sts[4]) < + offsetof(struct mbx_sys_info, reserved)) { DEBUG2(printk("scsi%ld: %s: GET_SYS_INFO data receive" " error (%x)\n", ha->host_no, __func__, mbox_sts[4])); goto exit_validate_mac82; - } /* Save M.A.C. address & serial_number */ @@ -3463,7 +3463,7 @@ exit_validate_mac82: /* Interrupt handling helpers. */ -int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha) +int qla4_8xxx_intr_enable(struct scsi_qla_host *ha) { uint32_t mbox_cmd[MBOX_REG_COUNT]; uint32_t mbox_sts[MBOX_REG_COUNT]; @@ -3484,7 +3484,7 @@ int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha) return QLA_SUCCESS; } -int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha) +int qla4_8xxx_intr_disable(struct scsi_qla_host *ha) { uint32_t mbox_cmd[MBOX_REG_COUNT]; uint32_t mbox_sts[MBOX_REG_COUNT]; @@ -3509,7 +3509,7 @@ int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha) void qla4_82xx_enable_intrs(struct scsi_qla_host *ha) { - qla4_8xxx_mbx_intr_enable(ha); + qla4_8xxx_intr_enable(ha); spin_lock_irq(&ha->hardware_lock); /* BIT 10 - reset */ @@ -3522,7 +3522,7 @@ void qla4_82xx_disable_intrs(struct scsi_qla_host *ha) { if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags)) - qla4_8xxx_mbx_intr_disable(ha); + qla4_8xxx_intr_disable(ha); spin_lock_irq(&ha->hardware_lock); /* BIT 10 - set */ diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 4cec123a6a6a..6142729167f4 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -1337,18 +1337,18 @@ static int qla4xxx_session_get_param(struct iscsi_cls_session *cls_sess, sess->password_in, BIDI_CHAP, &idx); if (rval) - return -EINVAL; - - len = sprintf(buf, "%hu\n", idx); + len = sprintf(buf, "\n"); + else + len = sprintf(buf, "%hu\n", idx); break; case ISCSI_PARAM_CHAP_OUT_IDX: rval = qla4xxx_get_chap_index(ha, sess->username, sess->password, LOCAL_CHAP, &idx); if (rval) - return -EINVAL; - - len = sprintf(buf, "%hu\n", idx); + len = sprintf(buf, "\n"); + else + len = sprintf(buf, "%hu\n", idx); break; default: return iscsi_session_get_param(cls_sess, param, buf); @@ -2242,6 +2242,7 @@ static int qla4xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) test_bit(DPC_HA_NEED_QUIESCENT, &ha->dpc_flags) || !test_bit(AF_ONLINE, &ha->flags) || !test_bit(AF_LINK_UP, &ha->flags) || + test_bit(AF_LOOPBACK, &ha->flags) || test_bit(DPC_RESET_HA_FW_CONTEXT, &ha->dpc_flags)) goto qc_host_busy; @@ -2978,6 +2979,7 @@ static int qla4xxx_recover_adapter(struct scsi_qla_host *ha) if (status == QLA_SUCCESS) { if (!test_bit(AF_FW_RECOVERY, &ha->flags)) qla4xxx_cmd_wait(ha); + ha->isp_ops->disable_intrs(ha); qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); qla4xxx_abort_active_cmds(ha, DID_RESET << 16); @@ -3479,7 +3481,8 @@ dpc_post_reset_ha: } /* ---- link change? --- */ - if (test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) { + if (!test_bit(AF_LOOPBACK, &ha->flags) && + test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) { if (!test_bit(AF_LINK_UP, &ha->flags)) { /* ---- link down? --- */ qla4xxx_mark_all_devices_missing(ha); @@ -3508,10 +3511,8 @@ static void qla4xxx_free_adapter(struct scsi_qla_host *ha) { qla4xxx_abort_active_cmds(ha, DID_NO_CONNECT << 16); - if (test_bit(AF_INTERRUPTS_ON, &ha->flags)) { - /* Turn-off interrupts on the card. */ - ha->isp_ops->disable_intrs(ha); - } + /* Turn-off interrupts on the card. */ + ha->isp_ops->disable_intrs(ha); if (is_qla40XX(ha)) { writel(set_rmask(CSR_SCSI_PROCESSOR_INTR), @@ -3547,8 +3548,7 @@ static void qla4xxx_free_adapter(struct scsi_qla_host *ha) } /* Detach interrupts */ - if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags)) - qla4xxx_free_irqs(ha); + qla4xxx_free_irqs(ha); /* free extra memory */ qla4xxx_mem_free(ha); @@ -4687,7 +4687,8 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha, struct iscsi_endpoint *ep; struct sockaddr_in *addr; struct sockaddr_in6 *addr6; - struct sockaddr *dst_addr; + struct sockaddr *t_addr; + struct sockaddr_storage *dst_addr; char *ip; /* TODO: need to destroy on unload iscsi_endpoint*/ @@ -4696,21 +4697,23 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha, return NULL; if (fw_ddb_entry->options & DDB_OPT_IPV6_DEVICE) { - dst_addr->sa_family = AF_INET6; + t_addr = (struct sockaddr *)dst_addr; + t_addr->sa_family = AF_INET6; addr6 = (struct sockaddr_in6 *)dst_addr; ip = (char *)&addr6->sin6_addr; memcpy(ip, fw_ddb_entry->ip_addr, IPv6_ADDR_LEN); addr6->sin6_port = htons(le16_to_cpu(fw_ddb_entry->port)); } else { - dst_addr->sa_family = AF_INET; + t_addr = (struct sockaddr *)dst_addr; + t_addr->sa_family = AF_INET; addr = (struct sockaddr_in *)dst_addr; ip = (char *)&addr->sin_addr; memcpy(ip, fw_ddb_entry->ip_addr, IP_ADDR_LEN); addr->sin_port = htons(le16_to_cpu(fw_ddb_entry->port)); } - ep = qla4xxx_ep_connect(ha->host, dst_addr, 0); + ep = qla4xxx_ep_connect(ha->host, (struct sockaddr *)dst_addr, 0); vfree(dst_addr); return ep; } @@ -4725,7 +4728,8 @@ static int qla4xxx_verify_boot_idx(struct scsi_qla_host *ha, uint16_t idx) } static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha, - struct ddb_entry *ddb_entry) + struct ddb_entry *ddb_entry, + uint16_t idx) { uint16_t def_timeout; @@ -4745,6 +4749,10 @@ static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha, def_timeout : LOGIN_TOV; ddb_entry->default_time2wait = le16_to_cpu(ddb_entry->fw_ddb_entry.iscsi_def_time2wait); + + if (ql4xdisablesysfsboot && + (idx == ha->pri_ddb_idx || idx == ha->sec_ddb_idx)) + set_bit(DF_BOOT_TGT, &ddb_entry->flags); } static void qla4xxx_wait_for_ip_configuration(struct scsi_qla_host *ha) @@ -4881,7 +4889,7 @@ static void qla4xxx_remove_failed_ddb(struct scsi_qla_host *ha, static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha, struct dev_db_entry *fw_ddb_entry, - int is_reset) + int is_reset, uint16_t idx) { struct iscsi_cls_session *cls_sess; struct iscsi_session *sess; @@ -4919,7 +4927,7 @@ static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha, memcpy(&ddb_entry->fw_ddb_entry, fw_ddb_entry, sizeof(struct dev_db_entry)); - qla4xxx_setup_flash_ddb_entry(ha, ddb_entry); + qla4xxx_setup_flash_ddb_entry(ha, ddb_entry, idx); cls_conn = iscsi_conn_setup(cls_sess, sizeof(struct qla_conn), conn_id); @@ -5036,7 +5044,7 @@ static void qla4xxx_build_nt_list(struct scsi_qla_host *ha, goto continue_next_nt; } - ret = qla4xxx_sess_conn_setup(ha, fw_ddb_entry, is_reset); + ret = qla4xxx_sess_conn_setup(ha, fw_ddb_entry, is_reset, idx); if (ret == QLA_ERROR) goto exit_nt_list; @@ -5116,6 +5124,78 @@ void qla4xxx_build_ddb_list(struct scsi_qla_host *ha, int is_reset) } /** + * qla4xxx_wait_login_resp_boot_tgt - Wait for iSCSI boot target login + * response. + * @ha: pointer to adapter structure + * + * When the boot entry is normal iSCSI target then DF_BOOT_TGT flag will be + * set in DDB and we will wait for login response of boot targets during + * probe. + **/ +static void qla4xxx_wait_login_resp_boot_tgt(struct scsi_qla_host *ha) +{ + struct ddb_entry *ddb_entry; + struct dev_db_entry *fw_ddb_entry = NULL; + dma_addr_t fw_ddb_entry_dma; + unsigned long wtime; + uint32_t ddb_state; + int max_ddbs, idx, ret; + + max_ddbs = is_qla40XX(ha) ? MAX_DEV_DB_ENTRIES_40XX : + MAX_DEV_DB_ENTRIES; + + fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry), + &fw_ddb_entry_dma, GFP_KERNEL); + if (!fw_ddb_entry) { + ql4_printk(KERN_ERR, ha, + "%s: Unable to allocate dma buffer\n", __func__); + goto exit_login_resp; + } + + wtime = jiffies + (HZ * BOOT_LOGIN_RESP_TOV); + + for (idx = 0; idx < max_ddbs; idx++) { + ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, idx); + if (ddb_entry == NULL) + continue; + + if (test_bit(DF_BOOT_TGT, &ddb_entry->flags)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: DDB index [%d]\n", __func__, + ddb_entry->fw_ddb_index)); + do { + ret = qla4xxx_get_fwddb_entry(ha, + ddb_entry->fw_ddb_index, + fw_ddb_entry, fw_ddb_entry_dma, + NULL, NULL, &ddb_state, NULL, + NULL, NULL); + if (ret == QLA_ERROR) + goto exit_login_resp; + + if ((ddb_state == DDB_DS_SESSION_ACTIVE) || + (ddb_state == DDB_DS_SESSION_FAILED)) + break; + + schedule_timeout_uninterruptible(HZ); + + } while ((time_after(wtime, jiffies))); + + if (!time_after(wtime, jiffies)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Login response wait timer expired\n", + __func__)); + goto exit_login_resp; + } + } + } + +exit_login_resp: + if (fw_ddb_entry) + dma_free_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry), + fw_ddb_entry, fw_ddb_entry_dma); +} + +/** * qla4xxx_probe_adapter - callback function to probe HBA * @pdev: pointer to pci_dev structure * @pci_device_id: pointer to pci_device entry @@ -5270,7 +5350,7 @@ static int qla4xxx_probe_adapter(struct pci_dev *pdev, if (is_qla80XX(ha)) { ha->isp_ops->idc_lock(ha); dev_state = qla4_8xxx_rd_direct(ha, - QLA82XX_CRB_DEV_STATE); + QLA8XXX_CRB_DEV_STATE); ha->isp_ops->idc_unlock(ha); if (dev_state == QLA8XXX_DEV_FAILED) { ql4_printk(KERN_WARNING, ha, "%s: don't retry " @@ -5368,6 +5448,7 @@ skip_retry_init: /* Perform the build ddb list and login to each */ qla4xxx_build_ddb_list(ha, INIT_ADAPTER); iscsi_host_for_each_session(ha->host, qla4xxx_login_flash_ddb); + qla4xxx_wait_login_resp_boot_tgt(ha); qla4xxx_create_chap_list(ha); @@ -6008,14 +6089,6 @@ static int qla4xxx_host_reset(struct Scsi_Host *shost, int reset_type) goto exit_host_reset; } - rval = qla4xxx_wait_for_hba_online(ha); - if (rval != QLA_SUCCESS) { - DEBUG2(ql4_printk(KERN_INFO, ha, "%s: Unable to reset host " - "adapter\n", __func__)); - rval = -EIO; - goto exit_host_reset; - } - if (test_bit(DPC_RESET_HA, &ha->dpc_flags)) goto recover_adapter; @@ -6115,7 +6188,6 @@ qla4xxx_pci_mmio_enabled(struct pci_dev *pdev) static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) { uint32_t rval = QLA_ERROR; - uint32_t ret = 0; int fn; struct pci_dev *other_pdev = NULL; @@ -6201,16 +6273,7 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) qla4_8xxx_wr_direct(ha, QLA8XXX_CRB_DRV_STATE, 0); qla4_8xxx_set_drv_active(ha); ha->isp_ops->idc_unlock(ha); - ret = qla4xxx_request_irqs(ha); - if (ret) { - ql4_printk(KERN_WARNING, ha, "Failed to " - "reserve interrupt %d already in use.\n", - ha->pdev->irq); - rval = QLA_ERROR; - } else { - ha->isp_ops->enable_intrs(ha); - rval = QLA_SUCCESS; - } + ha->isp_ops->enable_intrs(ha); } } else { ql4_printk(KERN_INFO, ha, "scsi%ld: %s: devfn 0x%x is not " @@ -6220,18 +6283,9 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) QLA8XXX_DEV_READY)) { clear_bit(AF_FW_RECOVERY, &ha->flags); rval = qla4xxx_initialize_adapter(ha, RESET_ADAPTER); - if (rval == QLA_SUCCESS) { - ret = qla4xxx_request_irqs(ha); - if (ret) { - ql4_printk(KERN_WARNING, ha, "Failed to" - " reserve interrupt %d already in" - " use.\n", ha->pdev->irq); - rval = QLA_ERROR; - } else { - ha->isp_ops->enable_intrs(ha); - rval = QLA_SUCCESS; - } - } + if (rval == QLA_SUCCESS) + ha->isp_ops->enable_intrs(ha); + ha->isp_ops->idc_lock(ha); qla4_8xxx_set_drv_active(ha); ha->isp_ops->idc_unlock(ha); diff --git a/drivers/scsi/qla4xxx/ql4_version.h b/drivers/scsi/qla4xxx/ql4_version.h index f6df2ea91ab5..6775a45af315 100644 --- a/drivers/scsi/qla4xxx/ql4_version.h +++ b/drivers/scsi/qla4xxx/ql4_version.h @@ -5,4 +5,4 @@ * See LICENSE.qla4xxx for copyright and licensing details. */ -#define QLA4XXX_DRIVER_VERSION "5.03.00-k1" +#define QLA4XXX_DRIVER_VERSION "5.03.00-k4" diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 59d427bf08e2..0a74b975efdf 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2503,6 +2503,15 @@ show_priv_session_creator(struct device *dev, struct device_attribute *attr, } static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator, NULL); +static ssize_t +show_priv_session_target_id(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); + return sprintf(buf, "%d\n", session->target_id); +} +static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO, + show_priv_session_target_id, NULL); #define iscsi_priv_session_attr_show(field, format) \ static ssize_t \ @@ -2575,6 +2584,7 @@ static struct attribute *iscsi_session_attrs[] = { &dev_attr_priv_sess_creator.attr, &dev_attr_sess_chap_out_idx.attr, &dev_attr_sess_chap_in_idx.attr, + &dev_attr_priv_sess_target_id.attr, NULL, }; @@ -2638,6 +2648,8 @@ static umode_t iscsi_session_attr_is_visible(struct kobject *kobj, return S_IRUGO; else if (attr == &dev_attr_priv_sess_creator.attr) return S_IRUGO; + else if (attr == &dev_attr_priv_sess_target_id.attr) + return S_IRUGO; else { WARN_ONCE(1, "Invalid session attr"); return 0; diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index c2c77d1ac499..a764f165b589 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -29,14 +29,14 @@ choice config THERMAL_DEFAULT_GOV_STEP_WISE bool "step_wise" - select STEP_WISE + select THERMAL_GOV_STEP_WISE help Use the step_wise governor as default. This throttles the devices one step at a time. config THERMAL_DEFAULT_GOV_FAIR_SHARE bool "fair_share" - select FAIR_SHARE + select THERMAL_GOV_FAIR_SHARE help Use the fair_share governor as default. This throttles the devices based on their 'contribution' to a zone. The @@ -44,24 +44,24 @@ config THERMAL_DEFAULT_GOV_FAIR_SHARE config THERMAL_DEFAULT_GOV_USER_SPACE bool "user_space" - select USER_SPACE + select THERMAL_GOV_USER_SPACE help Select this if you want to let the user space manage the lpatform thermals. endchoice -config FAIR_SHARE +config THERMAL_GOV_FAIR_SHARE bool "Fair-share thermal governor" help Enable this to manage platform thermals using fair-share governor. -config STEP_WISE +config THERMAL_GOV_STEP_WISE bool "Step_wise thermal governor" help Enable this to manage platform thermals using a simple linear -config USER_SPACE +config THERMAL_GOV_USER_SPACE bool "User_space thermal governor" help Enable this to let the user space manage the platform thermals. @@ -78,6 +78,14 @@ config CPU_THERMAL and not the ACPI interface. If you want this support, you should say Y here. +config THERMAL_EMULATION + bool "Thermal emulation mode support" + help + Enable this option to make a emul_temp sysfs node in thermal zone + directory to support temperature emulation. With emulation sysfs node, + user can manually input temperature and test the different trip + threshold behaviour for simulation purpose. + config SPEAR_THERMAL bool "SPEAr thermal sensor driver" depends on PLAT_SPEAR @@ -93,6 +101,14 @@ config RCAR_THERMAL Enable this to plug the R-Car thermal sensor driver into the Linux thermal framework +config KIRKWOOD_THERMAL + tristate "Temperature sensor on Marvell Kirkwood SoCs" + depends on ARCH_KIRKWOOD + depends on OF + help + Support for the Kirkwood thermal sensor driver into the Linux thermal + framework. Only kirkwood 88F6282 and 88F6283 have this sensor. + config EXYNOS_THERMAL tristate "Temperature sensor on Samsung EXYNOS" depends on (ARCH_EXYNOS4 || ARCH_EXYNOS5) @@ -101,6 +117,23 @@ config EXYNOS_THERMAL If you say yes here you get support for TMU (Thermal Management Unit) on SAMSUNG EXYNOS series of SoC. +config EXYNOS_THERMAL_EMUL + bool "EXYNOS TMU emulation mode support" + depends on EXYNOS_THERMAL + help + Exynos 4412 and 4414 and 5 series has emulation mode on TMU. + Enable this option will be make sysfs node in exynos thermal platform + device directory to support emulation mode. With emulation mode sysfs + node, you can manually input temperature to TMU for simulation purpose. + +config DOVE_THERMAL + tristate "Temperature sensor on Marvell Dove SoCs" + depends on ARCH_DOVE + depends on OF + help + Support for the Dove thermal sensor driver in the Linux thermal + framework. + config DB8500_THERMAL bool "DB8500 thermal management" depends on ARCH_U8500 @@ -122,4 +155,14 @@ config DB8500_CPUFREQ_COOLING bound cpufreq cooling device turns active to set CPU frequency low to cool down the CPU. +config INTEL_POWERCLAMP + tristate "Intel PowerClamp idle injection driver" + depends on THERMAL + depends on X86 + depends on CPU_SUP_INTEL + help + Enable this to enable Intel PowerClamp idle injection driver. This + enforce idle time which results in more package C-state residency. The + user interface is exposed via generic thermal framework. + endif diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index d8da683245fc..d3a2b38c31e8 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -5,9 +5,9 @@ obj-$(CONFIG_THERMAL) += thermal_sys.o # governors -obj-$(CONFIG_FAIR_SHARE) += fair_share.o -obj-$(CONFIG_STEP_WISE) += step_wise.o -obj-$(CONFIG_USER_SPACE) += user_space.o +obj-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o +obj-$(CONFIG_THERMAL_GOV_STEP_WISE) += step_wise.o +obj-$(CONFIG_THERMAL_GOV_USER_SPACE) += user_space.o # cpufreq cooling obj-$(CONFIG_CPU_THERMAL) += cpu_cooling.o @@ -15,6 +15,10 @@ obj-$(CONFIG_CPU_THERMAL) += cpu_cooling.o # platform thermal drivers obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o obj-$(CONFIG_RCAR_THERMAL) += rcar_thermal.o +obj-$(CONFIG_KIRKWOOD_THERMAL) += kirkwood_thermal.o obj-$(CONFIG_EXYNOS_THERMAL) += exynos_thermal.o +obj-$(CONFIG_DOVE_THERMAL) += dove_thermal.o obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o +obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o + diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index c33fa5315d6b..8dc44cbb3e09 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -111,8 +111,8 @@ static int is_cpufreq_valid(int cpu) /** * get_cpu_frequency - get the absolute value of frequency from level. * @cpu: cpu for which frequency is fetched. - * @level: level of frequency of the CPU - * e.g level=1 --> 1st MAX FREQ, LEVEL=2 ---> 2nd MAX FREQ, .... etc + * @level: level of frequency, equals cooling state of cpu cooling device + * e.g level=0 --> 1st MAX FREQ, level=1 ---> 2nd MAX FREQ, .... etc */ static unsigned int get_cpu_frequency(unsigned int cpu, unsigned long level) { diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c index 4cf8e72af90a..21419851fc02 100644 --- a/drivers/thermal/db8500_cpufreq_cooling.c +++ b/drivers/thermal/db8500_cpufreq_cooling.c @@ -21,6 +21,7 @@ #include <linux/cpufreq.h> #include <linux/err.h> #include <linux/module.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/slab.h> @@ -73,15 +74,13 @@ static const struct of_device_id db8500_cpufreq_cooling_match[] = { { .compatible = "stericsson,db8500-cpufreq-cooling" }, {}, }; -#else -#define db8500_cpufreq_cooling_match NULL #endif static struct platform_driver db8500_cpufreq_cooling_driver = { .driver = { .owner = THIS_MODULE, .name = "db8500-cpufreq-cooling", - .of_match_table = db8500_cpufreq_cooling_match, + .of_match_table = of_match_ptr(db8500_cpufreq_cooling_match), }, .probe = db8500_cpufreq_cooling_probe, .suspend = db8500_cpufreq_cooling_suspend, diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c index ec71ade3e317..61ce60a35921 100644 --- a/drivers/thermal/db8500_thermal.c +++ b/drivers/thermal/db8500_thermal.c @@ -508,15 +508,13 @@ static const struct of_device_id db8500_thermal_match[] = { { .compatible = "stericsson,db8500-thermal" }, {}, }; -#else -#define db8500_thermal_match NULL #endif static struct platform_driver db8500_thermal_driver = { .driver = { .owner = THIS_MODULE, .name = "db8500-thermal", - .of_match_table = db8500_thermal_match, + .of_match_table = of_match_ptr(db8500_thermal_match), }, .probe = db8500_thermal_probe, .suspend = db8500_thermal_suspend, diff --git a/drivers/thermal/dove_thermal.c b/drivers/thermal/dove_thermal.c new file mode 100644 index 000000000000..7b0bfa0e7a9c --- /dev/null +++ b/drivers/thermal/dove_thermal.c @@ -0,0 +1,209 @@ +/* + * Dove thermal sensor driver + * + * Copyright (C) 2013 Andrew Lunn <andrew@lunn.ch> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/device.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/thermal.h> + +#define DOVE_THERMAL_TEMP_OFFSET 1 +#define DOVE_THERMAL_TEMP_MASK 0x1FF + +/* Dove Thermal Manager Control and Status Register */ +#define PMU_TM_DISABLE_OFFS 0 +#define PMU_TM_DISABLE_MASK (0x1 << PMU_TM_DISABLE_OFFS) + +/* Dove Theraml Diode Control 0 Register */ +#define PMU_TDC0_SW_RST_MASK (0x1 << 1) +#define PMU_TDC0_SEL_VCAL_OFFS 5 +#define PMU_TDC0_SEL_VCAL_MASK (0x3 << PMU_TDC0_SEL_VCAL_OFFS) +#define PMU_TDC0_REF_CAL_CNT_OFFS 11 +#define PMU_TDC0_REF_CAL_CNT_MASK (0x1FF << PMU_TDC0_REF_CAL_CNT_OFFS) +#define PMU_TDC0_AVG_NUM_OFFS 25 +#define PMU_TDC0_AVG_NUM_MASK (0x7 << PMU_TDC0_AVG_NUM_OFFS) + +/* Dove Thermal Diode Control 1 Register */ +#define PMU_TEMP_DIOD_CTRL1_REG 0x04 +#define PMU_TDC1_TEMP_VALID_MASK (0x1 << 10) + +/* Dove Thermal Sensor Dev Structure */ +struct dove_thermal_priv { + void __iomem *sensor; + void __iomem *control; +}; + +static int dove_init_sensor(const struct dove_thermal_priv *priv) +{ + u32 reg; + u32 i; + + /* Configure the Diode Control Register #0 */ + reg = readl_relaxed(priv->control); + + /* Use average of 2 */ + reg &= ~PMU_TDC0_AVG_NUM_MASK; + reg |= (0x1 << PMU_TDC0_AVG_NUM_OFFS); + + /* Reference calibration value */ + reg &= ~PMU_TDC0_REF_CAL_CNT_MASK; + reg |= (0x0F1 << PMU_TDC0_REF_CAL_CNT_OFFS); + + /* Set the high level reference for calibration */ + reg &= ~PMU_TDC0_SEL_VCAL_MASK; + reg |= (0x2 << PMU_TDC0_SEL_VCAL_OFFS); + writel(reg, priv->control); + + /* Reset the sensor */ + reg = readl_relaxed(priv->control); + writel((reg | PMU_TDC0_SW_RST_MASK), priv->control); + writel(reg, priv->control); + + /* Enable the sensor */ + reg = readl_relaxed(priv->sensor); + reg &= ~PMU_TM_DISABLE_MASK; + writel(reg, priv->sensor); + + /* Poll the sensor for the first reading */ + for (i = 0; i < 1000000; i++) { + reg = readl_relaxed(priv->sensor); + if (reg & DOVE_THERMAL_TEMP_MASK) + break; + } + + if (i == 1000000) + return -EIO; + + return 0; +} + +static int dove_get_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + unsigned long reg; + struct dove_thermal_priv *priv = thermal->devdata; + + /* Valid check */ + reg = readl_relaxed(priv->control + PMU_TEMP_DIOD_CTRL1_REG); + if ((reg & PMU_TDC1_TEMP_VALID_MASK) == 0x0) { + dev_err(&thermal->device, + "Temperature sensor reading not valid\n"); + return -EIO; + } + + /* + * Calculate temperature. See Section 8.10.1 of 88AP510, + * Documentation/arm/Marvell/README + */ + reg = readl_relaxed(priv->sensor); + reg = (reg >> DOVE_THERMAL_TEMP_OFFSET) & DOVE_THERMAL_TEMP_MASK; + *temp = ((2281638UL - (7298*reg)) / 10); + + return 0; +} + +static struct thermal_zone_device_ops ops = { + .get_temp = dove_get_temp, +}; + +static const struct of_device_id dove_thermal_id_table[] = { + { .compatible = "marvell,dove-thermal" }, + {} +}; + +static int dove_thermal_probe(struct platform_device *pdev) +{ + struct thermal_zone_device *thermal = NULL; + struct dove_thermal_priv *priv; + struct resource *res; + int ret; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Failed to get platform resource\n"); + return -ENODEV; + } + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->sensor = devm_request_and_ioremap(&pdev->dev, res); + if (!priv->sensor) { + dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); + return -EADDRNOTAVAIL; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) { + dev_err(&pdev->dev, "Failed to get platform resource\n"); + return -ENODEV; + } + priv->control = devm_request_and_ioremap(&pdev->dev, res); + if (!priv->control) { + dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); + return -EADDRNOTAVAIL; + } + + ret = dove_init_sensor(priv); + if (ret) { + dev_err(&pdev->dev, "Failed to initialize sensor\n"); + return ret; + } + + thermal = thermal_zone_device_register("dove_thermal", 0, 0, + priv, &ops, NULL, 0, 0); + if (IS_ERR(thermal)) { + dev_err(&pdev->dev, + "Failed to register thermal zone device\n"); + return PTR_ERR(thermal); + } + + platform_set_drvdata(pdev, thermal); + + return 0; +} + +static int dove_thermal_exit(struct platform_device *pdev) +{ + struct thermal_zone_device *dove_thermal = + platform_get_drvdata(pdev); + + thermal_zone_device_unregister(dove_thermal); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +MODULE_DEVICE_TABLE(of, dove_thermal_id_table); + +static struct platform_driver dove_thermal_driver = { + .probe = dove_thermal_probe, + .remove = dove_thermal_exit, + .driver = { + .name = "dove_thermal", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(dove_thermal_id_table), + }, +}; + +module_platform_driver(dove_thermal_driver); + +MODULE_AUTHOR("Andrew Lunn <andrew@lunn.ch>"); +MODULE_DESCRIPTION("Dove thermal driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c index bada1308318b..e04ebd8671ac 100644 --- a/drivers/thermal/exynos_thermal.c +++ b/drivers/thermal/exynos_thermal.c @@ -82,7 +82,7 @@ #define EXYNOS_TRIMINFO_RELOAD 0x1 #define EXYNOS_TMU_CLEAR_RISE_INT 0x111 -#define EXYNOS_TMU_CLEAR_FALL_INT (0x111 << 16) +#define EXYNOS_TMU_CLEAR_FALL_INT (0x111 << 12) #define EXYNOS_MUX_ADDR_VALUE 6 #define EXYNOS_MUX_ADDR_SHIFT 20 #define EXYNOS_TMU_TRIP_MODE_SHIFT 13 @@ -94,11 +94,20 @@ #define SENSOR_NAME_LEN 16 #define MAX_TRIP_COUNT 8 #define MAX_COOLING_DEVICE 4 +#define MAX_THRESHOLD_LEVS 4 #define ACTIVE_INTERVAL 500 #define IDLE_INTERVAL 10000 #define MCELSIUS 1000 +#ifdef CONFIG_EXYNOS_THERMAL_EMUL +#define EXYNOS_EMUL_TIME 0x57F0 +#define EXYNOS_EMUL_TIME_SHIFT 16 +#define EXYNOS_EMUL_DATA_SHIFT 8 +#define EXYNOS_EMUL_DATA_MASK 0xFF +#define EXYNOS_EMUL_ENABLE 0x1 +#endif /* CONFIG_EXYNOS_THERMAL_EMUL */ + /* CPU Zone information */ #define PANIC_ZONE 4 #define WARN_ZONE 3 @@ -125,6 +134,7 @@ struct exynos_tmu_data { struct thermal_trip_point_conf { int trip_val[MAX_TRIP_COUNT]; int trip_count; + u8 trigger_falling; }; struct thermal_cooling_conf { @@ -174,7 +184,8 @@ static int exynos_set_mode(struct thermal_zone_device *thermal, mutex_lock(&th_zone->therm_dev->lock); - if (mode == THERMAL_DEVICE_ENABLED) + if (mode == THERMAL_DEVICE_ENABLED && + !th_zone->sensor_conf->trip_data.trigger_falling) th_zone->therm_dev->polling_delay = IDLE_INTERVAL; else th_zone->therm_dev->polling_delay = 0; @@ -284,7 +295,7 @@ static int exynos_bind(struct thermal_zone_device *thermal, case MONITOR_ZONE: case WARN_ZONE: if (thermal_zone_bind_cooling_device(thermal, i, cdev, - level, level)) { + level, 0)) { pr_err("error binding cdev inst %d\n", i); ret = -EINVAL; } @@ -362,10 +373,17 @@ static int exynos_get_temp(struct thermal_zone_device *thermal, static int exynos_get_trend(struct thermal_zone_device *thermal, int trip, enum thermal_trend *trend) { - if (thermal->temperature >= trip) - *trend = THERMAL_TREND_RAISING; + int ret; + unsigned long trip_temp; + + ret = exynos_get_trip_temp(thermal, trip, &trip_temp); + if (ret < 0) + return ret; + + if (thermal->temperature >= trip_temp) + *trend = THERMAL_TREND_RAISE_FULL; else - *trend = THERMAL_TREND_DROPPING; + *trend = THERMAL_TREND_DROP_FULL; return 0; } @@ -413,7 +431,8 @@ static void exynos_report_trigger(void) break; } - if (th_zone->mode == THERMAL_DEVICE_ENABLED) { + if (th_zone->mode == THERMAL_DEVICE_ENABLED && + !th_zone->sensor_conf->trip_data.trigger_falling) { if (i > 0) th_zone->therm_dev->polling_delay = ACTIVE_INTERVAL; else @@ -452,7 +471,8 @@ static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf) th_zone->therm_dev = thermal_zone_device_register(sensor_conf->name, EXYNOS_ZONE_COUNT, 0, NULL, &exynos_dev_ops, NULL, 0, - IDLE_INTERVAL); + sensor_conf->trip_data.trigger_falling ? + 0 : IDLE_INTERVAL); if (IS_ERR(th_zone->therm_dev)) { pr_err("Failed to register thermal zone device\n"); @@ -559,8 +579,9 @@ static int exynos_tmu_initialize(struct platform_device *pdev) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct exynos_tmu_platform_data *pdata = data->pdata; - unsigned int status, trim_info, rising_threshold; - int ret = 0, threshold_code; + unsigned int status, trim_info; + unsigned int rising_threshold = 0, falling_threshold = 0; + int ret = 0, threshold_code, i, trigger_levs = 0; mutex_lock(&data->lock); clk_enable(data->clk); @@ -585,6 +606,11 @@ static int exynos_tmu_initialize(struct platform_device *pdev) (data->temp_error2 != 0)) data->temp_error1 = pdata->efuse_value; + /* Count trigger levels to be enabled */ + for (i = 0; i < MAX_THRESHOLD_LEVS; i++) + if (pdata->trigger_levels[i]) + trigger_levs++; + if (data->soc == SOC_ARCH_EXYNOS4210) { /* Write temperature code for threshold */ threshold_code = temp_to_code(data, pdata->threshold); @@ -594,44 +620,38 @@ static int exynos_tmu_initialize(struct platform_device *pdev) } writeb(threshold_code, data->base + EXYNOS4210_TMU_REG_THRESHOLD_TEMP); - - writeb(pdata->trigger_levels[0], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0); - writeb(pdata->trigger_levels[1], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL1); - writeb(pdata->trigger_levels[2], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL2); - writeb(pdata->trigger_levels[3], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL3); + for (i = 0; i < trigger_levs; i++) + writeb(pdata->trigger_levels[i], + data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0 + i * 4); writel(EXYNOS4210_TMU_INTCLEAR_VAL, data->base + EXYNOS_TMU_REG_INTCLEAR); } else if (data->soc == SOC_ARCH_EXYNOS) { - /* Write temperature code for threshold */ - threshold_code = temp_to_code(data, pdata->trigger_levels[0]); - if (threshold_code < 0) { - ret = threshold_code; - goto out; - } - rising_threshold = threshold_code; - threshold_code = temp_to_code(data, pdata->trigger_levels[1]); - if (threshold_code < 0) { - ret = threshold_code; - goto out; - } - rising_threshold |= (threshold_code << 8); - threshold_code = temp_to_code(data, pdata->trigger_levels[2]); - if (threshold_code < 0) { - ret = threshold_code; - goto out; + /* Write temperature code for rising and falling threshold */ + for (i = 0; i < trigger_levs; i++) { + threshold_code = temp_to_code(data, + pdata->trigger_levels[i]); + if (threshold_code < 0) { + ret = threshold_code; + goto out; + } + rising_threshold |= threshold_code << 8 * i; + if (pdata->threshold_falling) { + threshold_code = temp_to_code(data, + pdata->trigger_levels[i] - + pdata->threshold_falling); + if (threshold_code > 0) + falling_threshold |= + threshold_code << 8 * i; + } } - rising_threshold |= (threshold_code << 16); writel(rising_threshold, data->base + EXYNOS_THD_TEMP_RISE); - writel(0, data->base + EXYNOS_THD_TEMP_FALL); + writel(falling_threshold, + data->base + EXYNOS_THD_TEMP_FALL); - writel(EXYNOS_TMU_CLEAR_RISE_INT|EXYNOS_TMU_CLEAR_FALL_INT, + writel(EXYNOS_TMU_CLEAR_RISE_INT | EXYNOS_TMU_CLEAR_FALL_INT, data->base + EXYNOS_TMU_REG_INTCLEAR); } out: @@ -664,6 +684,8 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on) pdata->trigger_level2_en << 8 | pdata->trigger_level1_en << 4 | pdata->trigger_level0_en; + if (pdata->threshold_falling) + interrupt_en |= interrupt_en << 16; } else { con |= EXYNOS_TMU_CORE_OFF; interrupt_en = 0; /* Disable all interrupts */ @@ -697,20 +719,19 @@ static void exynos_tmu_work(struct work_struct *work) struct exynos_tmu_data *data = container_of(work, struct exynos_tmu_data, irq_work); + exynos_report_trigger(); mutex_lock(&data->lock); clk_enable(data->clk); - - if (data->soc == SOC_ARCH_EXYNOS) - writel(EXYNOS_TMU_CLEAR_RISE_INT, + writel(EXYNOS_TMU_CLEAR_RISE_INT | + EXYNOS_TMU_CLEAR_FALL_INT, data->base + EXYNOS_TMU_REG_INTCLEAR); else writel(EXYNOS4210_TMU_INTCLEAR_VAL, data->base + EXYNOS_TMU_REG_INTCLEAR); - clk_disable(data->clk); mutex_unlock(&data->lock); - exynos_report_trigger(); + enable_irq(data->irq); } @@ -759,6 +780,7 @@ static struct exynos_tmu_platform_data const exynos4210_default_tmu_data = { #if defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412) static struct exynos_tmu_platform_data const exynos_default_tmu_data = { + .threshold_falling = 10, .trigger_levels[0] = 85, .trigger_levels[1] = 103, .trigger_levels[2] = 110, @@ -800,8 +822,6 @@ static const struct of_device_id exynos_tmu_match[] = { {}, }; MODULE_DEVICE_TABLE(of, exynos_tmu_match); -#else -#define exynos_tmu_match NULL #endif static struct platform_device_id exynos_tmu_driver_ids[] = { @@ -832,6 +852,94 @@ static inline struct exynos_tmu_platform_data *exynos_get_driver_data( return (struct exynos_tmu_platform_data *) platform_get_device_id(pdev)->driver_data; } + +#ifdef CONFIG_EXYNOS_THERMAL_EMUL +static ssize_t exynos_tmu_emulation_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct platform_device *pdev = container_of(dev, + struct platform_device, dev); + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + unsigned int reg; + u8 temp_code; + int temp = 0; + + if (data->soc == SOC_ARCH_EXYNOS4210) + goto out; + + mutex_lock(&data->lock); + clk_enable(data->clk); + reg = readl(data->base + EXYNOS_EMUL_CON); + clk_disable(data->clk); + mutex_unlock(&data->lock); + + if (reg & EXYNOS_EMUL_ENABLE) { + reg >>= EXYNOS_EMUL_DATA_SHIFT; + temp_code = reg & EXYNOS_EMUL_DATA_MASK; + temp = code_to_temp(data, temp_code); + } +out: + return sprintf(buf, "%d\n", temp * MCELSIUS); +} + +static ssize_t exynos_tmu_emulation_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct platform_device *pdev = container_of(dev, + struct platform_device, dev); + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + unsigned int reg; + int temp; + + if (data->soc == SOC_ARCH_EXYNOS4210) + goto out; + + if (!sscanf(buf, "%d\n", &temp) || temp < 0) + return -EINVAL; + + mutex_lock(&data->lock); + clk_enable(data->clk); + + reg = readl(data->base + EXYNOS_EMUL_CON); + + if (temp) { + /* Both CELSIUS and MCELSIUS type are available for input */ + if (temp > MCELSIUS) + temp /= MCELSIUS; + + reg = (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT) | + (temp_to_code(data, (temp / MCELSIUS)) + << EXYNOS_EMUL_DATA_SHIFT) | EXYNOS_EMUL_ENABLE; + } else { + reg &= ~EXYNOS_EMUL_ENABLE; + } + + writel(reg, data->base + EXYNOS_EMUL_CON); + + clk_disable(data->clk); + mutex_unlock(&data->lock); + +out: + return count; +} + +static DEVICE_ATTR(emulation, 0644, exynos_tmu_emulation_show, + exynos_tmu_emulation_store); +static int create_emulation_sysfs(struct device *dev) +{ + return device_create_file(dev, &dev_attr_emulation); +} +static void remove_emulation_sysfs(struct device *dev) +{ + device_remove_file(dev, &dev_attr_emulation); +} +#else +static inline int create_emulation_sysfs(struct device *dev) { return 0; } +static inline void remove_emulation_sysfs(struct device *dev) {} +#endif + static int exynos_tmu_probe(struct platform_device *pdev) { struct exynos_tmu_data *data; @@ -914,6 +1022,8 @@ static int exynos_tmu_probe(struct platform_device *pdev) exynos_sensor_conf.trip_data.trip_val[i] = pdata->threshold + pdata->trigger_levels[i]; + exynos_sensor_conf.trip_data.trigger_falling = pdata->threshold_falling; + exynos_sensor_conf.cooling_data.freq_clip_count = pdata->freq_tab_count; for (i = 0; i < pdata->freq_tab_count; i++) { @@ -928,6 +1038,11 @@ static int exynos_tmu_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Failed to register thermal interface\n"); goto err_clk; } + + ret = create_emulation_sysfs(&pdev->dev); + if (ret) + dev_err(&pdev->dev, "Failed to create emulation mode sysfs node\n"); + return 0; err_clk: platform_set_drvdata(pdev, NULL); @@ -939,6 +1054,8 @@ static int exynos_tmu_remove(struct platform_device *pdev) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); + remove_emulation_sysfs(&pdev->dev); + exynos_tmu_control(pdev, false); exynos_unregister_thermal(); @@ -980,7 +1097,7 @@ static struct platform_driver exynos_tmu_driver = { .name = "exynos-tmu", .owner = THIS_MODULE, .pm = EXYNOS_TMU_PM, - .of_match_table = exynos_tmu_match, + .of_match_table = of_match_ptr(exynos_tmu_match), }, .probe = exynos_tmu_probe, .remove = exynos_tmu_remove, diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c new file mode 100644 index 000000000000..b40b37cd25e0 --- /dev/null +++ b/drivers/thermal/intel_powerclamp.c @@ -0,0 +1,795 @@ +/* + * intel_powerclamp.c - package c-state idle injection + * + * Copyright (c) 2012, Intel Corporation. + * + * Authors: + * Arjan van de Ven <arjan@linux.intel.com> + * Jacob Pan <jacob.jun.pan@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * + * TODO: + * 1. better handle wakeup from external interrupts, currently a fixed + * compensation is added to clamping duration when excessive amount + * of wakeups are observed during idle time. the reason is that in + * case of external interrupts without need for ack, clamping down + * cpu in non-irq context does not reduce irq. for majority of the + * cases, clamping down cpu does help reduce irq as well, we should + * be able to differenciate the two cases and give a quantitative + * solution for the irqs that we can control. perhaps based on + * get_cpu_iowait_time_us() + * + * 2. synchronization with other hw blocks + * + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/kthread.h> +#include <linux/freezer.h> +#include <linux/cpu.h> +#include <linux/thermal.h> +#include <linux/slab.h> +#include <linux/tick.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/sched/rt.h> + +#include <asm/nmi.h> +#include <asm/msr.h> +#include <asm/mwait.h> +#include <asm/cpu_device_id.h> +#include <asm/idle.h> +#include <asm/hardirq.h> + +#define MAX_TARGET_RATIO (50U) +/* For each undisturbed clamping period (no extra wake ups during idle time), + * we increment the confidence counter for the given target ratio. + * CONFIDENCE_OK defines the level where runtime calibration results are + * valid. + */ +#define CONFIDENCE_OK (3) +/* Default idle injection duration, driver adjust sleep time to meet target + * idle ratio. Similar to frequency modulation. + */ +#define DEFAULT_DURATION_JIFFIES (6) + +static unsigned int target_mwait; +static struct dentry *debug_dir; + +/* user selected target */ +static unsigned int set_target_ratio; +static unsigned int current_ratio; +static bool should_skip; +static bool reduce_irq; +static atomic_t idle_wakeup_counter; +static unsigned int control_cpu; /* The cpu assigned to collect stat and update + * control parameters. default to BSP but BSP + * can be offlined. + */ +static bool clamping; + + +static struct task_struct * __percpu *powerclamp_thread; +static struct thermal_cooling_device *cooling_dev; +static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu + * clamping thread + */ + +static unsigned int duration; +static unsigned int pkg_cstate_ratio_cur; +static unsigned int window_size; + +static int duration_set(const char *arg, const struct kernel_param *kp) +{ + int ret = 0; + unsigned long new_duration; + + ret = kstrtoul(arg, 10, &new_duration); + if (ret) + goto exit; + if (new_duration > 25 || new_duration < 6) { + pr_err("Out of recommended range %lu, between 6-25ms\n", + new_duration); + ret = -EINVAL; + } + + duration = clamp(new_duration, 6ul, 25ul); + smp_mb(); + +exit: + + return ret; +} + +static struct kernel_param_ops duration_ops = { + .set = duration_set, + .get = param_get_int, +}; + + +module_param_cb(duration, &duration_ops, &duration, 0644); +MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec."); + +struct powerclamp_calibration_data { + unsigned long confidence; /* used for calibration, basically a counter + * gets incremented each time a clamping + * period is completed without extra wakeups + * once that counter is reached given level, + * compensation is deemed usable. + */ + unsigned long steady_comp; /* steady state compensation used when + * no extra wakeups occurred. + */ + unsigned long dynamic_comp; /* compensate excessive wakeup from idle + * mostly from external interrupts. + */ +}; + +static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO]; + +static int window_size_set(const char *arg, const struct kernel_param *kp) +{ + int ret = 0; + unsigned long new_window_size; + + ret = kstrtoul(arg, 10, &new_window_size); + if (ret) + goto exit_win; + if (new_window_size > 10 || new_window_size < 2) { + pr_err("Out of recommended window size %lu, between 2-10\n", + new_window_size); + ret = -EINVAL; + } + + window_size = clamp(new_window_size, 2ul, 10ul); + smp_mb(); + +exit_win: + + return ret; +} + +static struct kernel_param_ops window_size_ops = { + .set = window_size_set, + .get = param_get_int, +}; + +module_param_cb(window_size, &window_size_ops, &window_size, 0644); +MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n" + "\tpowerclamp controls idle ratio within this window. larger\n" + "\twindow size results in slower response time but more smooth\n" + "\tclamping results. default to 2."); + +static void find_target_mwait(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int highest_cstate = 0; + unsigned int highest_subcstate = 0; + int i; + + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); + + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || + !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) + return; + + edx >>= MWAIT_SUBSTATE_SIZE; + for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { + if (edx & MWAIT_SUBSTATE_MASK) { + highest_cstate = i; + highest_subcstate = edx & MWAIT_SUBSTATE_MASK; + } + } + target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) | + (highest_subcstate - 1); + +} + +static u64 pkg_state_counter(void) +{ + u64 val; + u64 count = 0; + + static bool skip_c2; + static bool skip_c3; + static bool skip_c6; + static bool skip_c7; + + if (!skip_c2) { + if (!rdmsrl_safe(MSR_PKG_C2_RESIDENCY, &val)) + count += val; + else + skip_c2 = true; + } + + if (!skip_c3) { + if (!rdmsrl_safe(MSR_PKG_C3_RESIDENCY, &val)) + count += val; + else + skip_c3 = true; + } + + if (!skip_c6) { + if (!rdmsrl_safe(MSR_PKG_C6_RESIDENCY, &val)) + count += val; + else + skip_c6 = true; + } + + if (!skip_c7) { + if (!rdmsrl_safe(MSR_PKG_C7_RESIDENCY, &val)) + count += val; + else + skip_c7 = true; + } + + return count; +} + +static void noop_timer(unsigned long foo) +{ + /* empty... just the fact that we get the interrupt wakes us up */ +} + +static unsigned int get_compensation(int ratio) +{ + unsigned int comp = 0; + + /* we only use compensation if all adjacent ones are good */ + if (ratio == 1 && + cal_data[ratio].confidence >= CONFIDENCE_OK && + cal_data[ratio + 1].confidence >= CONFIDENCE_OK && + cal_data[ratio + 2].confidence >= CONFIDENCE_OK) { + comp = (cal_data[ratio].steady_comp + + cal_data[ratio + 1].steady_comp + + cal_data[ratio + 2].steady_comp) / 3; + } else if (ratio == MAX_TARGET_RATIO - 1 && + cal_data[ratio].confidence >= CONFIDENCE_OK && + cal_data[ratio - 1].confidence >= CONFIDENCE_OK && + cal_data[ratio - 2].confidence >= CONFIDENCE_OK) { + comp = (cal_data[ratio].steady_comp + + cal_data[ratio - 1].steady_comp + + cal_data[ratio - 2].steady_comp) / 3; + } else if (cal_data[ratio].confidence >= CONFIDENCE_OK && + cal_data[ratio - 1].confidence >= CONFIDENCE_OK && + cal_data[ratio + 1].confidence >= CONFIDENCE_OK) { + comp = (cal_data[ratio].steady_comp + + cal_data[ratio - 1].steady_comp + + cal_data[ratio + 1].steady_comp) / 3; + } + + /* REVISIT: simple penalty of double idle injection */ + if (reduce_irq) + comp = ratio; + /* do not exceed limit */ + if (comp + ratio >= MAX_TARGET_RATIO) + comp = MAX_TARGET_RATIO - ratio - 1; + + return comp; +} + +static void adjust_compensation(int target_ratio, unsigned int win) +{ + int delta; + struct powerclamp_calibration_data *d = &cal_data[target_ratio]; + + /* + * adjust compensations if confidence level has not been reached or + * there are too many wakeups during the last idle injection period, we + * cannot trust the data for compensation. + */ + if (d->confidence >= CONFIDENCE_OK || + atomic_read(&idle_wakeup_counter) > + win * num_online_cpus()) + return; + + delta = set_target_ratio - current_ratio; + /* filter out bad data */ + if (delta >= 0 && delta <= (1+target_ratio/10)) { + if (d->steady_comp) + d->steady_comp = + roundup(delta+d->steady_comp, 2)/2; + else + d->steady_comp = delta; + d->confidence++; + } +} + +static bool powerclamp_adjust_controls(unsigned int target_ratio, + unsigned int guard, unsigned int win) +{ + static u64 msr_last, tsc_last; + u64 msr_now, tsc_now; + u64 val64; + + /* check result for the last window */ + msr_now = pkg_state_counter(); + rdtscll(tsc_now); + + /* calculate pkg cstate vs tsc ratio */ + if (!msr_last || !tsc_last) + current_ratio = 1; + else if (tsc_now-tsc_last) { + val64 = 100*(msr_now-msr_last); + do_div(val64, (tsc_now-tsc_last)); + current_ratio = val64; + } + + /* update record */ + msr_last = msr_now; + tsc_last = tsc_now; + + adjust_compensation(target_ratio, win); + /* + * too many external interrupts, set flag such + * that we can take measure later. + */ + reduce_irq = atomic_read(&idle_wakeup_counter) >= + 2 * win * num_online_cpus(); + + atomic_set(&idle_wakeup_counter, 0); + /* if we are above target+guard, skip */ + return set_target_ratio + guard <= current_ratio; +} + +static int clamp_thread(void *arg) +{ + int cpunr = (unsigned long)arg; + DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0); + static const struct sched_param param = { + .sched_priority = MAX_USER_RT_PRIO/2, + }; + unsigned int count = 0; + unsigned int target_ratio; + + set_bit(cpunr, cpu_clamping_mask); + set_freezable(); + init_timer_on_stack(&wakeup_timer); + sched_setscheduler(current, SCHED_FIFO, ¶m); + + while (true == clamping && !kthread_should_stop() && + cpu_online(cpunr)) { + int sleeptime; + unsigned long target_jiffies; + unsigned int guard; + unsigned int compensation = 0; + int interval; /* jiffies to sleep for each attempt */ + unsigned int duration_jiffies = msecs_to_jiffies(duration); + unsigned int window_size_now; + + try_to_freeze(); + /* + * make sure user selected ratio does not take effect until + * the next round. adjust target_ratio if user has changed + * target such that we can converge quickly. + */ + target_ratio = set_target_ratio; + guard = 1 + target_ratio/20; + window_size_now = window_size; + count++; + + /* + * systems may have different ability to enter package level + * c-states, thus we need to compensate the injected idle ratio + * to achieve the actual target reported by the HW. + */ + compensation = get_compensation(target_ratio); + interval = duration_jiffies*100/(target_ratio+compensation); + + /* align idle time */ + target_jiffies = roundup(jiffies, interval); + sleeptime = target_jiffies - jiffies; + if (sleeptime <= 0) + sleeptime = 1; + schedule_timeout_interruptible(sleeptime); + /* + * only elected controlling cpu can collect stats and update + * control parameters. + */ + if (cpunr == control_cpu && !(count%window_size_now)) { + should_skip = + powerclamp_adjust_controls(target_ratio, + guard, window_size_now); + smp_mb(); + } + + if (should_skip) + continue; + + target_jiffies = jiffies + duration_jiffies; + mod_timer(&wakeup_timer, target_jiffies); + if (unlikely(local_softirq_pending())) + continue; + /* + * stop tick sched during idle time, interrupts are still + * allowed. thus jiffies are updated properly. + */ + preempt_disable(); + tick_nohz_idle_enter(); + /* mwait until target jiffies is reached */ + while (time_before(jiffies, target_jiffies)) { + unsigned long ecx = 1; + unsigned long eax = target_mwait; + + /* + * REVISIT: may call enter_idle() to notify drivers who + * can save power during cpu idle. same for exit_idle() + */ + local_touch_nmi(); + stop_critical_timings(); + __monitor((void *)¤t_thread_info()->flags, 0, 0); + cpu_relax(); /* allow HT sibling to run */ + __mwait(eax, ecx); + start_critical_timings(); + atomic_inc(&idle_wakeup_counter); + } + tick_nohz_idle_exit(); + preempt_enable_no_resched(); + } + del_timer_sync(&wakeup_timer); + clear_bit(cpunr, cpu_clamping_mask); + + return 0; +} + +/* + * 1 HZ polling while clamping is active, useful for userspace + * to monitor actual idle ratio. + */ +static void poll_pkg_cstate(struct work_struct *dummy); +static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate); +static void poll_pkg_cstate(struct work_struct *dummy) +{ + static u64 msr_last; + static u64 tsc_last; + static unsigned long jiffies_last; + + u64 msr_now; + unsigned long jiffies_now; + u64 tsc_now; + u64 val64; + + msr_now = pkg_state_counter(); + rdtscll(tsc_now); + jiffies_now = jiffies; + + /* calculate pkg cstate vs tsc ratio */ + if (!msr_last || !tsc_last) + pkg_cstate_ratio_cur = 1; + else { + if (tsc_now - tsc_last) { + val64 = 100 * (msr_now - msr_last); + do_div(val64, (tsc_now - tsc_last)); + pkg_cstate_ratio_cur = val64; + } + } + + /* update record */ + msr_last = msr_now; + jiffies_last = jiffies_now; + tsc_last = tsc_now; + + if (true == clamping) + schedule_delayed_work(&poll_pkg_cstate_work, HZ); +} + +static int start_power_clamp(void) +{ + unsigned long cpu; + struct task_struct *thread; + + /* check if pkg cstate counter is completely 0, abort in this case */ + if (!pkg_state_counter()) { + pr_err("pkg cstate counter not functional, abort\n"); + return -EINVAL; + } + + set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1); + /* prevent cpu hotplug */ + get_online_cpus(); + + /* prefer BSP */ + control_cpu = 0; + if (!cpu_online(control_cpu)) + control_cpu = smp_processor_id(); + + clamping = true; + schedule_delayed_work(&poll_pkg_cstate_work, 0); + + /* start one thread per online cpu */ + for_each_online_cpu(cpu) { + struct task_struct **p = + per_cpu_ptr(powerclamp_thread, cpu); + + thread = kthread_create_on_node(clamp_thread, + (void *) cpu, + cpu_to_node(cpu), + "kidle_inject/%ld", cpu); + /* bind to cpu here */ + if (likely(!IS_ERR(thread))) { + kthread_bind(thread, cpu); + wake_up_process(thread); + *p = thread; + } + + } + put_online_cpus(); + + return 0; +} + +static void end_power_clamp(void) +{ + int i; + struct task_struct *thread; + + clamping = false; + /* + * make clamping visible to other cpus and give per cpu clamping threads + * sometime to exit, or gets killed later. + */ + smp_mb(); + msleep(20); + if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) { + for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) { + pr_debug("clamping thread for cpu %d alive, kill\n", i); + thread = *per_cpu_ptr(powerclamp_thread, i); + kthread_stop(thread); + } + } +} + +static int powerclamp_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned long cpu = (unsigned long)hcpu; + struct task_struct *thread; + struct task_struct **percpu_thread = + per_cpu_ptr(powerclamp_thread, cpu); + + if (false == clamping) + goto exit_ok; + + switch (action) { + case CPU_ONLINE: + thread = kthread_create_on_node(clamp_thread, + (void *) cpu, + cpu_to_node(cpu), + "kidle_inject/%lu", cpu); + if (likely(!IS_ERR(thread))) { + kthread_bind(thread, cpu); + wake_up_process(thread); + *percpu_thread = thread; + } + /* prefer BSP as controlling CPU */ + if (cpu == 0) { + control_cpu = 0; + smp_mb(); + } + break; + case CPU_DEAD: + if (test_bit(cpu, cpu_clamping_mask)) { + pr_err("cpu %lu dead but powerclamping thread is not\n", + cpu); + kthread_stop(*percpu_thread); + } + if (cpu == control_cpu) { + control_cpu = smp_processor_id(); + smp_mb(); + } + } + +exit_ok: + return NOTIFY_OK; +} + +static struct notifier_block powerclamp_cpu_notifier = { + .notifier_call = powerclamp_cpu_callback, +}; + +static int powerclamp_get_max_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + *state = MAX_TARGET_RATIO; + + return 0; +} + +static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + if (true == clamping) + *state = pkg_cstate_ratio_cur; + else + /* to save power, do not poll idle ratio while not clamping */ + *state = -1; /* indicates invalid state */ + + return 0; +} + +static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev, + unsigned long new_target_ratio) +{ + int ret = 0; + + new_target_ratio = clamp(new_target_ratio, 0UL, + (unsigned long) (MAX_TARGET_RATIO-1)); + if (set_target_ratio == 0 && new_target_ratio > 0) { + pr_info("Start idle injection to reduce power\n"); + set_target_ratio = new_target_ratio; + ret = start_power_clamp(); + goto exit_set; + } else if (set_target_ratio > 0 && new_target_ratio == 0) { + pr_info("Stop forced idle injection\n"); + set_target_ratio = 0; + end_power_clamp(); + } else /* adjust currently running */ { + set_target_ratio = new_target_ratio; + /* make new set_target_ratio visible to other cpus */ + smp_mb(); + } + +exit_set: + return ret; +} + +/* bind to generic thermal layer as cooling device*/ +static struct thermal_cooling_device_ops powerclamp_cooling_ops = { + .get_max_state = powerclamp_get_max_state, + .get_cur_state = powerclamp_get_cur_state, + .set_cur_state = powerclamp_set_cur_state, +}; + +/* runs on Nehalem and later */ +static const struct x86_cpu_id intel_powerclamp_ids[] = { + { X86_VENDOR_INTEL, 6, 0x1a}, + { X86_VENDOR_INTEL, 6, 0x1c}, + { X86_VENDOR_INTEL, 6, 0x1e}, + { X86_VENDOR_INTEL, 6, 0x1f}, + { X86_VENDOR_INTEL, 6, 0x25}, + { X86_VENDOR_INTEL, 6, 0x26}, + { X86_VENDOR_INTEL, 6, 0x2a}, + { X86_VENDOR_INTEL, 6, 0x2c}, + { X86_VENDOR_INTEL, 6, 0x2d}, + { X86_VENDOR_INTEL, 6, 0x2e}, + { X86_VENDOR_INTEL, 6, 0x2f}, + { X86_VENDOR_INTEL, 6, 0x3a}, + {} +}; +MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); + +static int powerclamp_probe(void) +{ + if (!x86_match_cpu(intel_powerclamp_ids)) { + pr_err("Intel powerclamp does not run on family %d model %d\n", + boot_cpu_data.x86, boot_cpu_data.x86_model); + return -ENODEV; + } + if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC) || + !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) || + !boot_cpu_has(X86_FEATURE_MWAIT) || + !boot_cpu_has(X86_FEATURE_ARAT)) + return -ENODEV; + + /* find the deepest mwait value */ + find_target_mwait(); + + return 0; +} + +static int powerclamp_debug_show(struct seq_file *m, void *unused) +{ + int i = 0; + + seq_printf(m, "controlling cpu: %d\n", control_cpu); + seq_printf(m, "pct confidence steady dynamic (compensation)\n"); + for (i = 0; i < MAX_TARGET_RATIO; i++) { + seq_printf(m, "%d\t%lu\t%lu\t%lu\n", + i, + cal_data[i].confidence, + cal_data[i].steady_comp, + cal_data[i].dynamic_comp); + } + + return 0; +} + +static int powerclamp_debug_open(struct inode *inode, + struct file *file) +{ + return single_open(file, powerclamp_debug_show, inode->i_private); +} + +static const struct file_operations powerclamp_debug_fops = { + .open = powerclamp_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static inline void powerclamp_create_debug_files(void) +{ + debug_dir = debugfs_create_dir("intel_powerclamp", NULL); + if (!debug_dir) + return; + + if (!debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir, + cal_data, &powerclamp_debug_fops)) + goto file_error; + + return; + +file_error: + debugfs_remove_recursive(debug_dir); +} + +static int powerclamp_init(void) +{ + int retval; + int bitmap_size; + + bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long); + cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL); + if (!cpu_clamping_mask) + return -ENOMEM; + + /* probe cpu features and ids here */ + retval = powerclamp_probe(); + if (retval) + return retval; + /* set default limit, maybe adjusted during runtime based on feedback */ + window_size = 2; + register_hotcpu_notifier(&powerclamp_cpu_notifier); + powerclamp_thread = alloc_percpu(struct task_struct *); + cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL, + &powerclamp_cooling_ops); + if (IS_ERR(cooling_dev)) + return -ENODEV; + + if (!duration) + duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES); + powerclamp_create_debug_files(); + + return 0; +} +module_init(powerclamp_init); + +static void powerclamp_exit(void) +{ + unregister_hotcpu_notifier(&powerclamp_cpu_notifier); + end_power_clamp(); + free_percpu(powerclamp_thread); + thermal_cooling_device_unregister(cooling_dev); + kfree(cpu_clamping_mask); + + cancel_delayed_work_sync(&poll_pkg_cstate_work); + debugfs_remove_recursive(debug_dir); +} +module_exit(powerclamp_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); +MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>"); +MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs"); diff --git a/drivers/thermal/kirkwood_thermal.c b/drivers/thermal/kirkwood_thermal.c new file mode 100644 index 000000000000..65cb4f09e8f6 --- /dev/null +++ b/drivers/thermal/kirkwood_thermal.c @@ -0,0 +1,134 @@ +/* + * Kirkwood thermal sensor driver + * + * Copyright (C) 2012 Nobuhiro Iwamatsu <iwamatsu@nigauri.org> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/device.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/thermal.h> + +#define KIRKWOOD_THERMAL_VALID_OFFSET 9 +#define KIRKWOOD_THERMAL_VALID_MASK 0x1 +#define KIRKWOOD_THERMAL_TEMP_OFFSET 10 +#define KIRKWOOD_THERMAL_TEMP_MASK 0x1FF + +/* Kirkwood Thermal Sensor Dev Structure */ +struct kirkwood_thermal_priv { + void __iomem *sensor; +}; + +static int kirkwood_get_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + unsigned long reg; + struct kirkwood_thermal_priv *priv = thermal->devdata; + + reg = readl_relaxed(priv->sensor); + + /* Valid check */ + if (!(reg >> KIRKWOOD_THERMAL_VALID_OFFSET) & + KIRKWOOD_THERMAL_VALID_MASK) { + dev_err(&thermal->device, + "Temperature sensor reading not valid\n"); + return -EIO; + } + + /* + * Calculate temperature. See Section 8.10.1 of the 88AP510, + * datasheet, which has the same sensor. + * Documentation/arm/Marvell/README + */ + reg = (reg >> KIRKWOOD_THERMAL_TEMP_OFFSET) & + KIRKWOOD_THERMAL_TEMP_MASK; + *temp = ((2281638UL - (7298*reg)) / 10); + + return 0; +} + +static struct thermal_zone_device_ops ops = { + .get_temp = kirkwood_get_temp, +}; + +static const struct of_device_id kirkwood_thermal_id_table[] = { + { .compatible = "marvell,kirkwood-thermal" }, + {} +}; + +static int kirkwood_thermal_probe(struct platform_device *pdev) +{ + struct thermal_zone_device *thermal = NULL; + struct kirkwood_thermal_priv *priv; + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Failed to get platform resource\n"); + return -ENODEV; + } + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->sensor = devm_request_and_ioremap(&pdev->dev, res); + if (!priv->sensor) { + dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); + return -EADDRNOTAVAIL; + } + + thermal = thermal_zone_device_register("kirkwood_thermal", 0, 0, + priv, &ops, NULL, 0, 0); + if (IS_ERR(thermal)) { + dev_err(&pdev->dev, + "Failed to register thermal zone device\n"); + return PTR_ERR(thermal); + } + + platform_set_drvdata(pdev, thermal); + + return 0; +} + +static int kirkwood_thermal_exit(struct platform_device *pdev) +{ + struct thermal_zone_device *kirkwood_thermal = + platform_get_drvdata(pdev); + + thermal_zone_device_unregister(kirkwood_thermal); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +MODULE_DEVICE_TABLE(of, kirkwood_thermal_id_table); + +static struct platform_driver kirkwood_thermal_driver = { + .probe = kirkwood_thermal_probe, + .remove = kirkwood_thermal_exit, + .driver = { + .name = "kirkwood_thermal", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(kirkwood_thermal_id_table), + }, +}; + +module_platform_driver(kirkwood_thermal_driver); + +MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu@nigauri.org>"); +MODULE_DESCRIPTION("kirkwood thermal driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 90db951725da..28f091994013 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -19,225 +19,473 @@ */ #include <linux/delay.h> #include <linux/err.h> +#include <linux/irq.h> +#include <linux/interrupt.h> #include <linux/io.h> #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/thermal.h> -#define THSCR 0x2c -#define THSSR 0x30 +#define IDLE_INTERVAL 5000 + +#define COMMON_STR 0x00 +#define COMMON_ENR 0x04 +#define COMMON_INTMSK 0x0c + +#define REG_POSNEG 0x20 +#define REG_FILONOFF 0x28 +#define REG_THSCR 0x2c +#define REG_THSSR 0x30 +#define REG_INTCTRL 0x34 /* THSCR */ -#define CPTAP 0xf +#define CPCTL (1 << 12) /* THSSR */ #define CTEMP 0x3f - -struct rcar_thermal_priv { +struct rcar_thermal_common { void __iomem *base; struct device *dev; + struct list_head head; spinlock_t lock; - u32 comp; }; +struct rcar_thermal_priv { + void __iomem *base; + struct rcar_thermal_common *common; + struct thermal_zone_device *zone; + struct delayed_work work; + struct mutex lock; + struct list_head list; + int id; + int ctemp; +}; + +#define rcar_thermal_for_each_priv(pos, common) \ + list_for_each_entry(pos, &common->head, list) + #define MCELSIUS(temp) ((temp) * 1000) -#define rcar_zone_to_priv(zone) (zone->devdata) +#define rcar_zone_to_priv(zone) ((zone)->devdata) +#define rcar_priv_to_dev(priv) ((priv)->common->dev) +#define rcar_has_irq_support(priv) ((priv)->common->base) +#define rcar_id_to_shift(priv) ((priv)->id * 8) + +#ifdef DEBUG +# define rcar_force_update_temp(priv) 1 +#else +# define rcar_force_update_temp(priv) 0 +#endif /* * basic functions */ -static u32 rcar_thermal_read(struct rcar_thermal_priv *priv, u32 reg) +#define rcar_thermal_common_read(c, r) \ + _rcar_thermal_common_read(c, COMMON_ ##r) +static u32 _rcar_thermal_common_read(struct rcar_thermal_common *common, + u32 reg) { - unsigned long flags; - u32 ret; - - spin_lock_irqsave(&priv->lock, flags); + return ioread32(common->base + reg); +} - ret = ioread32(priv->base + reg); +#define rcar_thermal_common_write(c, r, d) \ + _rcar_thermal_common_write(c, COMMON_ ##r, d) +static void _rcar_thermal_common_write(struct rcar_thermal_common *common, + u32 reg, u32 data) +{ + iowrite32(data, common->base + reg); +} - spin_unlock_irqrestore(&priv->lock, flags); +#define rcar_thermal_common_bset(c, r, m, d) \ + _rcar_thermal_common_bset(c, COMMON_ ##r, m, d) +static void _rcar_thermal_common_bset(struct rcar_thermal_common *common, + u32 reg, u32 mask, u32 data) +{ + u32 val; - return ret; + val = ioread32(common->base + reg); + val &= ~mask; + val |= (data & mask); + iowrite32(val, common->base + reg); } -#if 0 /* no user at this point */ -static void rcar_thermal_write(struct rcar_thermal_priv *priv, - u32 reg, u32 data) +#define rcar_thermal_read(p, r) _rcar_thermal_read(p, REG_ ##r) +static u32 _rcar_thermal_read(struct rcar_thermal_priv *priv, u32 reg) { - unsigned long flags; - - spin_lock_irqsave(&priv->lock, flags); + return ioread32(priv->base + reg); +} +#define rcar_thermal_write(p, r, d) _rcar_thermal_write(p, REG_ ##r, d) +static void _rcar_thermal_write(struct rcar_thermal_priv *priv, + u32 reg, u32 data) +{ iowrite32(data, priv->base + reg); - - spin_unlock_irqrestore(&priv->lock, flags); } -#endif -static void rcar_thermal_bset(struct rcar_thermal_priv *priv, u32 reg, - u32 mask, u32 data) +#define rcar_thermal_bset(p, r, m, d) _rcar_thermal_bset(p, REG_ ##r, m, d) +static void _rcar_thermal_bset(struct rcar_thermal_priv *priv, u32 reg, + u32 mask, u32 data) { - unsigned long flags; u32 val; - spin_lock_irqsave(&priv->lock, flags); - val = ioread32(priv->base + reg); val &= ~mask; val |= (data & mask); iowrite32(val, priv->base + reg); - - spin_unlock_irqrestore(&priv->lock, flags); } /* * zone device functions */ -static int rcar_thermal_get_temp(struct thermal_zone_device *zone, - unsigned long *temp) +static int rcar_thermal_update_temp(struct rcar_thermal_priv *priv) { - struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); - int val, min, max, tmp; - - tmp = -200; /* default */ - while (1) { - if (priv->comp < 1 || priv->comp > 12) { - dev_err(priv->dev, - "THSSR invalid data (%d)\n", priv->comp); - priv->comp = 4; /* for next thermal */ - return -EINVAL; - } + struct device *dev = rcar_priv_to_dev(priv); + int i; + int ctemp, old, new; - /* - * THS comparator offset and the reference temperature - * - * Comparator | reference | Temperature field - * offset | temperature | measurement - * | (degrees C) | (degrees C) - * -------------+---------------+------------------- - * 1 | -45 | -45 to -30 - * 2 | -30 | -30 to -15 - * 3 | -15 | -15 to 0 - * 4 | 0 | 0 to +15 - * 5 | +15 | +15 to +30 - * 6 | +30 | +30 to +45 - * 7 | +45 | +45 to +60 - * 8 | +60 | +60 to +75 - * 9 | +75 | +75 to +90 - * 10 | +90 | +90 to +105 - * 11 | +105 | +105 to +120 - * 12 | +120 | +120 to +135 - */ + mutex_lock(&priv->lock); - /* calculate thermal limitation */ - min = (priv->comp * 15) - 60; - max = min + 15; + /* + * TSC decides a value of CPTAP automatically, + * and this is the conditions which validate interrupt. + */ + rcar_thermal_bset(priv, THSCR, CPCTL, CPCTL); + ctemp = 0; + old = ~0; + for (i = 0; i < 128; i++) { /* * we need to wait 300us after changing comparator offset * to get stable temperature. * see "Usage Notes" on datasheet */ - rcar_thermal_bset(priv, THSCR, CPTAP, priv->comp); udelay(300); - /* calculate current temperature */ - val = rcar_thermal_read(priv, THSSR) & CTEMP; - val = (val * 5) - 65; + new = rcar_thermal_read(priv, THSSR) & CTEMP; + if (new == old) { + ctemp = new; + break; + } + old = new; + } - dev_dbg(priv->dev, "comp/min/max/val = %d/%d/%d/%d\n", - priv->comp, min, max, val); + if (!ctemp) { + dev_err(dev, "thermal sensor was broken\n"); + return -EINVAL; + } - /* - * If val is same as min/max, then, - * it should try again on next comparator. - * But the val might be correct temperature. - * Keep it on "tmp" and compare with next val. - */ - if (tmp == val) - break; + /* + * enable IRQ + */ + if (rcar_has_irq_support(priv)) { + rcar_thermal_write(priv, FILONOFF, 0); - if (val <= min) { - tmp = min; - priv->comp--; /* try again */ - } else if (val >= max) { - tmp = max; - priv->comp++; /* try again */ - } else { - tmp = val; - break; - } + /* enable Rising/Falling edge interrupt */ + rcar_thermal_write(priv, POSNEG, 0x1); + rcar_thermal_write(priv, INTCTRL, (((ctemp - 0) << 8) | + ((ctemp - 1) << 0))); + } + + dev_dbg(dev, "thermal%d %d -> %d\n", priv->id, priv->ctemp, ctemp); + + priv->ctemp = ctemp; + + mutex_unlock(&priv->lock); + + return 0; +} + +static int rcar_thermal_get_temp(struct thermal_zone_device *zone, + unsigned long *temp) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + + if (!rcar_has_irq_support(priv) || rcar_force_update_temp(priv)) + rcar_thermal_update_temp(priv); + + mutex_lock(&priv->lock); + *temp = MCELSIUS((priv->ctemp * 5) - 65); + mutex_unlock(&priv->lock); + + return 0; +} + +static int rcar_thermal_get_trip_type(struct thermal_zone_device *zone, + int trip, enum thermal_trip_type *type) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + struct device *dev = rcar_priv_to_dev(priv); + + /* see rcar_thermal_get_temp() */ + switch (trip) { + case 0: /* +90 <= temp */ + *type = THERMAL_TRIP_CRITICAL; + break; + default: + dev_err(dev, "rcar driver trip error\n"); + return -EINVAL; + } + + return 0; +} + +static int rcar_thermal_get_trip_temp(struct thermal_zone_device *zone, + int trip, unsigned long *temp) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + struct device *dev = rcar_priv_to_dev(priv); + + /* see rcar_thermal_get_temp() */ + switch (trip) { + case 0: /* +90 <= temp */ + *temp = MCELSIUS(90); + break; + default: + dev_err(dev, "rcar driver trip error\n"); + return -EINVAL; + } + + return 0; +} + +static int rcar_thermal_notify(struct thermal_zone_device *zone, + int trip, enum thermal_trip_type type) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + struct device *dev = rcar_priv_to_dev(priv); + + switch (type) { + case THERMAL_TRIP_CRITICAL: + /* FIXME */ + dev_warn(dev, "Thermal reached to critical temperature\n"); + break; + default: + break; } - *temp = MCELSIUS(tmp); return 0; } static struct thermal_zone_device_ops rcar_thermal_zone_ops = { - .get_temp = rcar_thermal_get_temp, + .get_temp = rcar_thermal_get_temp, + .get_trip_type = rcar_thermal_get_trip_type, + .get_trip_temp = rcar_thermal_get_trip_temp, + .notify = rcar_thermal_notify, }; /* - * platform functions + * interrupt */ -static int rcar_thermal_probe(struct platform_device *pdev) +#define rcar_thermal_irq_enable(p) _rcar_thermal_irq_ctrl(p, 1) +#define rcar_thermal_irq_disable(p) _rcar_thermal_irq_ctrl(p, 0) +static void _rcar_thermal_irq_ctrl(struct rcar_thermal_priv *priv, int enable) +{ + struct rcar_thermal_common *common = priv->common; + unsigned long flags; + u32 mask = 0x3 << rcar_id_to_shift(priv); /* enable Rising/Falling */ + + spin_lock_irqsave(&common->lock, flags); + + rcar_thermal_common_bset(common, INTMSK, mask, enable ? 0 : mask); + + spin_unlock_irqrestore(&common->lock, flags); +} + +static void rcar_thermal_work(struct work_struct *work) { - struct thermal_zone_device *zone; struct rcar_thermal_priv *priv; - struct resource *res; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "Could not get platform resource\n"); - return -ENODEV; + priv = container_of(work, struct rcar_thermal_priv, work.work); + + rcar_thermal_update_temp(priv); + rcar_thermal_irq_enable(priv); + thermal_zone_device_update(priv->zone); +} + +static u32 rcar_thermal_had_changed(struct rcar_thermal_priv *priv, u32 status) +{ + struct device *dev = rcar_priv_to_dev(priv); + + status = (status >> rcar_id_to_shift(priv)) & 0x3; + + if (status & 0x3) { + dev_dbg(dev, "thermal%d %s%s\n", + priv->id, + (status & 0x2) ? "Rising " : "", + (status & 0x1) ? "Falling" : ""); } - priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) { - dev_err(&pdev->dev, "Could not allocate priv\n"); - return -ENOMEM; + return status; +} + +static irqreturn_t rcar_thermal_irq(int irq, void *data) +{ + struct rcar_thermal_common *common = data; + struct rcar_thermal_priv *priv; + unsigned long flags; + u32 status, mask; + + spin_lock_irqsave(&common->lock, flags); + + mask = rcar_thermal_common_read(common, INTMSK); + status = rcar_thermal_common_read(common, STR); + rcar_thermal_common_write(common, STR, 0x000F0F0F & mask); + + spin_unlock_irqrestore(&common->lock, flags); + + status = status & ~mask; + + /* + * check the status + */ + rcar_thermal_for_each_priv(priv, common) { + if (rcar_thermal_had_changed(priv, status)) { + rcar_thermal_irq_disable(priv); + schedule_delayed_work(&priv->work, + msecs_to_jiffies(300)); + } } - priv->comp = 4; /* basic setup */ - priv->dev = &pdev->dev; - spin_lock_init(&priv->lock); - priv->base = devm_ioremap_nocache(&pdev->dev, - res->start, resource_size(res)); - if (!priv->base) { - dev_err(&pdev->dev, "Unable to ioremap thermal register\n"); + return IRQ_HANDLED; +} + +/* + * platform functions + */ +static int rcar_thermal_probe(struct platform_device *pdev) +{ + struct rcar_thermal_common *common; + struct rcar_thermal_priv *priv; + struct device *dev = &pdev->dev; + struct resource *res, *irq; + int mres = 0; + int i; + int idle = IDLE_INTERVAL; + + common = devm_kzalloc(dev, sizeof(*common), GFP_KERNEL); + if (!common) { + dev_err(dev, "Could not allocate common\n"); return -ENOMEM; } - zone = thermal_zone_device_register("rcar_thermal", 0, 0, priv, - &rcar_thermal_zone_ops, NULL, 0, 0); - if (IS_ERR(zone)) { - dev_err(&pdev->dev, "thermal zone device is NULL\n"); - return PTR_ERR(zone); + INIT_LIST_HEAD(&common->head); + spin_lock_init(&common->lock); + common->dev = dev; + + irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (irq) { + int ret; + + /* + * platform has IRQ support. + * Then, drier use common register + */ + res = platform_get_resource(pdev, IORESOURCE_MEM, mres++); + if (!res) { + dev_err(dev, "Could not get platform resource\n"); + return -ENODEV; + } + + ret = devm_request_irq(dev, irq->start, rcar_thermal_irq, 0, + dev_name(dev), common); + if (ret) { + dev_err(dev, "irq request failed\n "); + return ret; + } + + /* + * rcar_has_irq_support() will be enabled + */ + common->base = devm_request_and_ioremap(dev, res); + if (!common->base) { + dev_err(dev, "Unable to ioremap thermal register\n"); + return -ENOMEM; + } + + /* enable temperature comparation */ + rcar_thermal_common_write(common, ENR, 0x00030303); + + idle = 0; /* polling delaye is not needed */ } - platform_set_drvdata(pdev, zone); + for (i = 0;; i++) { + res = platform_get_resource(pdev, IORESOURCE_MEM, mres++); + if (!res) + break; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) { + dev_err(dev, "Could not allocate priv\n"); + return -ENOMEM; + } + + priv->base = devm_request_and_ioremap(dev, res); + if (!priv->base) { + dev_err(dev, "Unable to ioremap priv register\n"); + return -ENOMEM; + } - dev_info(&pdev->dev, "proved\n"); + priv->common = common; + priv->id = i; + mutex_init(&priv->lock); + INIT_LIST_HEAD(&priv->list); + INIT_DELAYED_WORK(&priv->work, rcar_thermal_work); + rcar_thermal_update_temp(priv); + + priv->zone = thermal_zone_device_register("rcar_thermal", + 1, 0, priv, + &rcar_thermal_zone_ops, NULL, 0, + idle); + if (IS_ERR(priv->zone)) { + dev_err(dev, "can't register thermal zone\n"); + goto error_unregister; + } + + list_move_tail(&priv->list, &common->head); + + if (rcar_has_irq_support(priv)) + rcar_thermal_irq_enable(priv); + } + + platform_set_drvdata(pdev, common); + + dev_info(dev, "%d sensor proved\n", i); return 0; + +error_unregister: + rcar_thermal_for_each_priv(priv, common) + thermal_zone_device_unregister(priv->zone); + + return -ENODEV; } static int rcar_thermal_remove(struct platform_device *pdev) { - struct thermal_zone_device *zone = platform_get_drvdata(pdev); + struct rcar_thermal_common *common = platform_get_drvdata(pdev); + struct rcar_thermal_priv *priv; + + rcar_thermal_for_each_priv(priv, common) + thermal_zone_device_unregister(priv->zone); - thermal_zone_device_unregister(zone); platform_set_drvdata(pdev, NULL); return 0; } +static const struct of_device_id rcar_thermal_dt_ids[] = { + { .compatible = "renesas,rcar-thermal", }, + {}, +}; +MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids); + static struct platform_driver rcar_thermal_driver = { .driver = { .name = "rcar_thermal", + .of_match_table = rcar_thermal_dt_ids, }, .probe = rcar_thermal_probe, .remove = rcar_thermal_remove, diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c index 6b2d8b21aaee..3c5ee5607977 100644 --- a/drivers/thermal/spear_thermal.c +++ b/drivers/thermal/spear_thermal.c @@ -131,7 +131,7 @@ static int spear_thermal_probe(struct platform_device *pdev) return -ENOMEM; } - stdev->clk = clk_get(&pdev->dev, NULL); + stdev->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(stdev->clk)) { dev_err(&pdev->dev, "Can't get clock\n"); return PTR_ERR(stdev->clk); @@ -140,7 +140,7 @@ static int spear_thermal_probe(struct platform_device *pdev) ret = clk_enable(stdev->clk); if (ret) { dev_err(&pdev->dev, "Can't enable clock\n"); - goto put_clk; + return ret; } stdev->flags = val; @@ -163,8 +163,6 @@ static int spear_thermal_probe(struct platform_device *pdev) disable_clk: clk_disable(stdev->clk); -put_clk: - clk_put(stdev->clk); return ret; } @@ -183,7 +181,6 @@ static int spear_thermal_exit(struct platform_device *pdev) writel_relaxed(actual_mask & ~stdev->flags, stdev->thermal_base); clk_disable(stdev->clk); - clk_put(stdev->clk); return 0; } diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index 0cd5e9fbab1c..407cde3211c1 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -35,21 +35,54 @@ * state for this trip point * b. if the trend is THERMAL_TREND_DROPPING, use lower cooling * state for this trip point + * c. if the trend is THERMAL_TREND_RAISE_FULL, use upper limit + * for this trip point + * d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit + * for this trip point + * If the temperature is lower than a trip point, + * a. if the trend is THERMAL_TREND_RAISING, do nothing + * b. if the trend is THERMAL_TREND_DROPPING, use lower cooling + * state for this trip point, if the cooling state already + * equals lower limit, deactivate the thermal instance + * c. if the trend is THERMAL_TREND_RAISE_FULL, do nothing + * d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit, + * if the cooling state already equals lower limit, + * deactive the thermal instance */ static unsigned long get_target_state(struct thermal_instance *instance, - enum thermal_trend trend) + enum thermal_trend trend, bool throttle) { struct thermal_cooling_device *cdev = instance->cdev; unsigned long cur_state; cdev->ops->get_cur_state(cdev, &cur_state); - if (trend == THERMAL_TREND_RAISING) { - cur_state = cur_state < instance->upper ? - (cur_state + 1) : instance->upper; - } else if (trend == THERMAL_TREND_DROPPING) { - cur_state = cur_state > instance->lower ? - (cur_state - 1) : instance->lower; + switch (trend) { + case THERMAL_TREND_RAISING: + if (throttle) + cur_state = cur_state < instance->upper ? + (cur_state + 1) : instance->upper; + break; + case THERMAL_TREND_RAISE_FULL: + if (throttle) + cur_state = instance->upper; + break; + case THERMAL_TREND_DROPPING: + if (cur_state == instance->lower) { + if (!throttle) + cur_state = -1; + } else + cur_state -= 1; + break; + case THERMAL_TREND_DROP_FULL: + if (cur_state == instance->lower) { + if (!throttle) + cur_state = -1; + } else + cur_state = instance->lower; + break; + default: + break; } return cur_state; @@ -66,57 +99,14 @@ static void update_passive_instance(struct thermal_zone_device *tz, tz->passive += value; } -static void update_instance_for_throttle(struct thermal_zone_device *tz, - int trip, enum thermal_trip_type trip_type, - enum thermal_trend trend) -{ - struct thermal_instance *instance; - - list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip) - continue; - - instance->target = get_target_state(instance, trend); - - /* Activate a passive thermal instance */ - if (instance->target == THERMAL_NO_TARGET) - update_passive_instance(tz, trip_type, 1); - - instance->cdev->updated = false; /* cdev needs update */ - } -} - -static void update_instance_for_dethrottle(struct thermal_zone_device *tz, - int trip, enum thermal_trip_type trip_type) -{ - struct thermal_instance *instance; - struct thermal_cooling_device *cdev; - unsigned long cur_state; - - list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip || - instance->target == THERMAL_NO_TARGET) - continue; - - cdev = instance->cdev; - cdev->ops->get_cur_state(cdev, &cur_state); - - instance->target = cur_state > instance->lower ? - (cur_state - 1) : THERMAL_NO_TARGET; - - /* Deactivate a passive thermal instance */ - if (instance->target == THERMAL_NO_TARGET) - update_passive_instance(tz, trip_type, -1); - - cdev->updated = false; /* cdev needs update */ - } -} - static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) { long trip_temp; enum thermal_trip_type trip_type; enum thermal_trend trend; + struct thermal_instance *instance; + bool throttle = false; + int old_target; if (trip == THERMAL_TRIPS_NONE) { trip_temp = tz->forced_passive; @@ -128,12 +118,30 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) trend = get_tz_trend(tz, trip); + if (tz->temperature >= trip_temp) + throttle = true; + mutex_lock(&tz->lock); - if (tz->temperature >= trip_temp) - update_instance_for_throttle(tz, trip, trip_type, trend); - else - update_instance_for_dethrottle(tz, trip, trip_type); + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + if (instance->trip != trip) + continue; + + old_target = instance->target; + instance->target = get_target_state(instance, trend, throttle); + + /* Activate a passive thermal instance */ + if (old_target == THERMAL_NO_TARGET && + instance->target != THERMAL_NO_TARGET) + update_passive_instance(tz, trip_type, 1); + /* Deactivate a passive thermal instance */ + else if (old_target != THERMAL_NO_TARGET && + instance->target == THERMAL_NO_TARGET) + update_passive_instance(tz, trip_type, -1); + + + instance->cdev->updated = false; /* cdev needs update */ + } mutex_unlock(&tz->lock); } diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 84e95f32cdb6..5b7863a03f98 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -32,7 +32,6 @@ #include <linux/kdev_t.h> #include <linux/idr.h> #include <linux/thermal.h> -#include <linux/spinlock.h> #include <linux/reboot.h> #include <net/netlink.h> #include <net/genetlink.h> @@ -348,8 +347,9 @@ static void handle_critical_trips(struct thermal_zone_device *tz, tz->ops->notify(tz, trip, trip_type); if (trip_type == THERMAL_TRIP_CRITICAL) { - pr_emerg("Critical temperature reached(%d C),shutting down\n", - tz->temperature / 1000); + dev_emerg(&tz->device, + "critical temperature reached(%d C),shutting down\n", + tz->temperature / 1000); orderly_poweroff(true); } } @@ -371,23 +371,57 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip) monitor_thermal_zone(tz); } +static int thermal_zone_get_temp(struct thermal_zone_device *tz, + unsigned long *temp) +{ + int ret = 0; +#ifdef CONFIG_THERMAL_EMULATION + int count; + unsigned long crit_temp = -1UL; + enum thermal_trip_type type; +#endif + + mutex_lock(&tz->lock); + + ret = tz->ops->get_temp(tz, temp); +#ifdef CONFIG_THERMAL_EMULATION + if (!tz->emul_temperature) + goto skip_emul; + + for (count = 0; count < tz->trips; count++) { + ret = tz->ops->get_trip_type(tz, count, &type); + if (!ret && type == THERMAL_TRIP_CRITICAL) { + ret = tz->ops->get_trip_temp(tz, count, &crit_temp); + break; + } + } + + if (ret) + goto skip_emul; + + if (*temp < crit_temp) + *temp = tz->emul_temperature; +skip_emul: +#endif + mutex_unlock(&tz->lock); + return ret; +} + static void update_temperature(struct thermal_zone_device *tz) { long temp; int ret; - mutex_lock(&tz->lock); - - ret = tz->ops->get_temp(tz, &temp); + ret = thermal_zone_get_temp(tz, &temp); if (ret) { - pr_warn("failed to read out thermal zone %d\n", tz->id); - goto exit; + dev_warn(&tz->device, "failed to read out thermal zone %d\n", + tz->id); + return; } + mutex_lock(&tz->lock); tz->last_temperature = tz->temperature; tz->temperature = temp; - -exit: mutex_unlock(&tz->lock); } @@ -430,10 +464,7 @@ temp_show(struct device *dev, struct device_attribute *attr, char *buf) long temperature; int ret; - if (!tz->ops->get_temp) - return -EPERM; - - ret = tz->ops->get_temp(tz, &temperature); + ret = thermal_zone_get_temp(tz, &temperature); if (ret) return ret; @@ -693,6 +724,31 @@ policy_show(struct device *dev, struct device_attribute *devattr, char *buf) return sprintf(buf, "%s\n", tz->governor->name); } +#ifdef CONFIG_THERMAL_EMULATION +static ssize_t +emul_temp_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct thermal_zone_device *tz = to_thermal_zone(dev); + int ret = 0; + unsigned long temperature; + + if (kstrtoul(buf, 10, &temperature)) + return -EINVAL; + + if (!tz->ops->set_emul_temp) { + mutex_lock(&tz->lock); + tz->emul_temperature = temperature; + mutex_unlock(&tz->lock); + } else { + ret = tz->ops->set_emul_temp(tz, temperature); + } + + return ret ? ret : count; +} +static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store); +#endif/*CONFIG_THERMAL_EMULATION*/ + static DEVICE_ATTR(type, 0444, type_show, NULL); static DEVICE_ATTR(temp, 0444, temp_show, NULL); static DEVICE_ATTR(mode, 0644, mode_show, mode_store); @@ -835,7 +891,7 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) temp_input); struct thermal_zone_device *tz = temp->tz; - ret = tz->ops->get_temp(tz, &temperature); + ret = thermal_zone_get_temp(tz, &temperature); if (ret) return ret; @@ -1522,6 +1578,9 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, if (!ops || !ops->get_temp) return ERR_PTR(-EINVAL); + if (trips > 0 && !ops->get_trip_type) + return ERR_PTR(-EINVAL); + tz = kzalloc(sizeof(struct thermal_zone_device), GFP_KERNEL); if (!tz) return ERR_PTR(-ENOMEM); @@ -1585,6 +1644,11 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, goto unregister; } +#ifdef CONFIG_THERMAL_EMULATION + result = device_create_file(&tz->device, &dev_attr_emul_temp); + if (result) + goto unregister; +#endif /* Create policy attribute */ result = device_create_file(&tz->device, &dev_attr_policy); if (result) @@ -1704,7 +1768,8 @@ static struct genl_multicast_group thermal_event_mcgrp = { .name = THERMAL_GENL_MCAST_GROUP_NAME, }; -int thermal_generate_netlink_event(u32 orig, enum events event) +int thermal_generate_netlink_event(struct thermal_zone_device *tz, + enum events event) { struct sk_buff *skb; struct nlattr *attr; @@ -1714,6 +1779,9 @@ int thermal_generate_netlink_event(u32 orig, enum events event) int result; static unsigned int thermal_event_seqnum; + if (!tz) + return -EINVAL; + /* allocate memory */ size = nla_total_size(sizeof(struct thermal_genl_event)) + nla_total_size(0); @@ -1748,7 +1816,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event) memset(thermal_event, 0, sizeof(struct thermal_genl_event)); - thermal_event->orig = orig; + thermal_event->orig = tz->id; thermal_event->event = event; /* send multicast genetlink message */ @@ -1760,7 +1828,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event) result = genlmsg_multicast(skb, 0, thermal_event_mcgrp.id, GFP_ATOMIC); if (result) - pr_info("failed to send netlink event:%d\n", result); + dev_err(&tz->device, "Failed to send netlink event:%d", result); return result; } @@ -1800,6 +1868,7 @@ static int __init thermal_init(void) idr_destroy(&thermal_cdev_idr); mutex_destroy(&thermal_idr_lock); mutex_destroy(&thermal_list_lock); + return result; } result = genetlink_init(); return result; diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c index 372c8c0d54a0..950d354d50e2 100644 --- a/drivers/w1/masters/mxc_w1.c +++ b/drivers/w1/masters/mxc_w1.c @@ -157,9 +157,16 @@ static int mxc_w1_remove(struct platform_device *pdev) return 0; } +static struct of_device_id mxc_w1_dt_ids[] = { + { .compatible = "fsl,imx21-owire" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, mxc_w1_dt_ids); + static struct platform_driver mxc_w1_driver = { .driver = { - .name = "mxc_w1", + .name = "mxc_w1", + .of_match_table = mxc_w1_dt_ids, }, .probe = mxc_w1_probe, .remove = mxc_w1_remove, |
